In [3]:
!pip install jupyter_dash dash



In [4]:
# Import libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import linregress, pearsonr
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output
import joblib

# Load data
df = pd.read_csv('/content/final_bird_migration_dataset.csv')

# Preprocess dates
df['obsDt'] = pd.to_datetime(df['obsDt'])
df['arrival_date'] = pd.to_datetime(df['arrival_date'])
df['departure_date'] = pd.to_datetime(df['departure_date'])
df['year'] = df['obsDt'].dt.year
df['arrival_doy'] = df['arrival_date'].dt.dayofyear
df['departure_doy'] = df['departure_date'].dt.dayofyear
species_list = df['comName'].unique()
climate_vars = ['temperature', 'precipitation', 'humidity', 'wind_speed']

print(species_list)
print(climate_vars)
df.head()

['Ring-billed Gull' 'Black-and-white Warbler' 'Bushtit' 'Evening Grosbeak'
 'Green Kingfisher']
['temperature', 'precipitation', 'humidity', 'wind_speed']


Unnamed: 0,speciesCode,comName,sciName,locId,locName,obsDt,howMany,lat,lng,obsValid,...,arrival_date,departure_date,migration_distance_km,route_complexity_score,stopover_count,avg_stopover_duration_days,migration_type,climate_zone,arrival_doy,departure_doy
0,ribgul,Ring-billed Gull,Larus delawarensis,L342269,Walden Ponds Wildlife Habitat--Walden & Sawhil...,2024-07-25,54.0,40.044253,-105.186772,True,...,2024-03-01,2024-07-26,1179.830704,0.668289,1,9.982641,Long-distance,Subtropical,61,208
1,bawwar,Black-and-white Warbler,Mniotilta varia,L13733170,"751 NE 160th Ave, Williston US-FL 29.34111, -8...",2024-06-24,1.0,29.341115,-82.503038,True,...,2024-03-09,2024-06-10,867.489788,0.194321,7,6.271843,Short-distance,Temperate,69,162
2,bushti,Bushtit,Psaltriparus minimus,L583568,Home Alta Mesa Rd-Wilton,2020-07-07,2.0,38.399372,-121.222064,True,...,2020-03-14,2020-05-22,881.084946,0.557524,7,4.346925,Short-distance,Temperate,74,143
3,ribgul,Ring-billed Gull,Larus delawarensis,L1837850,Holden Beach--Beachfront,2024-08-16,1.0,33.910765,-78.296567,True,...,2024-02-16,2024-05-02,1261.699633,0.459173,7,1.200906,Short-distance,Temperate,47,123
4,ribgul,Ring-billed Gull,Larus delawarensis,L3121964,High Island Beach (Gulf Seawatch),2021-07-14,2.0,29.549799,-94.387797,True,...,2021-02-17,2021-07-31,835.716136,0.990675,3,8.158405,Long-distance,Tropical,48,212


**Temporal Analysis**

In [5]:
# Function to calculate trends
def calculate_trend(species_data, column):
    x = species_data['year']
    y = species_data[column]
    print(linregress(x, y))
    slope, intercept, r_value, p_value, std_err = linregress(x, y)
    return slope, intercept

# Temporal analysis
trends = []
for species in species_list:
    species_data = df[df['comName'] == species]
    if len(species_data) > 10:
        arrival_slope, arrival_intercept = calculate_trend(species_data, 'arrival_doy')
        departure_slope, departure_intercept = calculate_trend(species_data, 'departure_doy')
        trends.append({
            'Species': species,
            'Arrival_Slope': arrival_slope,
            'Departure_Slope': departure_slope
        })

trends_df = pd.DataFrame(trends)

# Plot 1: Scatter with Trend Lines
fig1 = go.Figure()
for species in species_list[:3]:  # Limit to 3 for clarity
    species_data = df[df['comName'] == species]
    fig1.add_trace(go.Scatter(
        x=species_data['year'],
        y=species_data['arrival_doy'],
        mode='markers',
        name=f'{species} Arrival',
        marker=dict(size=10, opacity=0.7),
        hovertemplate='Year: %{x}<br>DOY: %{y}<br>Species: ' + species
    ))
    slope, intercept = calculate_trend(species_data, 'arrival_doy')
    trend_y = slope * species_data['year'] + intercept
    fig1.add_trace(go.Scatter(
        x=species_data['year'],
        y=trend_y,
        mode='lines',
        name=f'{species} Trend',
        line=dict(dash='dash')
    ))

fig1.update_layout(
    title='Arrival Timing Trends by Species',
    xaxis_title='Year',
    yaxis_title='Day of Year (Arrival)',
    template='plotly_dark',
    hovermode='closest',
    legend=dict(orientation='h', y=-0.2),
    height=500
)
fig1.show()

# Plot 2: Violin Plot
fig2 = px.violin(
    df,
    x='year',
    y='arrival_doy',
    color='comName',
    box=True,
    points='outliers',
    title='Arrival DOY Distribution Across Years',
    height=500
)
fig2.update_layout(
    template='plotly_dark',
    xaxis_title='Year',
    yaxis_title='Day of Year (Arrival)',
    legend=dict(orientation='h', y=-0.2)
)
fig2.show()

# Save trends
trends_df.to_csv('migration_trends.csv', index=False)

LinregressResult(slope=np.float64(-0.14298589176668763), intercept=np.float64(355.51991577174124), rvalue=np.float64(-0.014615955157523303), pvalue=np.float64(0.7619486042207663), stderr=np.float64(0.4717211681316731), intercept_stderr=np.float64(953.8040453818783))
LinregressResult(slope=np.float64(-0.20101073910296624), intercept=np.float64(577.054790482201), rvalue=np.float64(-0.006087248766961775), pvalue=np.float64(0.8996086939764827), stderr=np.float64(1.5924138119517288), intercept_stderr=np.float64(3219.806187153286))
LinregressResult(slope=np.float64(1.0778923036113583), intercept=np.float64(-2111.4165561408263), rvalue=np.float64(0.11629359852645983), pvalue=np.float64(0.14434668419234975), stderr=np.float64(0.7347049283822814), intercept_stderr=np.float64(1485.6338146683343))
LinregressResult(slope=np.float64(2.8862215316988267), intercept=np.float64(-5661.333150176235), rvalue=np.float64(0.08814180421403436), pvalue=np.float64(0.26924267747600233), stderr=np.float64(2.60318

In [6]:
# Correlation analysis
correlations = []
for species in species_list:
    species_data = df[df['comName'] == species]
    if len(species_data) > 10:
        for var in climate_vars:
            corr, p_value = pearsonr(species_data['arrival_doy'], species_data[var])
            correlations.append({
                'Species': species,
                'Variable': var,
                'Correlation': corr,
                'P_Value': p_value
            })

corr_df = pd.DataFrame(correlations)

# Plot 1: Correlation Bar Plot
fig1 = px.bar(
    corr_df,
    x='Species',
    y='Correlation',
    color='Variable',
    barmode='group',
    title='Correlation of Climate Variables with Arrival DOY',
    height=500
)
fig1.update_layout(
    template='plotly_dark',
    xaxis_title='Species',
    yaxis_title='Correlation Coefficient',
    xaxis_tickangle=45,
    legend=dict(orientation='h', y=-0.3)
)
fig1.show()

# Save correlations
corr_df.to_csv('climate_correlations.csv', index=False)

In [7]:
# Create GeoDataFrame
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.lng, df.lat),
    crs="EPSG:4326"
)

# Plot 1: Folium Map with Clusters
def create_migration_map(species):
    species_gdf = gdf[gdf['comName'] == species]
    m = folium.Map(location=[39, -100], zoom_start=4, tiles='OpenStreetMap')
    marker_cluster = MarkerCluster().add_to(m)

    for idx, row in species_gdf.iterrows():
        folium.CircleMarker(
            location=[row['lat'], row['lng']],
            radius=6,
            popup=f"Species: {row['comName']}<br>Date: {row['obsDt']}<br>Habitat: {row['land_cover_type']}<br>Count: {row['howMany']}",
            fill=True,
            color='green' if row['habitat_loss_index'] < 0.5 else 'red',
            fill_opacity=0.7
        ).add_to(marker_cluster)

    return m

# Save and display map
m = create_migration_map(species_list[0])
m.save('migration_map.html')
from IPython.display import IFrame
IFrame(src='migration_map.html', width=700, height=500)

# Plot 2: Plotly Mapbox with Habitat Loss
fig2 = px.scatter_mapbox(
    df,
    lat='lat',
    lon='lng',
    color='habitat_loss_index',
     size=df['howMany'].fillna(0),
    hover_name='comName',
    hover_data=['obsDt', 'land_cover_type', 'habitat_loss_index'],
    zoom=3,
    mapbox_style='open-street-map',
    title='Migration Points with Habitat Loss Gradient',
    height=600
)
fig2.update_layout(
    template='plotly_dark',
    coloraxis_colorbar_title='Habitat Loss Index',
    legend=dict(orientation='h', y=-0.1)
)
fig2.show()

In [11]:
# Prepare data
features = ['temperature', 'precipitation', 'humidity', 'wind_speed']
X = df[features]
y = df['arrival_doy']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict
y_pred = rf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Plot 1: Actual vs Predicted
pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
fig1 = px.scatter(
    pred_df,
    x='Actual',
    y='Predicted',
    title=f'Random Forest:Actual vs Predicted Arrival DOY (MSE: {mse:.2f})',
    trendline='ols',
    height=500
)
fig1.update_layout(
    template='plotly_dark',
    xaxis_title='Actual DOY',
    yaxis_title='Predicted DOY'
)
fig1.show()

# Plot 2: Feature Importance
importance = pd.DataFrame({
    'Feature': features,
    'Importance': rf.feature_importances_
})
fig2 = px.bar(
    importance,
    x='Feature',
    y='Importance',
    title='Feature Importance for Migration Timing',
    error_y=[0.01] * len(features),  # Placeholder for error bars
    height=500
)
fig2.update_layout(
    template='plotly_dark',
    xaxis_title='Feature',
    yaxis_title='Importance'
)
fig2.show()

# Save model
joblib.dump(rf, 'migration_model.pkl')

Mean Squared Error: 189.29219852941176


['migration_model.pkl']

In [12]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import plotly.express as px
import joblib

# Prepare data
features = ['temperature', 'precipitation', 'humidity', 'wind_speed']
X = df[features]
y = df['arrival_doy']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Gradient Boosting model
gb = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb.fit(X_train, y_train)

# Predict
y_pred = gb.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error (Gradient Boosting): {mse:.2f}')

# Plot 1: Actual vs Predicted
pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
fig1 = px.scatter(
    pred_df,
    x='Actual',
    y='Predicted',
    title=f'Actual vs Predicted Arrival DOY (MSE: {mse:.2f}) - Gradient Boosting',
    trendline='ols',
    height=500
)
fig1.update_layout(
    template='plotly_dark',
    xaxis_title='Actual DOY',
    yaxis_title='Predicted DOY'
)
fig1.show()

# Plot 2: Feature Importance
importance = pd.DataFrame({
    'Feature': features,
    'Importance': gb.feature_importances_
})
fig2 = px.bar(
    importance,
    x='Feature',
    y='Importance',
    title='Feature Importance for Migration Timing - Gradient Boosting',
    error_y=[0.01] * len(features),  # Optional placeholder
    height=500
)
fig2.update_layout(
    template='plotly_dark',
    xaxis_title='Feature',
    yaxis_title='Importance'
)
fig2.show()

# Save the model
joblib.dump(gb, 'gradient_boosting_model.pkl')


Mean Squared Error (Gradient Boosting): 195.04


['gradient_boosting_model.pkl']

In [13]:
# Initialize JupyterDash
app = JupyterDash(__name__)

# Dashboard layout
app.layout = html.Div([
    html.H1('Bird Migration Analysis Dashboard', style={'textAlign': 'center', 'color': '#FFFFFF'}),
    html.Div([
        html.Label('Select Species:', style={'color': '#FFFFFF'}),
        dcc.Dropdown(
            id='species-dropdown',
            options=[{'label': s, 'value': s} for s in species_list],
            value=[species_list[0]],
            multi=True,
            style={'backgroundColor': '#333333', 'color': '#000000'}
        ),
        html.Label('Select Year Range:', style={'color': '#FFFFFF', 'marginTop': '10px'}),
        dcc.RangeSlider(
            id='year-slider',
            min=df['year'].min(),
            max=df['year'].max(),
            step=1,
            value=[df['year'].min(), df['year'].max()],
            marks={str(year): str(year) for year in range(df['year'].min(), df['year'].max()+1, 2)}
        ),
        html.Label('Select Climate Variable:', style={'color': '#FFFFFF', 'marginTop': '10px'}),
        dcc.Dropdown(
            id='climate-dropdown',
            options=[{'label': v, 'value': v} for v in climate_vars],
            value=climate_vars[0],
            style={'backgroundColor': '#333333', 'color': '#000000'}
        ),
    ], style={'width': '50%', 'margin': 'auto'}),
    html.Div([
        dcc.Graph(id='trend-plot1', style={'width': '50%', 'display': 'inline-block'}),
        dcc.Graph(id='trend-plot2', style={'width': '50%', 'display': 'inline-block'})
    ]),
    html.Div([
        dcc.Graph(id='corr-plot1', style={'width': '50%', 'display': 'inline-block'}),
        dcc.Graph(id='corr-plot2', style={'width': '50%', 'display': 'inline-block'})
    ]),
    dcc.Graph(id='spatial-plot', style={'width': '100%'})
], style={'backgroundColor': '#1E1E1E', 'padding': '20px'})

# Callbacks
@app.callback(
    [Output('trend-plot1', 'figure'),
     Output('trend-plot2', 'figure'),
     Output('corr-plot1', 'figure'),
     Output('corr-plot2', 'figure'),
     Output('spatial-plot', 'figure')],
    [Input('species-dropdown', 'value'),
     Input('year-slider', 'value'),
     Input('climate-dropdown', 'value')]
)
def update_dashboard(species, year_range, climate_var):
    if isinstance(species, str):
        species = [species]
    filtered_df = df[df['comName'].isin(species) & (df['year'] >= year_range[0]) & (df['year'] <= year_range[1])]

    # Trend Plot 1: Scatter
    trend_fig1 = go.Figure()
    for s in species:
        species_data = filtered_df[filtered_df['comName'] == s]
        trend_fig1.add_trace(go.Scatter(
            x=species_data['year'],
            y=species_data['arrival_doy'],
            mode='markers+lines',
            name=f'{s} Arrival',
            marker=dict(size=8),
            hovertemplate='Year: %{x}<br>DOY: %{y}<br>Species: ' + s
        ))
    trend_fig1.update_layout(
        title='Arrival Timing Trends',
        xaxis_title='Year',
        yaxis_title='Day of Year',
        template='plotly_dark',
        height=400
    )

    # Trend Plot 2: Violin
    trend_fig2 = px.violin(
        filtered_df,
        x='comName',
        y='arrival_doy',
        color='year',
        box=True,
        title='Arrival DOY Distribution',
        height=400
    )
    trend_fig2.update_layout(
        template='plotly_dark',
        xaxis_title='Species',
        yaxis_title='Day of Year'
    )

    # Correlation Plot 1: Bar
    corr_data = []
    for s in species:
        species_data = filtered_df[filtered_df['comName'] == s]
        if len(species_data) > 5:
            corr, _ = pearsonr(species_data['arrival_doy'], species_data[climate_var])
            corr_data.append({'Species': s, 'Variable': climate_var, 'Correlation': corr})
    corr_df = pd.DataFrame(corr_data)
    corr_fig1 = px.bar(
        corr_df,
        x='Species',
        y='Correlation',
        title=f'Correlation with {climate_var}',
        height=400
    )
    corr_fig1.update_layout(template='plotly_dark')

    # Correlation Plot 2: Scatter
    corr_fig2 = px.scatter(
        filtered_df,
        x=climate_var,
        y='arrival_doy',
        color='comName',
        size='howMany',
        title=f'Arrival DOY vs {climate_var}',
        trendline='ols',
        height=400
    )
    corr_fig2.update_layout(template='plotly_dark')

    # Spatial Plot
    spatial_fig = px.scatter_mapbox(
        filtered_df,
        lat='lat',
        lon='lng',
        color='comName',
        size='howMany',
        hover_name='comName',
        hover_data=['obsDt', climate_var, 'habitat_loss_index'],
        zoom=3,
        mapbox_style='open-street-map',
        title='Migration Map',
        height=500
    )
    spatial_fig.update_layout(template='plotly_dark')

    return trend_fig1, trend_fig2, corr_fig1, corr_fig2, spatial_fig

# Run dashboard
app.run(mode='inline')


JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



<IPython.core.display.Javascript object>