In [None]:
#Libraries and modules
import os
import pandas as pd
import numpy as np
import netCDF4 as nc
import xarray as xr
import geopandas as gpd
import folium
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from dotenv import load_dotenv
import earthaccess
import requests
from datetime import datetime
from noaa_sdk import NOAA

# NOTE: this repeats the `load_dotenv` import already present in the import list above; harmless.
from dotenv import load_dotenv
# Run python-dotenv's loader so values from a local .env file are available to the
# os.getenv(...) lookups in the following cell.
load_dotenv()

In [None]:
#KEY LOADING

#NASA EARTHACCESS
bearer_token = os.getenv("NASA_BEARER_TOKEN")

if not bearer_token :
    raise ValueError("------- TOKEN NOT FOUND. - ERROR: NASA EARTHACCESS BEARER TOKEN.")

# GBIF's public species/occurrence endpoints need no API key, so none is loaded here.

auth = earthaccess.login(persist=True)

#Session Check
# login() hands back an Auth object even when the login did not succeed, so plain
# truthiness is not enough; its `authenticated` flag is what reports the outcome.
if not auth or not getattr(auth, "authenticated", True):
    raise RuntimeError("!------- AUTHENTICATION FAILED.")

# Connectivity probe only: `count` caps the result so the cell does not enumerate
# (and print) the entire granule archive.
# NOTE(review): LP DAAC appears to register MOD11A1 Collection 6.1 under the
# version id "061" rather than "6.1" - confirm against CMR.
datasets = earthaccess.search_data(short_name="MOD11A1", version="061", count=5)
print("------- AVAILABLE DATASETS: ", datasets)

#DATA PATHS

CLIMATE_DATA_PATH = os.path.abspath("climate_data/")
BIRD_DATA_PATH = os.path.abspath("bird_data/")
FISH_DATA_PATH = os.path.abspath("fish_data/")

# Ensure directories exist
for path in [CLIMATE_DATA_PATH, BIRD_DATA_PATH, FISH_DATA_PATH]:
    os.makedirs(path, exist_ok=True)


In [None]:
# CLIMATE DATA - FETCH AND LOAD

def fetch_climate_data(short_name="MODIS", version="6.1", count=-1):
    """Search NASA Earthdata for a collection and download the matching granules.

    Args:
        short_name: Collection short name passed to ``earthaccess.search_data``.
        version: Collection version id passed to ``earthaccess.search_data``.
        count: Maximum number of granules to request. ``-1`` (the default, and
            earthaccess's own default) applies no cap.

    Returns:
        The list returned by ``earthaccess.download`` (local file paths), or an
        empty list when the search matched nothing.
    """
    results = earthaccess.search_data(short_name=short_name, version=version, count=count)
    if not results:
        # Nothing to download; return an empty list so callers can test truthiness
        # instead of handing an empty granule list to the downloader.
        print(f"------- No granules found for {short_name} (version {version}); nothing downloaded.")
        return []
    files = earthaccess.download(results, local_path=CLIMATE_DATA_PATH)
    print("------- Climate Data Downloaded:", files)
    return files

# Upper bound on granules fetched per run; raise it deliberately for a full analysis.
CLIMATE_GRANULE_LIMIT = 10

# "MODIS" is an instrument name, not a collection short name; the loader below reads
# LST_Day_1km, which is the MOD11A1 product probed in the key-loading cell.
# NOTE(review): version id "061" assumed for LP DAAC Collection 6.1 - confirm against CMR.
climate_files = fetch_climate_data(short_name="MOD11A1", version="061", count=CLIMATE_GRANULE_LIMIT)


def load_climate_data(file_path):
    """Load a NetCDF climate file into a per-timestamp DataFrame.

    Reads the ``time``, ``LST_Day_1km``, ``latitude`` and ``longitude`` variables.

    Args:
        file_path: Path of the file to open with ``netCDF4.Dataset``.

    Returns:
        DataFrame with columns ``Time`` (decoded with ``num2date``), ``Latitude``
        and ``Longitude`` (mean of the coordinate arrays, repeated on every row)
        and ``Temperature``. When the leading axis of ``LST_Day_1km`` has one
        entry per timestamp, ``Temperature`` is the mean over the remaining axes
        for each timestamp; otherwise it falls back to the mean of the whole
        array repeated on every row.

    Raises:
        KeyError: if one of the expected variables is absent from the file.
    """
    # NOTE(review): MOD11A1 granules are normally distributed as HDF-EOS, not
    # NetCDF, and may not expose a "time" variable - confirm the file layout.
    # The context manager closes the file even if a variable lookup fails.
    with nc.Dataset(file_path, "r") as dataset:
        variables = dataset.variables
        print("------- Climate Data Variables:", variables.keys())

        time_var = variables["time"]
        time = nc.num2date(time_var[:], time_var.units)
        temp = variables["LST_Day_1km"][:]
        lat = variables["latitude"][:]
        lon = variables["longitude"][:]

    n_steps = np.size(time)
    # assumes axis 0 of LST_Day_1km is time whenever its length matches - TODO confirm
    if n_steps > 0 and np.ndim(temp) >= 1 and np.shape(temp)[0] == n_steps:
        # One value per timestamp; a single global mean would flatten the series
        # into a constant and leave nothing for the forecasting models to learn.
        per_step = temp.reshape(n_steps, -1).mean(axis=1)
        temperature = np.ma.filled(per_step.astype(float), np.nan)
    else:
        temperature = temp.mean()

    return pd.DataFrame({"Time": time, "Latitude": lat.mean(), "Longitude": lon.mean(), "Temperature": temperature})

# Load first climate dataset
# Bind climate_df up front so later cells can test it for None even when nothing
# was downloaded (otherwise the name would be undefined on the error path).
climate_df = None
if climate_files:
    climate_df = load_climate_data(os.path.join(CLIMATE_DATA_PATH, climate_files[0]))
    print("------- Climate Data Sample:\n", climate_df.head())
else:
    print("------- ERROR! NO CLIMATE DATA FILES FOUND. - CHECK EARTHDATA FETCH AND LOAD.")

In [None]:
#BIRD DATA FETCH & LOAD 

# Both GBIF endpoints live under the same versioned API root.
_GBIF_API_ROOT = "https://api.gbif.org/v1"
GBIF_API_URL = _GBIF_API_ROOT + "/species/match"          # name -> backbone taxon lookup
OCCURRENCE_API_URL = _GBIF_API_ROOT + "/occurrence/search"  # occurrence record search

def get_gbif_taxon_key(species_name):
    """Look up the GBIF backbone taxon key for a species name.

    Args:
        species_name: Name sent as the ``name`` query parameter of the GBIF
            species-match endpoint.

    Returns:
        The ``usageKey`` from the response, or ``None`` when the request did not
        return HTTP 200 or the response carries no ``usageKey``.
    """
    # Let requests build and URL-encode the query string (names contain spaces),
    # and bound the wait so a stalled connection cannot hang the notebook.
    response = requests.get(GBIF_API_URL, params={"name": species_name}, timeout=30)
    if response.status_code == 200:
        return response.json().get("usageKey")
    return None

def fetch_gbif_data(species_name, limit=5000):
    """Fetch georeferenced GBIF occurrence records (2000-2024) for a species.

    The taxon key is resolved with ``get_gbif_taxon_key`` and the occurrence
    search is then paged until ``limit`` records are collected or GBIF reports
    the end of the result set.

    Args:
        species_name: Name used to resolve the GBIF taxon key.
        limit: Maximum number of occurrence records to return.

    Returns:
        DataFrame with columns ``species``, ``latitude``, ``longitude`` and
        ``timestamp``; ``None`` when no taxon key is found, the first request
        fails, or no records come back. If a later page fails, the records
        fetched so far are returned.
    """
    taxon_key = get_gbif_taxon_key(species_name)
    if not taxon_key:
        print(f"------- NO TAXON KEY FOUND FOR {species_name}")
        return None

    # GBIF serves at most 300 records per request, so a single call with
    # limit=5000 silently returns one page; page with `offset` instead.
    page_cap = 300
    records = []
    while len(records) < limit:
        params = {
            "taxonKey": taxon_key,
            "limit": min(page_cap, limit - len(records)),
            "offset": len(records),
            # lower-case literal: requests would serialise the bool as "True"
            "hasCoordinate": "true",
            # range syntax is "start,end" with no whitespace
            "year": "2000,2024",
        }
        response = requests.get(OCCURRENCE_API_URL, params=params, timeout=60)
        if response.status_code != 200:
            print(f"------- Error fetching GBIF data for {species_name}: {response.status_code}")
            if not records:
                return None
            break  # keep the pages that were already retrieved

        data = response.json()
        page = data.get("results") or []
        if not page:
            break
        records.extend(page)
        if data.get("endOfRecords", True):
            break

    if not records:
        print(f"------- No results found for {species_name}.")
        return None

    return pd.DataFrame([
        {
            "species": entry.get("species", ""),
            "latitude": entry.get("decimalLatitude", ""),
            "longitude": entry.get("decimalLongitude", ""),
            "timestamp": entry.get("eventDate", "")
        }
        for entry in records[:limit]
    ])

os.makedirs(BIRD_DATA_PATH, exist_ok=True)

# List of species to dynamically fetch
species_list = ["Barn Swallow", "Arctic Tern", "Whooping Crane"]

# 1. Barn Swallow - Chosen for abundance of data
# 2. Arctic Tern - Chosen for distance of migration
# 3. Whooping Crane - Chosen for endangered status

# GBIF's species-match endpoint resolves scientific names, so each common name is
# translated before the lookup; the common name is still used for the file name.
SPECIES_SCIENTIFIC_NAMES = {
    "Barn Swallow": "Hirundo rustica",
    "Arctic Tern": "Sterna paradisaea",
    "Whooping Crane": "Grus americana",
}

for species in species_list:
    # Fall back to the listed name itself for species missing from the mapping.
    df = fetch_gbif_data(SPECIES_SCIENTIFIC_NAMES.get(species, species))
    if df is not None:
        file_path = f"{BIRD_DATA_PATH}/{species.replace(' ', '_')}.csv"
        df.to_csv(file_path, index=False)
        print(f"------- Saved {species} data to {file_path}")

def load_migration_data(file_path):
    """Read a migration CSV and parse its ``timestamp`` column.

    Values that cannot be parsed as dates become ``NaT``.
    """
    return pd.read_csv(file_path).assign(
        timestamp=lambda frame: pd.to_datetime(frame["timestamp"], errors="coerce")
    )

# Load bird data
# Only CSV files are considered (os.listdir also returns checkpoint folders and
# other stray entries) and they are sorted so every run sees the same order.
bird_files = sorted(
    name for name in os.listdir(BIRD_DATA_PATH) if name.lower().endswith(".csv")
)
# Every species file is combined into one frame; the `species` column keeps the
# records distinguishable. Previously a single arbitrary file was loaded.
bird_frames = [load_migration_data(os.path.join(BIRD_DATA_PATH, name)) for name in bird_files]
bird_df = pd.concat(bird_frames, ignore_index=True) if bird_frames else None

In [None]:
#FISH DATA FETCH & LOAD

# NOTE(review): this URL and the query parameters below could not be matched to a
# published Global Fishing Watch API (its documented gateway requires a bearer
# token) - confirm the endpoint before relying on this data.
GFW_API_URL = "https://globalfishingwatch.org/api/vessel-activity"

def fetch_fish_migration_data(limit=5000):
    """Fetch fish migration data from the Global Fishing Watch API.

    Args:
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        DataFrame built from the JSON payload, or ``None`` when the request does
        not return HTTP 200, the body is not valid JSON, or the payload cannot
        be turned into a table.
    """
    params = {
        "limit": limit,
        "hasCoordinate": True,
        "year": "2000,2024"
    }
    response = requests.get(GFW_API_URL, params=params, timeout=60)
    if response.status_code != 200:
        print(f"------- Error fetching fish migration data: {response.status_code}")
        return None
    try:
        # A 200 response can still carry HTML or a non-tabular JSON object; both
        # surface as ValueError (JSON decode error / DataFrame construction).
        return pd.DataFrame(response.json())
    except ValueError as exc:
        print(f"------- Error parsing fish migration data: {exc}")
        return None

# Download the fish data and, when something came back, persist it as CSV.
fish_df = fetch_fish_migration_data()
if fish_df is not None:
    fish_csv_path = f"{FISH_DATA_PATH}/fish_migration.csv"
    fish_df.to_csv(fish_csv_path, index=False)
    print(f"------- Saved fish migration data to {fish_csv_path}")

In [None]:
#TRAIN TEST SPLITTING

def split_data(df, test_size=0.2):
    """Return a ``(train, test)`` pair produced by ``train_test_split``.

    A missing or empty frame yields ``(None, None)`` after printing a warning.
    The split uses ``random_state=42``.
    """
    if df is None or df.empty:
        print("⚠️ Data is empty or None. Skipping train-test split.")
        return None, None

    train_part, test_part = train_test_split(df, test_size=test_size, random_state=42)
    return train_part, test_part

# Split climate, bird, and fish data
# A dataset that was never loaded (None) is passed through as a (None, None) pair
# without calling split_data.
if climate_df is None:
    train_climate_df = test_climate_df = None
else:
    train_climate_df, test_climate_df = split_data(climate_df)

if bird_df is None:
    train_bird_df = test_bird_df = None
else:
    train_bird_df, test_bird_df = split_data(bird_df)

if fish_df is None:
    train_fish_df = test_fish_df = None
else:
    train_fish_df, test_fish_df = split_data(fish_df)

In [None]:
#TRAINING MODELS

def evaluate_model(model, X_test, y_test, model_name):
    """Print R², MSE and MAE for ``model`` on the test data.

    Returns the model's predictions for ``X_test``, or ``None`` (after printing
    a notice) when ``model`` is ``None``.
    """
    if model is None:
        print(f"!------- {model_name} was not trained.")
        return None

    y_pred = model.predict(X_test)
    # Compute every metric first, then report them in a fixed order.
    report = (
        ("R² Score", r2_score(y_test, y_pred)),
        ("Mean Squared Error (MSE)", mean_squared_error(y_test, y_pred)),
        ("Mean Absolute Error (MAE)", mean_absolute_error(y_test, y_pred)),
    )
    print(f"------- {model_name} Performance:")
    for label, value in report:
        print(f"   {label}: {value:.4f}")
    return y_pred

# Train Linear Regression Model
def train_linear_regression(train_df, test_df):
    """Fit a linear regression of ``latitude`` on ``Temperature`` and evaluate it.

    Args:
        train_df: Training frame; needs ``Temperature`` and ``latitude`` columns.
        test_df: Test frame with the same columns.

    Returns:
        ``(model, predictions)``. ``(None, None)`` is returned when either frame
        is ``None`` or a required column is missing, so callers can always
        unpack two values.
    """
    if train_df is None or test_df is None:
        return None, None

    required = ["Temperature", "latitude"]
    missing = [
        column for column in required
        if column not in train_df.columns or column not in test_df.columns
    ]
    if missing:
        # e.g. occurrence data that was never joined with climate data
        print(f"!------- Linear Regression skipped: missing column(s) {missing}.")
        return None, None

    X_train, X_test = train_df[["Temperature"]], test_df[["Temperature"]]
    y_train, y_test = train_df["latitude"], test_df["latitude"]
    model = LinearRegression()
    model.fit(X_train, y_train)
    predictions = evaluate_model(model, X_test, y_test, "Linear Regression")
    return model, predictions

# Train Random Forest Model
def train_random_forest(train_df, test_df):
    """Fit a random forest of ``latitude`` on ``Temperature`` and evaluate it.

    Args:
        train_df: Training frame; needs ``Temperature`` and ``latitude`` columns.
        test_df: Test frame with the same columns.

    Returns:
        ``(model, predictions)``. ``(None, None)`` is returned when either frame
        is ``None`` or a required column is missing, so callers can always
        unpack two values.
    """
    if train_df is None or test_df is None:
        return None, None

    required = ["Temperature", "latitude"]
    missing = [
        column for column in required
        if column not in train_df.columns or column not in test_df.columns
    ]
    if missing:
        # e.g. occurrence data that was never joined with climate data
        print(f"!------- Random Forest skipped: missing column(s) {missing}.")
        return None, None

    X_train, X_test = train_df[["Temperature"]], test_df[["Temperature"]]
    y_train, y_test = train_df["latitude"], test_df["latitude"]
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    predictions = evaluate_model(model, X_test, y_test, "Random Forest")
    return model, predictions

# Apply K-Means Clustering
def apply_kmeans(df, num_clusters=3):
    """Cluster rows by ``latitude``/``longitude`` with K-Means.

    Args:
        df: Frame with ``latitude`` and ``longitude`` columns.
        num_clusters: Number of clusters to fit.

    Returns:
        A copy of the rows that have both coordinates, with an added ``cluster``
        column; the input frame is left untouched. ``None`` when ``df`` is
        ``None``/empty or fewer usable rows than clusters remain.
    """
    if df is None or df.empty:
        return None

    coordinate_columns = ["latitude", "longitude"]
    # Work on a copy: the input is usually a slice produced by train_test_split,
    # and writing a column into it would mutate (or warn about) the caller's data.
    # Rows without coordinates are dropped because K-Means rejects NaN.
    clustered = df.dropna(subset=coordinate_columns).copy()
    if len(clustered) < num_clusters:
        print(f"!------- K-Means skipped: {len(clustered)} usable row(s) for {num_clusters} cluster(s).")
        return None

    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    clustered["cluster"] = kmeans.fit_predict(clustered[coordinate_columns])
    print("------- K-Means Clustering applied.")
    return clustered

# Train ARIMA Model
def train_arima(df):
    """Fit an ARIMA(5, 1, 0) model to the ``Temperature`` series indexed by ``Time``.

    Args:
        df: Frame with ``Time`` and ``Temperature`` columns.

    Returns:
        The fitted results object returned by ``ARIMA(...).fit()``, or ``None``
        when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        return None
    # The training frame comes from a shuffled train/test split, so restore
    # chronological order: ARIMA models each value from the ones before it.
    series = df.set_index("Time")["Temperature"].sort_index()
    model = ARIMA(series, order=(5, 1, 0))
    model_fit = model.fit()
    print("------- ARIMA Model trained for climate forecasting.")
    return model_fit

# Train Prophet Model
def train_prophet(df):
    """Fit a Prophet model on the climate frame.

    ``Time`` and ``Temperature`` are renamed to ``ds`` and ``y`` before fitting.
    Returns the fitted model, or ``None`` for a missing or empty frame.
    """
    if df is None or df.empty:
        return None

    column_map = {"Time": "ds", "Temperature": "y"}
    forecaster = Prophet()
    forecaster.fit(df.rename(columns=column_map))
    print("------- Prophet Model trained for long-term climate forecasting.")
    return forecaster

# Train models and get predictions
# `or (None, None)` keeps the tuple unpacking valid when a trainer skips its work
# and hands back None instead of a (model, predictions) pair.
linear_reg_model, linear_predictions = train_linear_regression(train_bird_df, test_bird_df) or (None, None)
random_forest_model, rf_predictions = train_random_forest(train_bird_df, test_bird_df) or (None, None)
bird_clusters = apply_kmeans(train_bird_df)
arima_model = train_arima(train_climate_df)
prophet_model = train_prophet(train_climate_df)

In [None]:
#PLOTTING AND VISUALIZATION

def plot_predictions(actual, predictions, model_name):
    """Scatter predicted against actual values, with the ideal fit as a red line.

    Nothing is drawn when ``predictions`` is ``None``.
    """
    if predictions is None:
        return

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.scatter(actual, predictions, alpha=0.5, label='Predictions')
    # y = x reference line: points on it are perfect predictions
    ax.plot(actual, actual, color='red', label='Ideal Fit')
    ax.set_xlabel("Actual Values")
    ax.set_ylabel("Predicted Values")
    ax.set_title(f"{model_name}: Actual vs. Predicted")
    ax.legend()
    plt.show()

# Plot migration shifts on a map
def plot_migration_map(df, title="Migration Map"):
    """Build a Folium map with one marker per located record.

    Args:
        df: Frame with ``latitude`` and ``longitude`` columns; ``species`` and
            ``timestamp`` are shown in the popup when present.
        title: Heading rendered above the map.

    Returns:
        The ``folium.Map``, or ``None`` (after printing a notice) when ``df`` is
        ``None``, empty, or has no row with both coordinates.
    """
    from html import escape  # stdlib; local import keeps the block self-contained

    if df is None or df.empty:
        print("!------- No data available for migration mapping.")
        return

    # Folium rejects NaN locations, so only rows with both coordinates are mapped.
    located = df.dropna(subset=["latitude", "longitude"])
    if located.empty:
        print("!------- No data available for migration mapping.")
        return

    migration_map = folium.Map(
        location=[located["latitude"].mean(), located["longitude"].mean()],
        zoom_start=4,
    )
    # The title argument was previously ignored; render it as a heading.
    migration_map.get_root().html.add_child(
        folium.Element(f'<h3 align="center">{escape(str(title))}</h3>')
    )

    for _, row in located.iterrows():
        folium.Marker(
            location=[row["latitude"], row["longitude"]],
            popup=f"Species: {row.get('species', 'Unknown')}\nDate: {row.get('timestamp', 'N/A')}",
            icon=folium.Icon(color="blue")
        ).add_to(migration_map)
    return migration_map

# Plot temperature trends over time
def plot_temperature_trends(df):
    """Draw ``Temperature`` against ``Time`` as a line chart.

    Prints a notice and draws nothing when ``df`` is ``None`` or empty.
    """
    if df is None or df.empty:
        print("!------- No climate data available for plotting temperature trends.")
        return

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.lineplot(x=df["Time"], y=df["Temperature"], ax=ax)
    ax.set_xlabel("Year")
    ax.set_ylabel("Temperature (°C)")
    ax.set_title("Climate Temperature Trends Over Time")
    plt.show()

# Generate bird-related plots
# Each prediction plot is drawn only when its model produced predictions, so a
# missing test frame is never subscripted.
if linear_predictions is not None:
    plot_predictions(test_bird_df["latitude"], linear_predictions, "Linear Regression (Birds)")
if rf_predictions is not None:
    plot_predictions(test_bird_df["latitude"], rf_predictions, "Random Forest (Birds)")
plot_temperature_trends(train_climate_df)
bird_migration_map = plot_migration_map(test_bird_df, "Bird Migration Map")
if bird_migration_map is not None:  # None when there was nothing to map
    bird_migration_map.save("bird_migration_map.html")  # Save map as HTML

# Generate fish-related plots
# The fish plots need predictions from models fitted on fish data; reusing the
# bird models' predictions compared unrelated (and differently sized) arrays.
fish_linear_model, fish_linear_predictions = train_linear_regression(train_fish_df, test_fish_df) or (None, None)
fish_rf_model, fish_rf_predictions = train_random_forest(train_fish_df, test_fish_df) or (None, None)
if fish_linear_predictions is not None:
    plot_predictions(test_fish_df["latitude"], fish_linear_predictions, "Linear Regression (Fish)")
if fish_rf_predictions is not None:
    plot_predictions(test_fish_df["latitude"], fish_rf_predictions, "Random Forest (Fish)")
fish_migration_map = plot_migration_map(test_fish_df, "Fish Migration Map")
if fish_migration_map is not None:  # None when there was nothing to map
    fish_migration_map.save("fish_migration_map.html")  # Save map as HTML
