In [None]:
# Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from pygam import LinearGAM, s
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
# Load the enriched squid CPUE table from the project's data directory
# (path is relative to the notebook's working directory).
csv_path = "../data/squid_cpue_enriched.csv"
df = pd.read_csv(csv_path)

In [None]:
# Drop rows with missing values in any column used downstream: the CPUE
# inputs (SqCatch_Kg, Depth) and every model feature. Year1 and Month are
# included because they are fed to the OLS and GAM fits as predictors, and
# a NaN in either one would reach the model fitting step unfiltered.
required_columns = [
    "SqCatch_Kg",
    "Depth",
    "Year1",
    "Month",
    "Lat",
    "Lon",
    "WaterTemp",
]
df = df.dropna(subset=required_columns)

In [None]:
# Calculate CPUE as SqCatch_Kg divided by Depth, then build the modelling
# target on the log(CPUE + 1) scale.
df["CPUE"] = df["SqCatch_Kg"] / df["Depth"]

# log1p(x) == log(x + 1), but keeps precision when CPUE is close to zero.
# errstate silences the NumPy warnings for the invalid rows handled below.
with np.errstate(divide="ignore", invalid="ignore"):
    y = np.log1p(df["CPUE"])

# Depth == 0 produces +/-inf (or NaN for 0/0) and CPUE <= -1 produces
# -inf/NaN after the log. Non-finite targets would break the model fits,
# so drop those rows here -- before X is selected from df -- which keeps
# the feature matrix and the target aligned row for row.
valid_target = np.isfinite(y)
if not valid_target.all():
    print(f"Dropping {int((~valid_target).sum())} rows with non-finite log(CPUE + 1)")
df = df.loc[valid_target]
y = y.loc[valid_target]

In [None]:
# Predictor columns shared by both models. The column order matters:
# it fixes the positional indices of the feature matrix
# (0=Year1, 1=Month, 2=Lat, 3=Lon, 4=WaterTemp).
features = [
    "Year1",
    "Month",
    "Lat",
    "Lon",
    "WaterTemp",
]
X = df.loc[:, features]

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Linear baseline (GLM fitted via OLS) on the training split.
# add_constant prepends the intercept column that OLS needs explicitly.
X_glm = sm.add_constant(X_train)
ols_spec = sm.OLS(endog=y_train, exog=X_glm)
model_glm = ols_spec.fit()
print(model_glm.summary())

In [None]:
# GAM model: one spline smooth per column of the training matrix
# (term indices 0..4 follow the column order of X_train).
gam = LinearGAM(s(0) + s(1) + s(2) + s(3) + s(4)).fit(X_train, y_train)

# statistics_["pseudo_r2"] is a mapping holding several pseudo-R² variants
# (explained deviance, McFadden, adjusted McFadden). Report the
# explained-deviance scalar instead of dumping the whole mapping.
print("GAM R²:", gam.statistics_["pseudo_r2"]["explained_deviance"])

In [None]:
# Partial dependence of the response on term 2 (labelled Latitude below),
# evaluated on the grid pyGAM generates for that term.
XX = gam.generate_X_grid(term=2)
lat_effect = gam.partial_dependence(term=2, X=XX)

fig, ax = plt.subplots()
ax.plot(XX[:, 2], lat_effect)
ax.set_title("Effect of Latitude on log(CPUE + 1)")
ax.set_xlabel("Latitude")
ax.set_ylabel("Effect")
fig.tight_layout()
plt.show()

In [None]:
# Compare GAM predictions on the held-out test set with the observed
# targets; points are semi-transparent so dense regions stay readable.
preds = gam.predict(X_test)

fig, ax = plt.subplots()
ax.scatter(y_test, preds, alpha=0.5)
ax.set_xlabel("Actual log(CPUE + 1)")
ax.set_ylabel("Predicted")
ax.set_title("GAM Model Performance")
fig.tight_layout()
plt.show()

In [None]:
# Open the dataset at `file_path` using the "netcdf4" engine and keep the
# handle in `ds`.
# NOTE(review): neither `xr` nor `file_path` is defined in the cells visible
# above (`xr` is presumably the xarray alias -- confirm). Unless both are
# defined elsewhere in the notebook, this cell raises NameError on a fresh
# kernel (Restart & Run All).
ds = xr.open_dataset(file_path, engine="netcdf4")