In [1]:
# Project 1: Time Series Analysis of CPUE Patterns

# Import libraries
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
# Load dataset
data_path = "../data/squid_cpue.csv"  
df = pd.read_csv(data_path)


In [3]:
# Remove extra spaces in column names (common CSV issue)
df.columns = df.columns.str.strip()

In [4]:
# Convert to datetime and rename Year column
df.rename(columns={"Year1": "Year"}, inplace=True)
df["Date"] = pd.to_datetime(df[["Year", "Month", "Day"]])

In [5]:
# Assume 1 day = 1 unit of effort
df["CPUE"] = df["SqCatch_Kg"]

In [7]:
# Group by Year and Month
monthly_cpue = (
    df.groupby(["Year", "Month"])
    .agg(
        TotalCatch=("CPUE", "sum"),# sums the squid catch kg per month in each year as shown in groupby function
        DaysFished=("Day", "count")  # counts number of fishing days per month in each year as shown in groupby function
    )
    .reset_index()
)

# Calculate monthly CPUE = total catch / days fished
monthly_cpue["Monthly_CPUE"] = monthly_cpue["TotalCatch"] / monthly_cpue["DaysFished"]
monthly_cpue.sort_values(["Year", "Month"], inplace=True)

# Show the monthly CPUE table
monthly_cpue.head(12)

Unnamed: 0,Year,Month,TotalCatch,DaysFished,Monthly_CPUE
0,2000,1,26483770.0,2208,11994.459998
1,2000,2,65327980.0,2803,23306.450875
2,2000,3,74240520.0,3024,24550.437656
3,2000,4,37056590.0,2932,12638.674576
4,2000,5,25931140.0,2862,9060.495904
5,2000,6,6796407.0,1117,6084.518122
6,2000,7,1548881.0,157,9865.482517
7,2000,8,219057.1,24,9127.379498
8,2000,11,192957.1,24,8039.877587
9,2000,12,10137660.0,768,13200.08218


In [8]:

#Save summary statistics
summary = monthly_cpue.groupby("Year")["Monthly_CPUE"].describe()
summary.to_csv("../results/summaries/yearly_cpue_summary.csv")

# Create an interactive line plot
fig = px.line(
    monthly_cpue,
    x="Month",
    y="Monthly_CPUE",
    color="Year",
    markers=True,
    title="Monthly Standardized CPUE (kg/day) by Year",
    labels={"Monthly_CPUE": "CPUE (kg/day)", "Month": "Month"}
)

# Customize layout
fig.update_layout(
    legend_title="Year",
    xaxis=dict(tickmode='linear', tick0=1, dtick=1),
    hovermode='x unified',
    width=900,
    height=600
)

# Save as interactive HTML
fig.write_html("../results/plots/monthly_cpue_plot.html")

# Save as static PNG (requires 'kaleido')
try:
    fig.write_image("../results/plots/monthly_cpue_plot.png")
except ValueError as e:
    print("Static image could not be saved. Ensure 'kaleido' is installed. Error:", e)

fig.show()

Static image could not be saved. Ensure 'kaleido' is installed. Error: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido



In [2]:
import plotly.express as px

fig = px.line(x=[1, 2, 3], y=[4, 5, 6], title="Test Plot")
fig.write_image("test_plot.png")
print("Plot successfully saved!")

ValueError: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido
