In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
# File Path
BASE_DIR = Path().resolve().parents[1]

arctic_predictors = BASE_DIR / "data" / "processed" / "era5_arctic_merged_clean.csv"
arctic_thickness = BASE_DIR / "data" / "processed" / "arctic_ice_thickness_annual.csv"
arctic_area = BASE_DIR / "data" / "processed" / "arctic_sia_sie_monthly.csv"

output_path = BASE_DIR / "data" / "processed" / "final_arctic.csv"

In [4]:
# Read datasets
predictors_df = pd.read_csv(arctic_predictors)
thickness_df = pd.read_csv(arctic_thickness)
area_df = pd.read_csv(arctic_area)

# Ensure year and month are integers
for df in [predictors_df, thickness_df, area_df]:
    df["year"] = df["year"].astype(int)
    df["month"] = df["month"].astype(int)

In [5]:
# Step 1: Filter thickness data to 1978–2023
thickness_df = thickness_df[(thickness_df["year"] >= 1978) & (thickness_df["year"] <= 2023)]

# Step 2: Left join thickness with area on year & month
area_thickness_df = pd.merge(
    area_df,
    thickness_df,
    on=["year", "month"],
    how="left"
)

# Step 3: Inner join with predictors on year & month
final_df = pd.merge(
    area_thickness_df,
    predictors_df,
    on=["year", "month"],
    how="inner"
)


In [6]:
# Step 4: Save final merged dataset
final_df.to_csv(output_path, index=False)

print(f"Final merged dataset saved to: {output_path}")
print(final_df.head())

Final merged dataset saved to: D:\Desktopped\UCD\Summer\Summer project\Project_ACM\data\processed\final_arctic.csv
   year  month  sia_million_km2  sie_million_km2  thick_m       cdir  \
0  1978     10         9.524144        10.153839     0.35  793575.44   
1  1978     10         9.524144        10.153839     0.52  793575.44   
2  1978     10         9.524144        10.153839     0.44  793575.44   
3  1978     10         9.524144        10.153839     0.40  793575.44   
4  1978     10         9.524144        10.153839     0.45  793575.44   

         uvb       slhf        sf        t2m        sst      istl1      istl2  \
0  110108.53 -923622.06  0.000985  260.21402  272.12772  263.60452  264.34616   
1  110108.53 -923622.06  0.000985  260.21402  272.12772  263.60452  264.34616   
2  110108.53 -923622.06  0.000985  260.21402  272.12772  263.60452  264.34616   
3  110108.53 -923622.06  0.000985  260.21402  272.12772  263.60452  264.34616   
4  110108.53 -923622.06  0.000985  260.21402  2