# Final Arctic Dataset Merge


In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

We combine all processed Arctic climate variables — sea ice area, extent, thickness, and atmospheric/oceanic predictors — into one consolidated dataset for analysis and modeling.  

This merged file will serve as the **master dataset** for correlation studies, forecasting models, and final visualizations.

In [None]:
# File locations
# Everything is resolved relative to the directory two levels above the
# current working directory, so no machine-specific absolute path is needed.
BASE_DIR = Path().resolve().parents[1]

# Inputs: pre-processed CSV files consumed by the merge.
arctic_predictors = BASE_DIR.joinpath("data", "pre_processed", "era5_arctic_merged_clean.csv")
arctic_thickness = BASE_DIR.joinpath("data", "pre_processed", "new_thickness_annual.csv")
arctic_area = BASE_DIR.joinpath("data", "pre_processed", "arctic_sia_sie_monthly.csv")

# Output: destination of the merged table.
output_path = BASE_DIR.joinpath("data", "final", "final_arctic.csv")

### Load Processed Components
Read the pre-processed CSV files (sea ice area/extent, thickness, and predictors) and cast the `year` and `month` join keys to integers so the three tables can be aligned over the 1978–2023 study period before merging.

In [3]:
# Load each pre-processed CSV and cast the join keys to integers in one pass,
# so "year" and "month" have matching dtypes across all three tables.
predictors_df, thickness_df, area_df = (
    pd.read_csv(csv_path).astype({"year": int, "month": int})
    for csv_path in (arctic_predictors, arctic_thickness, arctic_area)
)

### Merge and Export Final Dataset
Join all Arctic variables by **year** and **month** into a single DataFrame.  
Save as `final_arctic.csv` for downstream use in correlation heatmaps, feature analysis, and ML-based forecasting.

In [None]:
# Step 1: Restrict thickness data to 1978–2023 (both bounds inclusive)
thickness_df = thickness_df[thickness_df["year"].between(1978, 2023)]

# Step 2: Left join — keep every row of the area table and attach thickness
# where a matching (year, month) exists; unmatched rows get NaN thickness.
# validate="one_to_one" makes pandas raise a MergeError if either table holds
# duplicate (year, month) keys, instead of silently multiplying rows.
area_thickness_df = pd.merge(
    area_df,
    thickness_df,
    on=["year", "month"],
    how="left",
    validate="one_to_one",
)

# Step 3: Inner join with predictors on year & month — only (year, month)
# pairs present in both tables survive. Key uniqueness is checked here too.
final_df = pd.merge(
    area_thickness_df,
    predictors_df,
    on=["year", "month"],
    how="inner",
    validate="one_to_one",
)

In [5]:
# Step 4: Save final merged dataset
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (a no-op when it is already there).
output_path.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_path, index=False)

# Confirm where the file went and preview the first rows.
print(f"Final merged dataset saved to: {output_path}")
print(final_df.head())

Final merged dataset saved to: D:\Msc Data and Computational Science\Summer\Projects in Maths Modelling\Github\project-acm40960-ss\data\pre_processed\final_arctic.csv
   year  month  sia_million_km2  sie_million_km2    thick_m          cdir  \
0  1978     10         9.524144        10.153839        NaN  793575.44000   
1  1978     11        10.811478        11.506771        NaN   42043.16400   
2  1978     12        12.834232        13.668629        NaN     137.44655   
3  1979      1        14.615531        15.543609  3973.8833   10864.35900   
4  1979      2        15.492461        16.448393  3973.9478  352633.30000   

             uvb       slhf        sf        t2m        sst      istl1  \
0  110108.530000 -923622.06  0.000985  260.21402  272.12772  263.60452   
1    8703.622000 -821054.06  0.000860  253.88397  271.93120  259.63235   
2      67.879295 -621884.70  0.000541  249.05974  271.80540  255.64603   
3    2573.583700 -675217.70  0.000611  248.04901  271.73035  255.39026   
