In [2]:
# Import module
from climate_processor import (
    process_ch4_concentration, 
    process_permafrost, 
    process_wetlands, 
    process_industrial_emissions
)

In [5]:
import pandas as pd 

In [3]:
# CH4 concentration is the target variable, so it is processed first.
# NOTE(review): the recorded run reports values of 0.007-0.010 under a
# "CONVERTED TO PPM" banner, which looks low for atmospheric CH4 in ppm --
# confirm the unit conversion inside climate_processor.
print("CH4_Concentration Processing", "=" * 60, sep="\n")
ch4_df = process_ch4_concentration(
    'Pre_process/data_sfc_Ch4_annual.nc'
)


CH4_Concentration Processing
=== PROCESSING CH4 CONCENTRATION DATA (CONVERTED TO PPM) ===
File: Pre_process/data_sfc_Ch4_annual.nc
Variable: ch4_concentration → ch4_concentration

1. Loading data...
   Dimensions: {'time': 15, 'latitude': 55, 'longitude': 119}
   Time coordinate: time
   Time range: 2010-12-31T00:00:00.000000000 to 2024-12-31T00:00:00.000000000
   Time steps: 15
   Variable shape: (15, 55, 119)
   Variable dimensions: ('time', 'latitude', 'longitude')

2. Processing coordinates...
   Latitude coordinate: latitude
   Longitude coordinate: longitude
DEBUG lat/lon names: latitude longitude
DEBUG data_var.coords: ['latitude', 'longitude', 'time']

3. Resampling to 0.1° resolution...
   Target grid: 406 lats × 886 lons


  print(f"   Dimensions: {dict(ds.dims)}")


   Resampled shape: (15, 406, 886)
   Resampled value range: 0.007131550926715136 to 0.010327703319489956

4. Extracting to CSV...
   Years: 2010 to 2024
   Valid pixels: 358,830

✅ CH4 CONCENTRATION DATA (CONVERTED TO PPM) COMPLETE!
   File: Ch4_Concentration_2010-2024.csv
   Records: 5,382,450
   Unique pixels: 358,830
   Years: 2010-2024
   Value range: 0.007 to 0.010

   Validation:
   - Missing values: 0
   - Records per year: 358,830
   - Consistent pixel count: True


In [6]:
# Reload the exported CH4 table and report its columns and coordinate extents.
# In the recorded run the 'latitude' column spans -141.0 to -52.5, i.e. the two
# coordinate columns appear to be written under each other's names.
df = pd.read_csv("Ch4_Concentration_2010-2024.csv")
print(df.columns)
print(
    f"Latitude range: {df['latitude'].min()} to {df['latitude'].max()}",
    f"Longitude range: {df['longitude'].min()} to {df['longitude'].max()}",
    sep="\n",
)

Index(['pixel_id', 'longitude', 'latitude', 'year', 'ch4_concentration'], dtype='object')
Latitude range: -141.0 to -52.5
Longitude range: 42.0 to 82.4


In [7]:
# Undo the latitude/longitude mislabelling of the exported CSV, but only when it
# is actually present: a real latitude can never leave [-90, 90], so a
# 'latitude' column that does is holding longitudes. Guarding the rename keeps
# this cell idempotent -- an unconditional swap puts the wrong labels back every
# second time the cell is executed.
# NOTE(review): a swap cannot be detected when both columns stay within
# [-90, 90]; presumably the real fix belongs in climate_processor's CSV export.
if df['latitude'].abs().max() > 90:
    df = df.rename(columns={'latitude': 'longitude', 'longitude': 'latitude'})
print(f"Latitude range: {df['latitude'].min()} to {df['latitude'].max()}")
print(f"Longitude range: {df['longitude'].min()} to {df['longitude'].max()}")

Latitude range: 42.0 to 82.4
Longitude range: -141.0 to -52.5


In [12]:
# Permafrost fraction layer: run the processor on the annual NetCDF file and
# keep the returned table in permafrost_zones_df.
# Section banner currently disabled:
# print("\nPermafrost")
# print("="*60)
permafrost_zones_df = process_permafrost(
    'Pre_process/PERMAFROST_Canada_Annual_2010_2024.nc'
)

=== PROCESSING PERMAFROST FRACTION ===
File: Pre_process/PERMAFROST_Canada_Annual_2010_2024.nc
Variable: permafrost_fraction → permafrost_fraction

1. Loading data...
   Dimensions: {'time': 15, 'lat': 4500, 'lon': 8900}
   Time coordinate: time
   Time range: 2010 to 2024
   Time steps: 15


  print(f"   Dimensions: {dict(ds.dims)}")


   Variable shape: (15, 4500, 8900)
   Variable dimensions: ('time', 'lat', 'lon')

2. Processing coordinates...
   Latitude coordinate: lat
   Longitude coordinate: lon
DEBUG lat/lon names: lat lon
DEBUG data_var.coords: ['time', 'lat', 'lon']

3. Resampling to 0.1° resolution...
   Target grid: 406 lats × 886 lons
   Resampled shape: (15, 406, 886)
   Resampled value range: 0.0 to 100.0

4. Extracting to CSV...
   Years: 2010 to 2024
   Valid pixels: 128,116

✅ PERMAFROST FRACTION COMPLETE!
   File: Permafrost_Fraction_2010-2024.csv
   Records: 1,921,740
   Unique pixels: 128,116
   Years: 2010-2024
   Value range: 0.000 to 1.000

   Validation:
   - Missing values: 0
   - Records per year: 128,116
   - Consistent pixel count: True


In [13]:
# Reload the exported permafrost table and report its columns and coordinate
# extents. In the recorded run the 'latitude' column spans -140.9 to -52.5,
# i.e. the two coordinate columns appear to be written under each other's names.
df = pd.read_csv("Permafrost_Fraction_2010-2024.csv")
print(df.columns)
print(
    f"Latitude range: {df['latitude'].min()} to {df['latitude'].max()}",
    f"Longitude range: {df['longitude'].min()} to {df['longitude'].max()}",
    sep="\n",
)

Index(['pixel_id', 'longitude', 'latitude', 'year', 'permafrost_fraction'], dtype='object')
Latitude range: -140.9 to -52.5
Longitude range: 42.6 to 82.5


In [14]:
# Undo the latitude/longitude mislabelling of the exported CSV, but only when it
# is actually present: a real latitude can never leave [-90, 90], so a
# 'latitude' column that does is holding longitudes. Guarding the rename keeps
# this cell idempotent -- an unconditional swap puts the wrong labels back every
# second time the cell is executed.
# NOTE(review): a swap cannot be detected when both columns stay within
# [-90, 90]; presumably the real fix belongs in climate_processor's CSV export.
if df['latitude'].abs().max() > 90:
    df = df.rename(columns={'latitude': 'longitude', 'longitude': 'latitude'})
print(f"Latitude range: {df['latitude'].min()} to {df['latitude'].max()}")
print(f"Longitude range: {df['longitude'].min()} to {df['longitude'].max()}")

Latitude range: 42.6 to 82.5
Longitude range: -140.9 to -52.5


In [8]:
# Wetland fraction layer: run the processor on the annual NetCDF file and keep
# the returned table in wetlands_df.
# NOTE(review): the recorded run looks suspicious and should be checked in
# climate_processor / the input file: only 14 time steps are loaded for
# 2010-2024, the final value range is 0.000 to 18.287 for a "fraction" (the
# resampled maximum was ~0.232), and 21051 missing values are reported.
# Section banner currently disabled:
# print("\nWetland Processing")
# print("="*60)
wetlands_df = process_wetlands(
    'Pre_process/WETLAND_Canada_Annual_2010_2024.nc'
)

=== PROCESSING WETLAND FRACTION DATA ===
File: Pre_process/WETLAND_Canada_Annual_2010_2024.nc
Variable: wetland_fraction → wetland_fraction

1. Loading data...
   Dimensions: {'year': 14, 'latitude': 180, 'longitude': 356}
   Time coordinate: year
   Time range: 2010 to 2024
   Time steps: 14
   Variable shape: (14, 180, 356)
   Variable dimensions: ('year', 'latitude', 'longitude')

2. Processing coordinates...
   Latitude coordinate: latitude
   Longitude coordinate: longitude
DEBUG lat/lon names: latitude longitude
DEBUG data_var.coords: ['latitude', 'longitude', 'year']

3. Resampling to 0.1° resolution...
   Target grid: 406 lats × 886 lons
   Resampled shape: (14, 406, 886)
   Resampled value range: 0.0 to 0.23190315067768097

4. Extracting to CSV...
   Years: 2010 to 2024
   Valid pixels: 6,415


  print(f"   Dimensions: {dict(ds.dims)}")



✅ WETLAND FRACTION DATA COMPLETE!
   File: Wetland_Fraction_2010-2024.csv
   Records: 89,810
   Unique pixels: 6,415
   Years: 2010-2024
   Value range: 0.000 to 18.287

   Validation:
   - Missing values: 21051
   - Records per year: 6,415
   - Consistent pixel count: True


In [9]:
import pandas as pd
# Reload the exported wetland table and report its columns and coordinate
# extents. In the recorded run the 'latitude' column spans -136.7 to -52.8,
# i.e. the two coordinate columns appear to be written under each other's names.
df = pd.read_csv("Wetland_Fraction_2010-2024.csv")
print(df.columns)
print(
    f"Latitude range: {df['latitude'].min()} to {df['latitude'].max()}",
    f"Longitude range: {df['longitude'].min()} to {df['longitude'].max()}",
    sep="\n",
)

Index(['pixel_id', 'longitude', 'latitude', 'year', 'wetland_fraction'], dtype='object')
Latitude range: -136.7 to -52.8
Longitude range: 42.0 to 66.7


In [10]:
# Undo the latitude/longitude mislabelling of the exported CSV, but only when it
# is actually present: a real latitude can never leave [-90, 90], so a
# 'latitude' column that does is holding longitudes. Guarding the rename keeps
# this cell idempotent -- an unconditional swap puts the wrong labels back every
# second time the cell is executed.
# NOTE(review): a swap cannot be detected when both columns stay within
# [-90, 90]; presumably the real fix belongs in climate_processor's CSV export.
if df['latitude'].abs().max() > 90:
    df = df.rename(columns={'latitude': 'longitude', 'longitude': 'latitude'})

In [11]:
# Report the coordinate extents of the table currently bound to df.
print(
    f"Latitude range: {df['latitude'].min()} to {df['latitude'].max()}",
    f"Longitude range: {df['longitude'].min()} to {df['longitude'].max()}",
    sep="\n",
)

Latitude range: 42.0 to 66.7
Longitude range: -136.7 to -52.8


In [15]:
# Industrial (fuel) CH4 emissions layer: run the processor on the annual NetCDF
# file and keep the returned table in emissions_df.
# Section banner currently disabled:
# print("\nIndustrial Emissions")
# print("="*60)
emissions_df = process_industrial_emissions(
    'Pre_process/CH4_FE_Canada_Annual_2010_2024.nc'
)
# Pixel-count sanity check, currently disabled.
# NOTE(review): the expected count below (366,860) does not match the 359,310
# unique pixels reported by the recorded run, which also lists 59 missing
# values -- confirm which figure is right before re-enabling.
# print(f"Industrial Emissions pixel count: {emissions_df['pixel_id'].nunique():,}")
# print(f"Expected: 366,860")
# print(f"Match: {emissions_df['pixel_id'].nunique() == 366860}")


=== PROCESSING INDUSTRIAL CH4 EMISSIONS DATA ===
File: Pre_process/CH4_FE_Canada_Annual_2010_2024.nc
Variable: fuel_emi → ch4_emissions

1. Loading data...
   Dimensions: {'time': 15, 'lat': 450, 'lon': 890}
   Time coordinate: time
   Time range: 2010-01-01T00:00:00.000000000 to 2024-01-01T00:00:00.000000000
   Time steps: 15
   Variable shape: (15, 450, 890)
   Variable dimensions: ('time', 'lat', 'lon')

2. Processing coordinates...
   Latitude coordinate: lat
   Longitude coordinate: lon
DEBUG lat/lon names: lat lon
DEBUG data_var.coords: ['lat', 'lon', 'time']

3. Resampling to 0.1° resolution...
   Target grid: 406 lats × 886 lons
   Resampled shape: (15, 406, 886)
   Resampled value range: 0.0 to 971784.8125

4. Extracting to CSV...
   Years: 2010 to 2024
   Valid pixels: 359,310


  print(f"   Dimensions: {dict(ds.dims)}")



✅ INDUSTRIAL CH4 EMISSIONS DATA COMPLETE!
   File: Ch4_Emissions_2010-2024.csv
   Records: 5,389,650
   Unique pixels: 359,310
   Years: 2010-2024
   Value range: 0.000 to 971784.812

   Validation:
   - Missing values: 59
   - Records per year: 359,310
   - Consistent pixel count: True


In [16]:
# Reload the exported emissions table and report its columns and coordinate
# extents. In the recorded run the 'latitude' column spans -140.9 to -52.5,
# i.e. the two coordinate columns appear to be written under each other's names.
df = pd.read_csv("Ch4_Emissions_2010-2024.csv")
print(df.columns)
print(
    f"Latitude range: {df['latitude'].min()} to {df['latitude'].max()}",
    f"Longitude range: {df['longitude'].min()} to {df['longitude'].max()}",
    sep="\n",
)

Index(['pixel_id', 'longitude', 'latitude', 'year', 'ch4_emissions'], dtype='object')
Latitude range: -140.9 to -52.5
Longitude range: 42.0 to 82.5


In [17]:
# Undo the latitude/longitude mislabelling of the exported CSV, but only when it
# is actually present: a real latitude can never leave [-90, 90], so a
# 'latitude' column that does is holding longitudes. Guarding the rename keeps
# this cell idempotent -- an unconditional swap puts the wrong labels back every
# second time the cell is executed.
# NOTE(review): a swap cannot be detected when both columns stay within
# [-90, 90]; presumably the real fix belongs in climate_processor's CSV export.
if df['latitude'].abs().max() > 90:
    df = df.rename(columns={'latitude': 'longitude', 'longitude': 'latitude'})
print(f"Latitude range: {df['latitude'].min()} to {df['latitude'].max()}")
print(f"Longitude range: {df['longitude'].min()} to {df['longitude'].max()}")

Latitude range: 42.0 to 82.5
Longitude range: -140.9 to -52.5


In [None]:
# Optional memory housekeeping, currently disabled. Uncommenting the lines
# below would run a garbage-collection pass and print the available RAM.
# NOTE(review): psutil is not imported anywhere else in the visible notebook --
# confirm it is installed before re-enabling.
# import gc
# import psutil

# # Garbage collection to free up memory
# gc.collect()

# # Check available memory (optional)
# mem = psutil.virtual_memory()
# print(f"Available RAM: {mem.available / 1e9:.2f} GB")
