In [4]:
import xarray as xr
import numpy as np
import pandas as pd
import os

def ensure_datetime_format(ds):
    if pd.api.types.is_datetime64_any_dtype(ds['time'].dtype):
        return ds
    else:
        ds['time'] = pd.to_datetime(ds['time'].values)
        return ds

In [5]:
# Load and prepare GHG historical data
ghg_historical = xr.open_dataset('../../data/raw/globalGhgEmissions/CO2_1deg_month_1850-2013.nc')
ghg_historical = ghg_historical.rename({'LonDim': 'longitude', 'LatDim': 'latitude', 'Times': 'time'})
ghg_historical = ensure_datetime_format(ghg_historical)

# Load temperature data for coordinate reference
temp_data = xr.open_dataset('../../data/raw/globalTemperature/Land_and_Ocean_LatLong1.nc')
ghg_historical = ghg_historical.assign_coords(longitude=temp_data.longitude, latitude=temp_data.latitude)

In [6]:
# Create a dataset for 2014 by extending the last available month of 2013
last_month_2013 = ghg_historical.isel(time=-1)
gap_year_data = xr.concat([last_month_2013] * 12, dim='time')
gap_year_data['time'] = pd.date_range('2014-01', periods=12, freq='MS')

In [7]:
# Define directories for SSP scenarios and output
ssp_base_dir = '../../data/raw/globalGhgEmissions'
ssp_filenames = [f for f in os.listdir(ssp_base_dir) if f.startswith('CO2_SSP')]
processed_dir = '../../data/processed/ssp_combined'
os.makedirs(processed_dir, exist_ok=True)

In [8]:
# Prepare and save each SSP scenario
for filename in ssp_filenames:
    ssp_path = os.path.join(ssp_base_dir, filename)
    ssp_data = xr.open_dataset(ssp_path)
    ssp_data = ssp_data.rename({'time': 'time', 'latitude': 'latitude', 'longitude': 'longitude'})
    ssp_data = ssp_data.assign_coords(longitude=temp_data.longitude, latitude=temp_data.latitude)
    
    # Ensure datetime format is correct for all datasets
    ssp_data = ensure_datetime_format(ssp_data)

    # Combine historical, gap year, and SSP data
    combined_ghg = xr.concat([ghg_historical, gap_year_data, ssp_data], dim='time')
    
    # Merge GHG data with temperature data
    combined_dataset = xr.merge([temp_data, combined_ghg])

    # Save the combined dataset with explicit encoding for time
    output_path = os.path.join(processed_dir, filename.replace('.nc', '_combined.nc'))
    encoding = {'time': {'units': 'days since 1850-01-01', 'calendar': 'standard'}}
    combined_dataset.to_netcdf(output_path, encoding=encoding)
    print(f"Saved combined data for {filename} to {output_path}")

TypeError: <class 'cftime._cftime.DatetimeNoLeap'> is not convertible to datetime, at position 0

In [11]:
import xarray as xr
import os
import numpy as np

# Load GHG historical data and rename coordinates to match the temperature data
base_dir = '../../data/raw/globalGhgEmissions'
ghg_historical_path = os.path.join(base_dir, 'CO2_1deg_month_1850-2013.nc')
ghg_historical = xr.open_dataset(ghg_historical_path)
ghg_historical = ghg_historical.rename({'LonDim': 'longitude', 'LatDim': 'latitude', 'Times': 'time'})

# Convert 'time' to a continuous integer index starting from 0
ghg_historical['time'] = np.arange(len(ghg_historical['time']))

# Load temperature data to use as a coordinate reference
temp_data_path = '../../data/raw/globalTemperature/Land_and_Ocean_LatLong1.nc'
temp_data = xr.open_dataset(temp_data_path)
ghg_historical = ghg_historical.assign_coords(longitude=temp_data.longitude, latitude=temp_data.latitude)

# Directory for SSP scenarios and output
ssp_base_dir = base_dir
ssp_filenames = [f for f in os.listdir(ssp_base_dir) if f.startswith('CO2_SSP')]
processed_dir = '../../data/processed/ssp_combined'
os.makedirs(processed_dir, exist_ok=True)

# Process each SSP dataset and assign unique time indices
for filename in ssp_filenames:
    ssp_path = os.path.join(ssp_base_dir, filename)
    ssp_data = xr.open_dataset(ssp_path)
    ssp_data = ssp_data.rename({'time': 'time', 'latitude': 'latitude', 'longitude': 'longitude'})

    # Assign a continuous index starting after the last index of historical data
    ssp_data['time'] = np.arange(len(ghg_historical['time']), len(ghg_historical['time']) + len(ssp_data['time']))

    # Combine historical and SSP data along the 'time' dimension without overlap
    combined_ghg = xr.concat([ghg_historical, ssp_data], dim='time')

    # Combine GHG data with temperature data
    combined_dataset = xr.merge([temp_data, combined_ghg])

    # Save the combined dataset
    output_path = os.path.join(processed_dir, filename.replace('.nc', '_combined.nc'))
    combined_dataset.to_netcdf(output_path)
    print(f"Saved combined data for {filename} to {output_path}")


Saved combined data for CO2_SSP585_2015_2150.nc to ../../data/processed/ssp_combined/CO2_SSP585_2015_2150_combined.nc
Saved combined data for CO2_SSP370_2015_2150.nc to ../../data/processed/ssp_combined/CO2_SSP370_2015_2150_combined.nc
Saved combined data for CO2_SSP119_2015_2150.nc to ../../data/processed/ssp_combined/CO2_SSP119_2015_2150_combined.nc
Saved combined data for CO2_SSP434_2015_2150.nc to ../../data/processed/ssp_combined/CO2_SSP434_2015_2150_combined.nc
Saved combined data for CO2_SSP245_2015_2150.nc to ../../data/processed/ssp_combined/CO2_SSP245_2015_2150_combined.nc
Saved combined data for CO2_SSP534_2015_2150.nc to ../../data/processed/ssp_combined/CO2_SSP534_2015_2150_combined.nc
Saved combined data for CO2_SSP460_2015_2150.nc to ../../data/processed/ssp_combined/CO2_SSP460_2015_2150_combined.nc
Saved combined data for CO2_SSP126_2015_2150.nc to ../../data/processed/ssp_combined/CO2_SSP126_2015_2150_combined.nc
