# Overwrite bad coordinates in CMIP6 data
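Some CMIP6 datasets have latitude/longitude coordinates (and coordinate datatypes) that do not match between simulations of the same model. This notebook fixes the problem by overwriting `lat`, `lon`, `lat_bnds`, and `lon_bnds` in the affected files with the values from a reference file, and writing the result to a new `*_overwrite_coords.nc` file.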

In [None]:
from sup3r.preprocessing.data_handling import DataHandlerNCforCC
from sup3r.preprocessing.data_handling import DataHandlerNCforCCwithPowerLaw
from sup3r.bias.bias_calc import SkillAssessment
import numpy as np
import matplotlib.pyplot as plt
import os 
import glob

import xarray as xr

import warnings
warnings.filterwarnings('ignore')

## Primary Function

In [None]:
def overwrite_lat_lon_xarray(overwrite_file, source_file, output_file):
    """Copy lat/lon coordinates and bounds from one NetCDF file onto another.

    Parameters
    ----------
    overwrite_file : str
        Path of the file whose ``lat``, ``lon``, ``lat_bnds`` and
        ``lon_bnds`` are replaced. It is only read here; the corrected
        dataset is written to ``output_file``.
    source_file : str
        Path of the file providing the coordinates and bounds.
    output_file : str
        Path the corrected dataset is written to.

    Notes
    -----
    Errors raised while opening either input propagate to the caller.
    Errors raised while replacing the coordinates or writing the output are
    printed rather than raised, so a loop over many files keeps going.
    """
    # Context managers close both files on every exit path, including the
    # case where opening the second file fails after the first succeeded.
    with xr.open_dataset(overwrite_file) as overwrite_ds, \
            xr.open_dataset(source_file) as source_ds:
        try:
            # Replace the bounds variables with the source variables
            # (including dtypes!!).
            overwrite_ds['lat_bnds'] = source_ds['lat_bnds']
            overwrite_ds['lon_bnds'] = source_ds['lon_bnds']

            # Then write the raw source values explicitly.
            # NOTE(review): presumably needed because assigning a DataArray
            # can align it to this dataset's existing (mismatched) lat/lon
            # index and alter the values -- confirm before simplifying.
            overwrite_ds['lat_bnds'].values = source_ds['lat_bnds'].values
            overwrite_ds['lon_bnds'].values = source_ds['lon_bnds'].values

            # assign_coords returns a new dataset; keep it under its own
            # name so the opened file handle above is still the one closed.
            fixed_ds = overwrite_ds.assign_coords(
                lat=source_ds['lat'].values,
                lon=source_ds['lon'].values,
            )

            # Save the modified dataset to a new file
            fixed_ds.to_netcdf(output_file)
            print(
                f"Latitude and longitude values in {overwrite_file} have been "
                f"overwritten using data from {source_file} with destination: "
                f"{output_file}."
            )
        except Exception as e:
            # Deliberate best-effort: report the failure and let the caller
            # move on to the next file.
            print(f"An error occurred: {e}")


In [None]:
# gbuster usage
# Reference file whose lat/lon coordinates and bounds are copied onto each
# of the files listed in bad_files.
source_file = "/projects/alcaps/cmip6/CESM2-WACCM/ua_day_CESM2-WACCM_ssp585_r3i1p1f1_gn_20150101-20241231.nc"

# Alternative file list, kept commented out for reference.
# bad_files = [
#     '/projects/alcaps/cmip6/CESM2-WACCM/orog_fx_CESM2-WACCM_ssp585_r3i1p1f1_gn.nc',
#     '/projects/alcaps/cmip6/CESM2-WACCM/ua_day_CESM2-WACCM_historical_r3i1p1f1_gn_20000101-20091231.nc',
#     '/projects/alcaps/cmip6/CESM2-WACCM/ua_day_CESM2-WACCM_historical_r3i1p1f1_gn_20100101-20141231.nc',
#     '/projects/alcaps/cmip6/CESM2-WACCM/va_day_CESM2-WACCM_historical_r3i1p1f1_gn_20000101-20091231.nc',
#     '/projects/alcaps/cmip6/CESM2-WACCM/va_day_CESM2-WACCM_historical_r3i1p1f1_gn_20100101-20141231.nc',
#     '/projects/alcaps/cmip6/CESM2-WACCM/zg_day_CESM2-WACCM_historical_r3i1p1f1_gn_20000101-20091231.nc',
#     '/projects/alcaps/cmip6/CESM2-WACCM/zg_day_CESM2-WACCM_historical_r3i1p1f1_gn_20100101-20141231.nc',
# ]


# Files whose coordinates are overwritten in this run.
bad_files = [
    '/projects/alcaps/cmip6/CESM2-WACCM/ua_day_CESM2-WACCM_historical_r3i1p1f1_gn_19800101-19891231.nc',
    '/projects/alcaps/cmip6/CESM2-WACCM/ua_day_CESM2-WACCM_historical_r3i1p1f1_gn_19900101-19991231.nc',
    '/projects/alcaps/cmip6/CESM2-WACCM/va_day_CESM2-WACCM_historical_r3i1p1f1_gn_19800101-19891231.nc',
    '/projects/alcaps/cmip6/CESM2-WACCM/va_day_CESM2-WACCM_historical_r3i1p1f1_gn_19900101-19991231.nc',
    '/projects/alcaps/cmip6/CESM2-WACCM/zg_day_CESM2-WACCM_historical_r3i1p1f1_gn_19800101-19891231.nc',
    '/projects/alcaps/cmip6/CESM2-WACCM/zg_day_CESM2-WACCM_historical_r3i1p1f1_gn_19900101-19991231.nc',
]

for bad_file in bad_files:
    # Split off the real extension instead of str.replace('.nc', ...), which
    # would rewrite any other '.nc' in the path and would leave the name
    # unchanged (output == input) for a path without '.nc'.
    root, ext = os.path.splitext(bad_file)
    output_file = f"{root}_overwrite_coords{ext}"
    overwrite_lat_lon_xarray(bad_file, source_file, output_file)

In [None]:
# Reference file whose lat/lon coordinates and bounds are copied onto each
# of the files listed in bad_files.
source_file = "/projects/alcaps/cmip6/CESM2/ua_day_CESM2_ssp585_r4i1p1f1_gn_20150101-20241231.nc"
#source_file = '/projects/alcaps/cmip6/CESM2/pr_day_CESM2_historical_r4i1p1f1_gn_20000101-20091231_overwrite_coords.nc'

# Alternative file list, kept commented out for reference.
# bad_files = [
#  # '/projects/alcaps/cmip6/CESM2/orog_fx_CESM2_ssp585_r4i1p1f1_gn.nc',
#  # '/projects/alcaps/cmip6/CESM2/ua_day_CESM2_historical_r4i1p1f1_gn_20000101-20091231.nc',
#  # '/projects/alcaps/cmip6/CESM2/ua_day_CESM2_historical_r4i1p1f1_gn_20100101-20141231.nc',
#  # '/projects/alcaps/cmip6/CESM2/va_day_CESM2_historical_r4i1p1f1_gn_20000101-20091231.nc',
#  # '/projects/alcaps/cmip6/CESM2/va_day_CESM2_historical_r4i1p1f1_gn_20100101-20141231.nc',
#  # '/projects/alcaps/cmip6/CESM2/zg_day_CESM2_historical_r4i1p1f1_gn_20000101-20091231.nc',
#  # '/projects/alcaps/cmip6/CESM2/zg_day_CESM2_historical_r4i1p1f1_gn_20100101-20141231.nc',
#     '/projects/alcaps/cmip6/CESM2/pr_day_CESM2_historical_r4i1p1f1_gn_20100101-20150103.nc',
# ]

# Files whose coordinates are overwritten in this run.
bad_files = [
    '/projects/alcaps/cmip6/CESM2/ua_day_CESM2_historical_r4i1p1f1_gn_19800101-19891231.nc',
    '/projects/alcaps/cmip6/CESM2/ua_day_CESM2_historical_r4i1p1f1_gn_19900101-19991231.nc',
    '/projects/alcaps/cmip6/CESM2/va_day_CESM2_historical_r4i1p1f1_gn_19800101-19891231.nc',
    '/projects/alcaps/cmip6/CESM2/va_day_CESM2_historical_r4i1p1f1_gn_19900101-19991231.nc',
    '/projects/alcaps/cmip6/CESM2/zg_day_CESM2_historical_r4i1p1f1_gn_19800101-19891231.nc',
    '/projects/alcaps/cmip6/CESM2/zg_day_CESM2_historical_r4i1p1f1_gn_19900101-19991231.nc',
]

for bad_file in bad_files:
    # Split off the real extension instead of str.replace('.nc', ...), which
    # would rewrite any other '.nc' in the path and would leave the name
    # unchanged (output == input) for a path without '.nc'.
    root, ext = os.path.splitext(bad_file)
    output_file = f"{root}_overwrite_coords{ext}"
    overwrite_lat_lon_xarray(bad_file, source_file, output_file)