This notebook contains the Bias correction (BC) process.

The user can choose either to correct precipitation (with the method BcsdPrecipitation(return_anoms=False), either temperature (with the method BcsdTemperature(return_anoms=False)).

The set of data is corrected following the following process: 

1) observation data and historic modeled data, both for the same period of time, are used to fit the model. Both set of data are at the emplacement of the closest NOAA meteorological station to the project
2) the data at the same emplacement is corrected by the fitted model. This permit ot compare the historic corrected data with the fitted model

# User input

In [1]:
climate_var = 'tasmax' 

name_col_NOAA = 'TMAX'
name_col_mod_hist = 'Daily Maximum Near-Surface Air Temperature °C'
name_col_mod_proj = 'Daily Maximum Near-Surface Air Temperature °C'

unit='Celsius'
resolution = 'day'

start_y_fit = 1970#1970
stop_y_fit = 1984 # 2014

start_y_correct = 1985 # 2030
stop_y_correct = 2014 # 2060
# precipitation : 'pr',name_col_NOAA = 'PRCP', name_col_mod_hist='Mean of the daily precipitation rate mm/day', name_col_mod_proj = 'Mean of the daily precipitation rate mm_per_day', unit = 'mm_per_day', resolution = 'day'
# temperature : 'tas', name_col_NOAA = 'TAVG', name_col_mod_proj = 'Daily Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'
# maximum temperature : 'tasmax', name_col_NOAA = 'TMAX', name_col_mod_proj = 'Daily Maximum Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'
# minimum temperature : 'tasmin', name_col_NOAA = 'TMIN', name_col_mod_proj = 'Daily Minimum Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'


# Import packages and functions

In [5]:
# import modules and functions

import pandas as pd
import numpy as np
import os
import os.path

import matplotlib.pyplot as plt

# import data
from Functions_ImportData import import_treat_modeled_NEX_GDDP_CMIP6
from Functions_ImportData import import_filtered_NOAA_obs
from Functions_ImportData import import_treat_obs_NOAA
from Functions_ImportData import import_treat_modeled_NEX_GDDP_CMIP6_close_to_stationNOAA
from Bias_correction_function import BC
from Bias_correction_function import treat_data_for_test
from Bias_correction_function import BCSD_Precipitation_return_anoms_to_apply
from Bias_correction_function import BCSD_Temperature_return_anoms_to_apply
from Functions_Indicators import add_year_month_season

# Import data

In [6]:
# import observation data
data_obs_NOAA_filtered=import_filtered_NOAA_obs()

In [7]:
data_obs_NOAA_filtered_to_fit = data_obs_NOAA_filtered[data_obs_NOAA_filtered['Year'].between(start_y_fit,stop_y_fit)]

In [8]:
# import historic modeled data
climate_var_NEX_GDDP_CMIP6_EmplacementStation=import_treat_modeled_NEX_GDDP_CMIP6_close_to_stationNOAA(climate_var, unit)

In [9]:
climate_var_NEX_GDDP_CMIP6_EmplacementStation

Unnamed: 0,Name station,Experiment,Model,Latitude,Longitude,Date,Daily Maximum Near-Surface Air Temperature °C,Year,Month,Season
0,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,01-01-1950,33.245087,1950,Jan,Humid
1,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,02-01-1950,33.782166,1950,Jan,Humid
2,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,03-01-1950,33.477509,1950,Jan,Humid
3,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,04-01-1950,32.702179,1950,Jan,Humid
4,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,05-01-1950,32.773590,1950,Jan,Humid
...,...,...,...,...,...,...,...,...,...,...
6274165,"BEIRA, MZ",ssp370,TaiESM1,-19.875,34.875,27-12-2100,38.744629,2100,Dec,Humid
6274166,"BEIRA, MZ",ssp370,TaiESM1,-19.875,34.875,28-12-2100,38.808044,2100,Dec,Humid
6274167,"BEIRA, MZ",ssp370,TaiESM1,-19.875,34.875,29-12-2100,41.413300,2100,Dec,Humid
6274168,"BEIRA, MZ",ssp370,TaiESM1,-19.875,34.875,30-12-2100,31.957153,2100,Dec,Humid


In [10]:
climate_var_NEX_GDDP_CMIP6_to_fit=climate_var_NEX_GDDP_CMIP6_EmplacementStation[climate_var_NEX_GDDP_CMIP6_EmplacementStation['Year'].between(start_y_fit,stop_y_fit)]

In [11]:
# select years between which the data should be corrected. Usually, the data are not corrected for the same number of years over which the model was fitted
climate_var_NEX_GDDP_CMIP6_to_correct=climate_var_NEX_GDDP_CMIP6_EmplacementStation[climate_var_NEX_GDDP_CMIP6_EmplacementStation['Year'].between(start_y_correct,stop_y_correct)]

# Apply BC

In [12]:
# preprare list of model to BC
list_model=list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Model']))
list_model.remove('NESM3') # this model has too much Nan to be interesting
list_model.remove('CMCC-CM2-SR5')
list_model.remove('TaiESM1')

In [13]:
climate_var_NEX_GDDP_CMIP6_to_correct

Unnamed: 0,Name station,Experiment,Model,Latitude,Longitude,Date,Daily Maximum Near-Surface Air Temperature °C,Year,Month,Season
12784,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,01-01-1985,30.354950,1985,Jan,Humid
12785,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,02-01-1985,30.885773,1985,Jan,Humid
12786,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,03-01-1985,29.719696,1985,Jan,Humid
12787,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,04-01-1985,29.319183,1985,Jan,Humid
12788,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,05-01-1985,28.990509,1985,Jan,Humid
...,...,...,...,...,...,...,...,...,...,...
4515149,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,27-12-2014,35.337708,2014,Dec,Humid
4515150,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,28-12-2014,34.965546,2014,Dec,Humid
4515151,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,29-12-2014,36.526917,2014,Dec,Humid
4515152,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,30-12-2014,37.756683,2014,Dec,Humid


In [14]:
df_bc_corrected=pd.DataFrame()
for name_project in list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Name station'])):
    # select data for one project
    climate_var_NEX_GDDP_CMIP6_Project_temp=climate_var_NEX_GDDP_CMIP6_to_correct[climate_var_NEX_GDDP_CMIP6_to_correct['Name station']==name_project]
    # depending on project, don t fit the model with the same station data
    name_station = name_project
    for scenario in list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Experiment'])):
        # select data for one scenario
        climate_var_NEX_GDDP_CMIP6_Project_temp_2=climate_var_NEX_GDDP_CMIP6_Project_temp[climate_var_NEX_GDDP_CMIP6_Project_temp['Experiment']==scenario]
        for model in list_model:
            print('For '+name_project+', '+scenario+' and '+model)
            # select data for one model
            climate_var_NEX_GDDP_CMIP6_Project_temp_3=climate_var_NEX_GDDP_CMIP6_Project_temp_2[climate_var_NEX_GDDP_CMIP6_Project_temp_2['Model']==model]
            # concatenate observed and historic modeled data in one dataframe df
            df = treat_data_for_test(data_obs_NOAA_filtered_to_fit,name_col_NOAA,climate_var_NEX_GDDP_CMIP6_to_fit,name_col_mod_hist,name_station,model)
            # apply method; choose method based on if climate_var is precipitation or temperature
            # X is the modeled historic set of data used to fit the model
            # y is the observed set of data used to fit the model
            # out is the product of the bias correction
            if climate_var == 'pr':
                (X_pcp,y_pcp,out)=BCSD_Precipitation_return_anoms_to_apply(df,climate_var_NEX_GDDP_CMIP6_Project_temp_3[['Date',name_col_mod_proj]].dropna().reset_index(drop=True))
            else:
                (X_temp,y_temp,out)=BCSD_Temperature_return_anoms_to_apply(df,climate_var_NEX_GDDP_CMIP6_Project_temp_3[['Date',name_col_mod_proj]].dropna().reset_index(drop=True))
            out = out.reset_index() # date is no longer the index
            out['Name project']=name_project # impose the name of the project in the dataframe
            out['Experiment']=scenario # impose the name of the scenario in the dataframe
            out['Model']=model # impose the name of the model in the dataframe
            df_bc_corrected=pd.concat([df_bc_corrected,out]) # concat the result to the other results

For PEMBA, MZ, historical and CanESM5
self.time_grouper not a str, automatic association for timestep is monthly
For PEMBA, MZ, historical and MIROC6
self.time_grouper not a str, automatic association for timestep is monthly
For PEMBA, MZ, historical and ACCESS-CM2
self.time_grouper not a str, automatic association for timestep is monthly
For PEMBA, MZ, historical and MRI-ESM2-0
self.time_grouper not a str, automatic association for timestep is monthly
For PEMBA, MZ, historical and NorESM2-LM
self.time_grouper not a str, automatic association for timestep is monthly
For PEMBA, MZ, historical and MPI-ESM1-2-LR
self.time_grouper not a str, automatic association for timestep is monthly
For PEMBA, MZ, historical and BCC-CSM2-MR
self.time_grouper not a str, automatic association for timestep is monthly
For PEMBA, MZ, historical and MPI-ESM1-2-HR
self.time_grouper not a str, automatic association for timestep is monthly
For PEMBA, MZ, historical and NorESM2-MM
self.time_grouper not a str, au

In [15]:
df_t = df_bc_corrected.copy(deep=True)

In [16]:
df_bc_corrected = df_bc_corrected.reset_index(drop=True) # reset index not to have several time the same index

In [17]:
df_bc_corrected['Date']=pd.to_datetime(df_bc_corrected[['Date']].Date,format='%Y-%d-%m')
df_bc_corrected['Date']=df_bc_corrected['Date'].astype(str)

In [18]:
df_bc_corrected

Unnamed: 0,Date,Daily Maximum Near-Surface Air Temperature °C,Name project,Experiment,Model
0,1985-01-01,30.329177,"PEMBA, MZ",historical,CanESM5
1,1985-01-02,29.127760,"PEMBA, MZ",historical,CanESM5
2,1985-01-03,28.376761,"PEMBA, MZ",historical,CanESM5
3,1985-01-04,29.146270,"PEMBA, MZ",historical,CanESM5
4,1985-01-05,30.810044,"PEMBA, MZ",historical,CanESM5
...,...,...,...,...,...
361471,2014-12-27,29.713884,"CHIMOIO, MZ",historical,CMCC-ESM2
361472,2014-12-28,32.947503,"CHIMOIO, MZ",historical,CMCC-ESM2
361473,2014-12-29,35.565973,"CHIMOIO, MZ",historical,CMCC-ESM2
361474,2014-12-30,25.272942,"CHIMOIO, MZ",historical,CMCC-ESM2


In [19]:
# add month, year and season. Will be usefull to apply CRVA
df_bc_corrected = add_year_month_season(df_bc_corrected,'Date')

In [20]:
df_bc_corrected

Unnamed: 0,Date,Daily Maximum Near-Surface Air Temperature °C,Name project,Experiment,Model,Year,Month,Season
0,1985-01-01,30.329177,"PEMBA, MZ",historical,CanESM5,1985,Jan,Humid
1,1985-01-02,29.127760,"PEMBA, MZ",historical,CanESM5,1985,Jan,Humid
2,1985-01-03,28.376761,"PEMBA, MZ",historical,CanESM5,1985,Jan,Humid
3,1985-01-04,29.146270,"PEMBA, MZ",historical,CanESM5,1985,Jan,Humid
4,1985-01-05,30.810044,"PEMBA, MZ",historical,CanESM5,1985,Jan,Humid
...,...,...,...,...,...,...,...,...
361471,2014-12-27,29.713884,"CHIMOIO, MZ",historical,CMCC-ESM2,2014,Dec,Humid
361472,2014-12-28,32.947503,"CHIMOIO, MZ",historical,CMCC-ESM2,2014,Dec,Humid
361473,2014-12-29,35.565973,"CHIMOIO, MZ",historical,CMCC-ESM2,2014,Dec,Humid
361474,2014-12-30,25.272942,"CHIMOIO, MZ",historical,CMCC-ESM2,2014,Dec,Humid


# Export result as csv file

In [21]:
path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\NEX-GDDP-CMIP6-AllMoz\csv_file'

In [22]:
if not os.path.isdir(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y_correct)+'-'+str(stop_y_correct)+'_BiasCorrected')):
    os.makedirs(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y_correct)+'-'+str(stop_y_correct)+'_BiasCorrected'))

In [23]:
df_bc_corrected.to_csv(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y_correct)+'-'+str(stop_y_correct)+'_BiasCorrected',climate_var+'_'+unit+resolution+str(start_y_correct)+'-'+str(stop_y_correct)+'_BiasCorrected_EmplacementStationNOAA.csv'))