This notebook contains the Bias correction (BC) process.

The user can choose either to correct precipitation (with the method BcsdPrecipitation(return_anoms=False), either temperature (with the method BcsdTemperature(return_anoms=False)).

The set of data is corrected following the following process: 

1) observation data and historic modeled data, both for the same period of time, are used to fit the model. Both set of data are at the emplacement of the closest NOAA meteorological station to the project
2) the data at the same emplacement is corrected by the fitted model. This permit ot compare the historic corrected data with the fitted model

# User input

In [1]:
climate_var = 'tasmax' 

name_col_NOAA = 'TMAX'
name_col_mod_hist = 'Daily Maximum Near-Surface Air Temperature °C'
name_col_mod_proj = 'Daily Maximum Near-Surface Air Temperature °C'

unit='Celsius'
resolution = 'day'

start_y = 1970
stop_y = 2014
# precipitation : 'pr',name_col_NOAA = 'PRCP', name_col_mod_hist='Mean of the daily precipitation rate mm/day', name_col_mod_proj = 'Mean of the daily precipitation rate mm_per_day', unit = 'mm_per_day', resolution = 'day'
# temperature : 'tas', name_col_NOAA = 'TAVG', name_col_mod_proj = 'Daily Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'
# maximum temperature : 'tasmax', name_col_NOAA = 'TMAX', name_col_mod_proj = 'Daily Maximum Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'
# minimum temperature : 'tasmin', name_col_NOAA = 'TMIN', name_col_mod_proj = 'Daily Minimum Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'


# Import packages and functions

In [2]:
# import modules and functions

import pandas as pd
import numpy as np
import os
import os.path

import matplotlib.pyplot as plt

# import data
from Functions_ImportData import import_treat_modeled_NEX_GDDP_CMIP6
from Functions_ImportData import import_filtered_NOAA_obs
from Functions_ImportData import import_treat_obs_NOAA
from Functions_ImportData import import_treat_modeled_NEX_GDDP_CMIP6_close_to_stationNOAA
from Bias_correction_function import BC
from Bias_correction_function import treat_data_for_test
from Bias_correction_function import BCSD_Precipitation_return_anoms_to_apply
from Bias_correction_function import BCSD_Temperature_return_anoms_to_apply
from Functions_Indicators import add_year_month_season

# Import data

In [3]:
# import observation data
data_obs_NOAA_filtered=import_filtered_NOAA_obs()

In [4]:
# import historic modeled data
climate_var_NEX_GDDP_CMIP6_EmplacementStation=import_treat_modeled_NEX_GDDP_CMIP6_close_to_stationNOAA(climate_var, unit)

In [5]:
climate_var_NEX_GDDP_CMIP6_EmplacementStation

Unnamed: 0,Name station,Experiment,Model,Latitude,Longitude,Date,Daily Maximum Near-Surface Air Temperature °C,Year,Month,Season
0,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,01-01-1970,29.407166,1970,Jan,Humid
1,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,02-01-1970,29.242828,1970,Jan,Humid
2,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,03-01-1970,29.896942,1970,Jan,Humid
3,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,04-01-1970,30.779022,1970,Jan,Humid
4,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,05-01-1970,31.433685,1970,Jan,Humid
...,...,...,...,...,...,...,...,...,...,...
690307,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,27-12-2014,35.337708,2014,Dec,Humid
690308,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,28-12-2014,34.965546,2014,Dec,Humid
690309,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,29-12-2014,36.526917,2014,Dec,Humid
690310,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,30-12-2014,37.756683,2014,Dec,Humid


In [8]:
# select years between which the data should be corrected. Usually, the data are not corrected for the same number of years over which the model was fitted
climate_var_NEX_GDDP_CMIP6_to_correct=climate_var_NEX_GDDP_CMIP6_EmplacementStation[climate_var_NEX_GDDP_CMIP6_EmplacementStation['Year'].between(start_y,stop_y)]

# Apply BC

In [9]:
# preprare list of model to BC
list_model=list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Model']))
list_model.remove('NESM3') # this model has too much Nan to be interesting
list_model.remove('CMCC-CM2-SR5')
list_model.remove('TaiESM1')

In [10]:
climate_var_NEX_GDDP_CMIP6_to_correct

Unnamed: 0,Name station,Experiment,Model,Latitude,Longitude,Date,Daily Maximum Near-Surface Air Temperature °C,Year,Month,Season
0,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,01-01-1970,29.407166,1970,Jan,Humid
1,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,02-01-1970,29.242828,1970,Jan,Humid
2,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,03-01-1970,29.896942,1970,Jan,Humid
3,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,04-01-1970,30.779022,1970,Jan,Humid
4,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,05-01-1970,31.433685,1970,Jan,Humid
...,...,...,...,...,...,...,...,...,...,...
690307,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,27-12-2014,35.337708,2014,Dec,Humid
690308,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,28-12-2014,34.965546,2014,Dec,Humid
690309,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,29-12-2014,36.526917,2014,Dec,Humid
690310,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,30-12-2014,37.756683,2014,Dec,Humid


In [11]:
df_bc_corrected=pd.DataFrame()
for name_project in list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Name station'])):
    # select data for one project
    climate_var_NEX_GDDP_CMIP6_Project_temp=climate_var_NEX_GDDP_CMIP6_to_correct[climate_var_NEX_GDDP_CMIP6_to_correct['Name station']==name_project]
    # depending on project, don t fit the model with the same station data
    name_station = name_project
    for scenario in list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Experiment'])):
        # select data for one scenario
        climate_var_NEX_GDDP_CMIP6_Project_temp_2=climate_var_NEX_GDDP_CMIP6_Project_temp[climate_var_NEX_GDDP_CMIP6_Project_temp['Experiment']==scenario]
        for model in list_model:
            print('For '+name_project+', '+scenario+' and '+model)
            # select data for one model
            climate_var_NEX_GDDP_CMIP6_Project_temp_3=climate_var_NEX_GDDP_CMIP6_Project_temp_2[climate_var_NEX_GDDP_CMIP6_Project_temp_2['Model']==model]
            # concatenate observed and historic modeled data in one dataframe df
            df = treat_data_for_test(data_obs_NOAA_filtered,name_col_NOAA,climate_var_NEX_GDDP_CMIP6_EmplacementStation,name_col_mod_hist,name_station,model)
            # apply method; choose method based on if climate_var is precipitation or temperature
            # X is the modeled historic set of data used to fit the model
            # y is the observed set of data used to fit the model
            # out is the product of the bias correction
            if climate_var == 'pr':
                (X_pcp,y_pcp,out)=BCSD_Precipitation_return_anoms_to_apply(df,climate_var_NEX_GDDP_CMIP6_Project_temp_3[['Date',name_col_mod_proj]].dropna().reset_index(drop=True))
            else:
                (X_temp,y_temp,out)=BCSD_Temperature_return_anoms_to_apply(df,climate_var_NEX_GDDP_CMIP6_Project_temp_3[['Date',name_col_mod_proj]].dropna().reset_index(drop=True))
            out = out.reset_index() # date is no longer the index
            out['Name project']=name_project # impose the name of the project in the dataframe
            out['Experiment']=scenario # impose the name of the scenario in the dataframe
            out['Model']=model # impose the name of the model in the dataframe
            df_bc_corrected=pd.concat([df_bc_corrected,out]) # concat the result to the other results

For BEIRA, MZ, historical and MIROC6
For BEIRA, MZ, historical and ACCESS-CM2
For BEIRA, MZ, historical and CanESM5
For BEIRA, MZ, historical and BCC-CSM2-MR
For BEIRA, MZ, historical and MRI-ESM2-0
For BEIRA, MZ, historical and CMCC-ESM2
For BEIRA, MZ, historical and MPI-ESM1-2-LR
For BEIRA, MZ, historical and MPI-ESM1-2-HR
For BEIRA, MZ, historical and NorESM2-LM
For BEIRA, MZ, historical and NorESM2-MM
For BEIRA, MZ, historical and ACCESS-ESM1-5
For CHIMOIO, MZ, historical and MIROC6
For CHIMOIO, MZ, historical and ACCESS-CM2
For CHIMOIO, MZ, historical and CanESM5
For CHIMOIO, MZ, historical and BCC-CSM2-MR
For CHIMOIO, MZ, historical and MRI-ESM2-0
For CHIMOIO, MZ, historical and CMCC-ESM2
For CHIMOIO, MZ, historical and MPI-ESM1-2-LR
For CHIMOIO, MZ, historical and MPI-ESM1-2-HR
For CHIMOIO, MZ, historical and NorESM2-LM
For CHIMOIO, MZ, historical and NorESM2-MM
For CHIMOIO, MZ, historical and ACCESS-ESM1-5
For PEMBA, MZ, historical and MIROC6
For PEMBA, MZ, historical and ACCES

In [12]:
df_t = df_bc_corrected.copy(deep=True)

In [13]:
df_bc_corrected = df_bc_corrected.reset_index(drop=True) # reset index not to have several time the same index

In [14]:
df_bc_corrected['Date']=pd.to_datetime(df_bc_corrected[['Date']].Date,format='%Y-%d-%m')
df_bc_corrected['Date']=df_bc_corrected['Date'].astype(str)

In [15]:
df_bc_corrected

Unnamed: 0,Date,Daily Maximum Near-Surface Air Temperature °C,Name project,Experiment,Model
0,1970-01-01,31.951218,"BEIRA, MZ",historical,MIROC6
1,1970-01-02,32.405873,"BEIRA, MZ",historical,MIROC6
2,1970-01-03,32.884954,"BEIRA, MZ",historical,MIROC6
3,1970-01-04,33.224031,"BEIRA, MZ",historical,MIROC6
4,1970-01-05,33.722513,"BEIRA, MZ",historical,MIROC6
...,...,...,...,...,...
542218,2014-12-27,27.981671,"PEMBA, MZ",historical,ACCESS-ESM1-5
542219,2014-12-28,30.778977,"PEMBA, MZ",historical,ACCESS-ESM1-5
542220,2014-12-29,32.256610,"PEMBA, MZ",historical,ACCESS-ESM1-5
542221,2014-12-30,34.175364,"PEMBA, MZ",historical,ACCESS-ESM1-5


In [18]:
# add month, year and season. Will be usefull to apply CRVA
df_bc_corrected = add_year_month_season(df_bc_corrected,'Date')

In [19]:
df_bc_corrected

Unnamed: 0,Date,Daily Maximum Near-Surface Air Temperature °C,Name project,Experiment,Model,Year,Month,Season
0,1970-01-01,31.951218,"BEIRA, MZ",historical,MIROC6,1970,Jan,Humid
1,1970-01-02,32.405873,"BEIRA, MZ",historical,MIROC6,1970,Jan,Humid
2,1970-01-03,32.884954,"BEIRA, MZ",historical,MIROC6,1970,Jan,Humid
3,1970-01-04,33.224031,"BEIRA, MZ",historical,MIROC6,1970,Jan,Humid
4,1970-01-05,33.722513,"BEIRA, MZ",historical,MIROC6,1970,Jan,Humid
...,...,...,...,...,...,...,...,...
542218,2014-12-27,27.981671,"PEMBA, MZ",historical,ACCESS-ESM1-5,2014,Dec,Humid
542219,2014-12-28,30.778977,"PEMBA, MZ",historical,ACCESS-ESM1-5,2014,Dec,Humid
542220,2014-12-29,32.256610,"PEMBA, MZ",historical,ACCESS-ESM1-5,2014,Dec,Humid
542221,2014-12-30,34.175364,"PEMBA, MZ",historical,ACCESS-ESM1-5,2014,Dec,Humid


# Export result as csv file

In [25]:
path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\NEX-GDDP-CMIP6-AllMoz\csv_file'

In [26]:
if not os.path.isdir(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected')):
    os.makedirs(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected'))

In [27]:
df_bc_corrected.to_csv(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected',climate_var+'_'+unit+resolution+str(start_y)+'-'+str(stop_y)+'_BiasCorrected_EmplacementStationNOAA.csv'))