This notebook contains the Bias correction (BC) process.

The user can choose either to correct precipitation (with the method BcsdPrecipitation(return_anoms=False), either temperature (with the method BcsdTemperature(return_anoms=False)).

The set of data is corrected following the following process: 

1) observation data and historic modeled data, both for the same period of time, are used to fit the model. Both set of data are at the emplacement of the closest NOAA meteorological station to the project
2) the data at the emplacement of the project is corrected by the fitted model. The data to correct is not at the same emplacement as the data use to fit the model. But, as explain previously, the data use to fit the model are at the emplecement of the closest NOAA station to the emplecement of the project

This notebook correct the data in the past, in order to compare historique bias corrected data with the historic observation data.

# User input

In [1]:
climate_var = 'tasmin' 

name_col_NOAA = 'TMIN'
name_col_mod_hist = 'Daily Minimum Near-Surface Air Temperature °C'
name_col_mod_proj = 'Daily Minimum Near-Surface Air Temperature °C'

unit='Celsius'
resolution = 'day'

start_y = 2030 #1970 # 2030
stop_y = 2074#2014 # 2065
# precipitation : 'pr',name_col_NOAA = 'PRCP', name_col_mod_hist='Mean of the daily precipitation rate mm/day', name_col_mod_proj = 'Mean of the daily precipitation rate mm_per_day', unit = 'mm_per_day', resolution = 'day'
# temperature : 'tas', name_col_NOAA = 'TAVG', name_col_mod_proj = 'Daily Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'
# maximum temperature : 'tasmax', name_col_NOAA = 'TMAX', name_col_mod_proj = 'Daily Maximum Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'
# minimum temperature : 'tasmin', name_col_NOAA = 'TMIN', name_col_mod_proj = 'Daily Minimum Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'


# Import packages and functions

In [4]:
# import modules and functions

import pandas as pd
import numpy as np
import os
import os.path

import matplotlib.pyplot as plt

import sys 
sys.path.append("../0-Functions")

# import data
from Functions_ImportData import import_treat_modeled_NEX_GDDP_CMIP6
from Functions_ImportData import import_filtered_NOAA_obs
from Functions_ImportData import import_treat_obs_NOAA
from Functions_ImportData import import_treat_modeled_NEX_GDDP_CMIP6_close_to_stationNOAA
from Bias_correction_function import BC
from Bias_correction_function import treat_data_for_test
from Bias_correction_function import BCSD_Precipitation_return_anoms_to_apply
from Bias_correction_function import BCSD_Temperature_return_anoms_to_apply
from Functions_Indicators import add_year_month_season

# Import data

In [3]:
# import observation data
data_obs_NOAA_filtered=import_filtered_NOAA_obs()

In [4]:
# import historic modeled data
climate_var_NEX_GDDP_CMIP6_EmplacementStation=import_treat_modeled_NEX_GDDP_CMIP6_close_to_stationNOAA(climate_var, unit)

In [5]:
climate_var_NEX_GDDP_CMIP6_EmplacementStation

Unnamed: 0,Name station,Experiment,Model,Latitude,Longitude,Date,Daily Minimum Near-Surface Air Temperature °C,Year,Month,Season
0,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,01-01-1950,23.674591,1950,Jan,Humid
1,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,02-01-1950,23.931641,1950,Jan,Humid
2,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,03-01-1950,24.323883,1950,Jan,Humid
3,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,04-01-1950,24.575714,1950,Jan,Humid
4,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,05-01-1950,24.466888,1950,Jan,Humid
...,...,...,...,...,...,...,...,...,...,...
6274165,"BEIRA, MZ",ssp370,TaiESM1,-19.875,34.875,27-12-2100,21.782349,2100,Dec,Humid
6274166,"BEIRA, MZ",ssp370,TaiESM1,-19.875,34.875,28-12-2100,21.843018,2100,Dec,Humid
6274167,"BEIRA, MZ",ssp370,TaiESM1,-19.875,34.875,29-12-2100,24.375488,2100,Dec,Humid
6274168,"BEIRA, MZ",ssp370,TaiESM1,-19.875,34.875,30-12-2100,14.916718,2100,Dec,Humid


In [6]:
climate_var_NEX_GDDP_CMIP6_to_fit=climate_var_NEX_GDDP_CMIP6_EmplacementStation[climate_var_NEX_GDDP_CMIP6_EmplacementStation['Year'].between(1970,2014)]

In [7]:
# import data to correct
climate_var_NEX_GDDP_CMIP6_Project=import_treat_modeled_NEX_GDDP_CMIP6(climate_var,unit,resolution,1950,2100)

In [8]:
climate_var_NEX_GDDP_CMIP6_Project

Unnamed: 0,Name project,Experiment,Model,Latitude,Longitude,Date,Daily Minimum Near-Surface Air Temperature °C,Year,Month,Season
0,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,01-01-1950,24.757233,1950,Jan,Humid
1,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,02-01-1950,24.676422,1950,Jan,Humid
2,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,03-01-1950,24.394623,1950,Jan,Humid
3,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,04-01-1950,24.546448,1950,Jan,Humid
4,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,05-01-1950,20.226654,1950,Jan,Humid
...,...,...,...,...,...,...,...,...,...,...
8365555,Pemba_EIB,ssp370,TaiESM1,-12.875,40.625,27-12-2100,29.714172,2100,Dec,Humid
8365556,Pemba_EIB,ssp370,TaiESM1,-12.875,40.625,28-12-2100,29.372620,2100,Dec,Humid
8365557,Pemba_EIB,ssp370,TaiESM1,-12.875,40.625,29-12-2100,28.694733,2100,Dec,Humid
8365558,Pemba_EIB,ssp370,TaiESM1,-12.875,40.625,30-12-2100,27.819519,2100,Dec,Humid


In [9]:
# select years between which the data should be corrected. Usually, the data are not corrected for the same number of years over which the model was fitted
climate_var_NEX_GDDP_CMIP6_Project_to_correct=climate_var_NEX_GDDP_CMIP6_Project[climate_var_NEX_GDDP_CMIP6_Project['Year'].between(start_y,stop_y)]

# Apply BC

In [10]:
# preprare list of model to BC
list_model=list(set(climate_var_NEX_GDDP_CMIP6_Project_to_correct['Model']))
list_model.remove('NESM3') # this model has too much Nan to be interesting
list_model.remove('CMCC-CM2-SR5')
list_model.remove('TaiESM1')

In [11]:
df_bc_corrected=pd.DataFrame()
for name_project in list(set(climate_var_NEX_GDDP_CMIP6_Project_to_correct['Name project'])):
    # select data for one project
    climate_var_NEX_GDDP_CMIP6_Project_temp=climate_var_NEX_GDDP_CMIP6_Project_to_correct[climate_var_NEX_GDDP_CMIP6_Project_to_correct['Name project']==name_project]
    # depending on project, don t fit the model with the same station data
    if name_project == 'WTP_Mutua_EIB':
        name_station = 'BEIRA, MZ'
    if name_project =='Chimoio_WTP_EIB' or name_project =='Gorongosa_EIB':
        name_station = 'CHIMOIO, MZ'
    if name_project == 'Pemba_EIB':
        name_station = 'PEMBA, MZ'
    for scenario in list(set(climate_var_NEX_GDDP_CMIP6_Project_to_correct['Experiment'])):
        # select data for one scenario
        climate_var_NEX_GDDP_CMIP6_Project_temp_2=climate_var_NEX_GDDP_CMIP6_Project_temp[climate_var_NEX_GDDP_CMIP6_Project_temp['Experiment']==scenario]
        for model in list_model:
            print('For '+name_project+', '+scenario+' and '+model)
            # select data for one model
            climate_var_NEX_GDDP_CMIP6_Project_temp_3=climate_var_NEX_GDDP_CMIP6_Project_temp_2[climate_var_NEX_GDDP_CMIP6_Project_temp_2['Model']==model]
            # concatenate observed and historic modeled data in one dataframe df
            df = treat_data_for_test(data_obs_NOAA_filtered,name_col_NOAA,climate_var_NEX_GDDP_CMIP6_to_fit,name_col_mod_hist,name_station,model)
            # apply method; choose method based on if climate_var is precipitation or temperature
            # X is the modeled historic set of data used to fit the model
            # y is the observed set of data used to fit the model
            # out is the product of the bias correction
            if climate_var == 'pr':
                print('Apply BCSD_precipitation')
                (X_pcp,y_pcp,out)=BCSD_Precipitation_return_anoms_to_apply(df,climate_var_NEX_GDDP_CMIP6_Project_temp_3[['Date',name_col_mod_proj]].dropna().reset_index(drop=True))
            else:
                print('Apply BCSD_temperature')
                (X_temp,y_temp,out)=BCSD_Temperature_return_anoms_to_apply(df,climate_var_NEX_GDDP_CMIP6_Project_temp_3[['Date',name_col_mod_proj]].dropna().reset_index(drop=True))
            out = out.reset_index() # date is no longer the index
            out['Name project']=name_project # impose the name of the project in the dataframe
            out['Experiment']=scenario # impose the name of the scenario in the dataframe
            out['Model']=model # impose the name of the model in the dataframe
            df_bc_corrected=pd.concat([df_bc_corrected,out]) # concat the result to the other results

For Pemba_EIB, ssp126 and MIROC6
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Pemba_EIB, ssp126 and ACCESS-ESM1-5
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Pemba_EIB, ssp126 and CMCC-ESM2
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Pemba_EIB, ssp126 and MRI-ESM2-0
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Pemba_EIB, ssp126 and NorESM2-LM
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Pemba_EIB, ssp126 and MPI-ESM1-2-LR
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Pemba_EIB, ssp126 and MPI-ESM1-2-HR
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Pemba_EIB, ssp126 and CanESM5
Apply BCSD_temper

For Chimoio_WTP_EIB, ssp245 and MPI-ESM1-2-LR
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Chimoio_WTP_EIB, ssp245 and MPI-ESM1-2-HR
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Chimoio_WTP_EIB, ssp245 and CanESM5
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Chimoio_WTP_EIB, ssp245 and BCC-CSM2-MR
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Chimoio_WTP_EIB, ssp245 and ACCESS-CM2
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Chimoio_WTP_EIB, ssp245 and NorESM2-MM
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Chimoio_WTP_EIB, ssp585 and MIROC6
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For Chimoio_W

For WTP_Mutua_EIB, ssp585 and ACCESS-CM2
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For WTP_Mutua_EIB, ssp585 and NorESM2-MM
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For WTP_Mutua_EIB, ssp370 and MIROC6
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For WTP_Mutua_EIB, ssp370 and ACCESS-ESM1-5
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For WTP_Mutua_EIB, ssp370 and CMCC-ESM2
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For WTP_Mutua_EIB, ssp370 and MRI-ESM2-0
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For WTP_Mutua_EIB, ssp370 and NorESM2-LM
Apply BCSD_temperature
self.time_grouper not a str, automatic association for timestep is monthly
For WTP_Mutua_EIB, ssp370 and

In [12]:
df_bc_corrected = df_bc_corrected.reset_index(drop=True) # reset index not to have several time the same index

In [13]:
df_bc_corrected['Date']=pd.to_datetime(df_bc_corrected[['Date']].Date,format='%Y-%d-%m')
df_bc_corrected['Date']=df_bc_corrected['Date'].astype(str)

In [14]:
df_bc_corrected

Unnamed: 0,Date,Daily Minimum Near-Surface Air Temperature °C,Name project,Experiment,Model
0,2030-01-01,27.234954,Pemba_EIB,ssp126,MIROC6
1,2030-01-02,28.201871,Pemba_EIB,ssp126,MIROC6
2,2030-01-03,27.676143,Pemba_EIB,ssp126,MIROC6
3,2030-01-04,28.159552,Pemba_EIB,ssp126,MIROC6
4,2030-01-05,27.122694,Pemba_EIB,ssp126,MIROC6
...,...,...,...,...,...
2888931,2074-12-27,26.385624,Gorongosa_EIB,ssp370,NorESM2-MM
2888932,2074-12-28,26.192779,Gorongosa_EIB,ssp370,NorESM2-MM
2888933,2074-12-29,23.963829,Gorongosa_EIB,ssp370,NorESM2-MM
2888934,2074-12-30,22.966323,Gorongosa_EIB,ssp370,NorESM2-MM


In [15]:
# add month, year and season. Will be usefull to apply CRVA
df_bc_corrected = add_year_month_season(df_bc_corrected,'Date')

In [16]:
df_bc_corrected

Unnamed: 0,Date,Daily Minimum Near-Surface Air Temperature °C,Name project,Experiment,Model,Year,Month,Season
0,2030-01-01,27.234954,Pemba_EIB,ssp126,MIROC6,2030,Jan,Humid
1,2030-01-02,28.201871,Pemba_EIB,ssp126,MIROC6,2030,Jan,Humid
2,2030-01-03,27.676143,Pemba_EIB,ssp126,MIROC6,2030,Jan,Humid
3,2030-01-04,28.159552,Pemba_EIB,ssp126,MIROC6,2030,Jan,Humid
4,2030-01-05,27.122694,Pemba_EIB,ssp126,MIROC6,2030,Jan,Humid
...,...,...,...,...,...,...,...,...
2888931,2074-12-27,26.385624,Gorongosa_EIB,ssp370,NorESM2-MM,2074,Dec,Humid
2888932,2074-12-28,26.192779,Gorongosa_EIB,ssp370,NorESM2-MM,2074,Dec,Humid
2888933,2074-12-29,23.963829,Gorongosa_EIB,ssp370,NorESM2-MM,2074,Dec,Humid
2888934,2074-12-30,22.966323,Gorongosa_EIB,ssp370,NorESM2-MM,2074,Dec,Humid


# Export result as csv file

In [17]:
path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\NEX-GDDP-CMIP6-AllMoz\csv_file'

In [18]:
if not os.path.isdir(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected')):
    os.makedirs(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected'))

In [19]:
df_bc_corrected.to_csv(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected',climate_var+'_'+unit+resolution+str(start_y)+'-'+str(stop_y)+'_BiasCorrected.csv'))