This notebook contains the Bias correction (BC) process.

The user can choose either to correct precipitation (with the method BcsdPrecipitation(return_anoms=False), either temperature (with the method BcsdTemperature(return_anoms=False)).

The set of data is corrected following the following process: 

1) observation data and historic modeled data, both for the same period of time, are used to fit the model
2) the data at the emplacement of the project is corrected by the fitted model.

This notebook only correct data at Gorongosa, with observation data from the meteorological station at Gorongosa.

# User input

In [1]:
climate_var = 'pr' 

name_col_obs = 'pr'
name_col_mod_hist = 'Mean of the daily precipitation rate mm_per_day'
name_col_mod_proj = 'Mean of the daily precipitation rate mm_per_day'

unit='mm_per_day'
resolution = 'day'

start_y = 1980
stop_y = 2014
# precipitation : 'pr',name_col_NOAA = 'PRCP', name_col_mod_hist='Mean of the daily precipitation rate mm/day', name_col_mod_proj = 'Mean of the daily precipitation rate mm_per_day', unit = 'mm_per_day', resolution = 'day'
# temperature : 'tas', name_col_NOAA = 'TAVG', name_col_mod_proj = 'Daily Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'
# maximum temperature : 'tasmax', name_col_NOAA = 'TMAX', name_col_mod_proj = 'Daily Maximum Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'
# minimum temperature : 'tasmin', name_col_NOAA = 'TMIN', name_col_mod_proj = 'Daily Minimum Near-Surface Air Temperature °C', unit = 'Celsius', resolution = 'day'


# Import packages and functions

In [1]:
# import modules and functions

import pandas as pd
import numpy as np
import os
import os.path

import matplotlib.pyplot as plt

# import functions
import sys 
sys.path.append("../0-Functions")

from Functions_ImportData import import_treat_modeled_NEX_GDDP_CMIP6
from Functions_ImportData import import_filtered_NOAA_obs
from Functions_ImportData import import_treat_obs_NOAA
from Functions_ImportData import import_treat_modeled_NEX_GDDP_CMIP6_close_to_stationNOAA
from Functions_ImportData import import_gorongosa_obs_pr
from Bias_correction_function import BC
from Bias_correction_function import treat_data_for_test
from Bias_correction_function import BCSD_Precipitation_return_anoms_to_apply
from Bias_correction_function import BCSD_Temperature_return_anoms_to_apply
from Functions_Indicators import add_year_month_season

# Import data

In [3]:
# import observation data
data_obs=import_gorongosa_obs_pr()

In [4]:
data_obs['NAME'] = 'Gorongosa_EIB'

In [5]:
data_obs.rename(columns={'time':'DATE'},inplace=True)

In [6]:
data_obs = data_obs[data_obs['Year'].between(1980,2014)]

In [7]:
# import data to correct
climate_var_NEX_GDDP_CMIP6_Project=import_treat_modeled_NEX_GDDP_CMIP6(climate_var,unit,resolution,1950,2100)

In [8]:
climate_var_NEX_GDDP_CMIP6_Project

Unnamed: 0,Name project,Experiment,Model,Latitude,Longitude,Date,Mean of the daily precipitation rate mm_per_day,Year,Month,Season
0,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,01-01-1950,0.657509,1950,Jan,Humid
1,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,02-01-1950,0.442182,1950,Jan,Humid
2,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,03-01-1950,0.031404,1950,Jan,Humid
3,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,04-01-1950,15.071664,1950,Jan,Humid
4,WTP_Mutua_EIB,historical,ACCESS-CM2,-19.375,34.625,05-01-1950,12.865736,1950,Jan,Humid
...,...,...,...,...,...,...,...,...,...,...
8963095,Pemba_EIB,ssp370,TaiESM1,-12.875,40.625,27-12-2100,0.286506,2100,Dec,Humid
8963096,Pemba_EIB,ssp370,TaiESM1,-12.875,40.625,28-12-2100,0.050367,2100,Dec,Humid
8963097,Pemba_EIB,ssp370,TaiESM1,-12.875,40.625,29-12-2100,0.000000,2100,Dec,Humid
8963098,Pemba_EIB,ssp370,TaiESM1,-12.875,40.625,30-12-2100,0.581391,2100,Dec,Humid


In [9]:
climate_var_NEX_GDDP_CMIP6_Project_to_fit = climate_var_NEX_GDDP_CMIP6_Project[climate_var_NEX_GDDP_CMIP6_Project['Year'].between(1980,2014)]

In [10]:
# select years between which the data should be corrected. Usually, the data are not corrected for the same number of years over which the model was fitted
climate_var_NEX_GDDP_CMIP6_to_correct=climate_var_NEX_GDDP_CMIP6_Project[climate_var_NEX_GDDP_CMIP6_Project['Year'].between(start_y,stop_y)]

In [11]:
# only correct data for gorongosa
climate_var_NEX_GDDP_CMIP6_to_correct = climate_var_NEX_GDDP_CMIP6_to_correct[climate_var_NEX_GDDP_CMIP6_to_correct['Name project']== 'Gorongosa_EIB']

In [12]:
climate_var_NEX_GDDP_CMIP6_to_correct.rename(columns = {'Name project': 'Name station'},inplace=True)
climate_var_NEX_GDDP_CMIP6_Project_to_fit.rename(columns = {'Name project': 'Name station'},inplace=True)

# Apply BC

In [13]:
# preprare list of model to BC
list_model=list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Model']))
list_model.remove('NESM3') # this model has too much Nan to be interesting
list_model.remove('CMCC-CM2-SR5')
list_model.remove('TaiESM1')

In [14]:
df_bc_corrected=pd.DataFrame()
for name_project in list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Name station'])):
    # select data for one project
    climate_var_NEX_GDDP_CMIP6_Project_temp=climate_var_NEX_GDDP_CMIP6_to_correct[climate_var_NEX_GDDP_CMIP6_to_correct['Name station']==name_project]
    # depending on project, don t fit the model with the same station data
    name_station = name_project 
    for scenario in list(set(climate_var_NEX_GDDP_CMIP6_to_correct['Experiment'])):
        # select data for one scenario
        climate_var_NEX_GDDP_CMIP6_Project_temp_2=climate_var_NEX_GDDP_CMIP6_Project_temp[climate_var_NEX_GDDP_CMIP6_Project_temp['Experiment']==scenario]
        for model in list_model:
            print('For '+name_project+', '+scenario+' and '+model)
            # select data for one model
            climate_var_NEX_GDDP_CMIP6_Project_temp_3=climate_var_NEX_GDDP_CMIP6_Project_temp_2[climate_var_NEX_GDDP_CMIP6_Project_temp_2['Model']==model]
            # concatenate observed and historic modeled data in one dataframe df
            df = treat_data_for_test(data_obs,name_col_obs,climate_var_NEX_GDDP_CMIP6_Project_to_fit,name_col_mod_hist,name_station,model)
            # apply method; choose method based on if climate_var is precipitation or temperature
            # X is the modeled historic set of data used to fit the model
            # y is the observed set of data used to fit the model
            # out is the product of the bias correction
            if climate_var == 'pr':
                (X_pcp,y_pcp,out)=BCSD_Precipitation_return_anoms_to_apply(df,climate_var_NEX_GDDP_CMIP6_Project_temp_3[['Date',name_col_mod_proj]].dropna().reset_index(drop=True))
            else:
                (X_temp,y_temp,out)=BCSD_Temperature_return_anoms_to_apply(df,climate_var_NEX_GDDP_CMIP6_Project_temp_3[['Date',name_col_mod_proj]].dropna().reset_index(drop=True))
            out = out.reset_index() # date is no longer the index
            out['Name project']=name_project # impose the name of the project in the dataframe
            out['Experiment']=scenario # impose the name of the scenario in the dataframe
            out['Model']=model # impose the name of the model in the dataframe
            df_bc_corrected=pd.concat([df_bc_corrected,out]) # concat the result to the other results

For Gorongosa_EIB, historical and MPI-ESM1-2-LR
For Gorongosa_EIB, historical and ACCESS-CM2
For Gorongosa_EIB, historical and MIROC6
For Gorongosa_EIB, historical and CMCC-ESM2
For Gorongosa_EIB, historical and MRI-ESM2-0
For Gorongosa_EIB, historical and IITM-ESM
For Gorongosa_EIB, historical and ACCESS-ESM1-5
For Gorongosa_EIB, historical and CanESM5
For Gorongosa_EIB, historical and BCC-CSM2-MR
For Gorongosa_EIB, historical and NorESM2-MM
For Gorongosa_EIB, historical and NorESM2-LM
For Gorongosa_EIB, historical and MPI-ESM1-2-HR


In [15]:
df_bc_corrected = df_bc_corrected.reset_index(drop=True) # reset index not to have several time the same index

In [16]:
df_bc_corrected['Date']=pd.to_datetime(df_bc_corrected[['Date']].Date,format='%Y-%d-%m')
df_bc_corrected['Date']=df_bc_corrected['Date'].astype(str)

In [17]:
df_bc_corrected

Unnamed: 0,Date,Mean of the daily precipitation rate mm_per_day,Name project,Experiment,Model
0,1980-01-01,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR
1,1980-01-02,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR
2,1980-01-03,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR
3,1980-01-04,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR
4,1980-01-05,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR
...,...,...,...,...,...
152986,2014-12-27,3.800000,Gorongosa_EIB,historical,MPI-ESM1-2-HR
152987,2014-12-28,6.332745,Gorongosa_EIB,historical,MPI-ESM1-2-HR
152988,2014-12-29,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-HR
152989,2014-12-30,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-HR


In [18]:
# add month, year and season. Will be usefull to apply CRVA
df_bc_corrected = add_year_month_season(df_bc_corrected,'Date')

In [19]:
df_bc_corrected

Unnamed: 0,Date,Mean of the daily precipitation rate mm_per_day,Name project,Experiment,Model,Year,Month,Season
0,1980-01-01,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR,1980,Jan,Humid
1,1980-01-02,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR,1980,Jan,Humid
2,1980-01-03,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR,1980,Jan,Humid
3,1980-01-04,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR,1980,Jan,Humid
4,1980-01-05,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-LR,1980,Jan,Humid
...,...,...,...,...,...,...,...,...
152986,2014-12-27,3.800000,Gorongosa_EIB,historical,MPI-ESM1-2-HR,2014,Dec,Humid
152987,2014-12-28,6.332745,Gorongosa_EIB,historical,MPI-ESM1-2-HR,2014,Dec,Humid
152988,2014-12-29,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-HR,2014,Dec,Humid
152989,2014-12-30,0.000000,Gorongosa_EIB,historical,MPI-ESM1-2-HR,2014,Dec,Humid


# Export result as csv file

In [20]:
path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\NEX-GDDP-CMIP6-AllMoz\csv_file'

In [21]:
if not os.path.isdir(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected')):
    os.makedirs(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected'))

In [22]:
df_bc_corrected.to_csv(os.path.join(path,climate_var,climate_var+'_'+unit+'_'+resolution+'_'+str(start_y)+'-'+str(stop_y)+'_BiasCorrected',climate_var+'_'+unit+resolution+str(start_y)+'-'+str(stop_y)+'_BiasCorrected_OnlyGorongosa.csv'))