Bias correction based on the [python Package scikit-downscale](https://github.com/pangeo-data/scikit-downscale/blob/main/examples/2020ECAHM-scikit-downscale.ipynb)

Here, only the pointwise method to apply

First, comparison of observation and modelled data's behaviour

# User input

# Packages and functions

## Packages

In [28]:
import pandas as pd
import numpy as np

## Functions

# Project information
Those project were chosen based on the interest of the company (decide with SIPA and RAPY)

In [29]:
name_projects_data = np.array(['WTP_Mutua_EIB', 'Gorongosa_EIB', 'Chimoio_WTP_EIB', 'Pemba_EIB'])
name_projects = pd.Series(name_projects_data)

lon_projects_data = np.array([34.5927839939706, 34.07824286310398 , 33.47333313659342, 40.52545156033736])
lon_projects = pd.Series(lon_projects_data)

lat_projects_data = np.array([-19.495079648575242, -18.68063728746643, -19.125095255188334,-12.973942656747809])
lat_projects = pd.Series(lat_projects_data)

In [30]:
lon_projects

0    34.592784
1    34.078243
2    33.473333
3    40.525452
dtype: float64

# Comparaison between observational data and modeled data

## Observational data coming from NOAA
[Global Historical Climatology Network daily (GHCNd) | National Centers for Environmental Information (NCEI) (noaa.gov)](https://www.ncei.noaa.gov/products/land-based-station/global-historical-climatology-network-daily), climate data online

In [3]:
# path where the file is placed
path_file_NOAA = r'C:\Users\CLMRX\COWI\A248363 - Climate analysis - Documents\General\CRVA_tool\Master_thesis\Project\3 - Implementation\1 - Data\1-BC\NOAA-ClimateDataOnline\3370204.csv'

In [21]:
# read the information in the file
data_obs_NOAA = pd.read_csv(path_file_NOAA)
data_obs_NOAA
# unit of PRCP are mm
# unit of temperature are degrees Celsius

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,PRCP_ATTRIBUTES,TAVG,TAVG_ATTRIBUTES,TMAX,TMAX_ATTRIBUTES,TMIN,TMIN_ATTRIBUTES
0,MZM00067223,"MONTEPUEZ, MZ",-13.133,39.033,535.0,1974-04-05,2.0,",,S",23.8,"H,,S",29.0,",D,S",20.0,",,S"
1,MZM00067223,"MONTEPUEZ, MZ",-13.133,39.033,535.0,1974-06-17,0.0,",,S",18.2,"H,,S",27.0,",,S",11.0,",,S"
2,MZM00067223,"MONTEPUEZ, MZ",-13.133,39.033,535.0,1974-06-23,,,21.0,"H,,S",,,,
3,MZM00067223,"MONTEPUEZ, MZ",-13.133,39.033,535.0,1974-07-02,0.0,",,S",21.5,"H,,S",,,16.0,",,S"
4,MZM00067223,"MONTEPUEZ, MZ",-13.133,39.033,535.0,1974-07-03,0.0,",,S",20.5,"H,,S",,,16.0,",,S"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161770,MZ000067297,"BEIRA, MZ",-19.800,34.900,16.0,2020-12-27,,,28.8,"H,,S",32.3,",,S",,
161771,MZ000067297,"BEIRA, MZ",-19.800,34.900,16.0,2020-12-28,,,29.4,"H,,S",,,25.0,",,S"
161772,MZ000067297,"BEIRA, MZ",-19.800,34.900,16.0,2020-12-29,,,29.6,"H,,S",,,26.0,",,S"
161773,MZ000067297,"BEIRA, MZ",-19.800,34.900,16.0,2020-12-30,24.9,",,S",28.3,"H,,S",31.2,",,S",25.7,",,S"


In [None]:
# find which stations are of interest, which one are the closest to the point of interest

In [37]:
# save in a dataframe name, latitudes and longitudes informations for each station
df_station_NOAA=data_obs_NOAA.loc[:, ["NAME", "LATITUDE","LONGITUDE"]]
df_station_NOAA.drop_duplicates(inplace = True) # drop duplicates to only have name of the towns and latitudes and longitudes
df_station_NOAA.reset_index(drop=True,inplace = True)  # drop = true avoids to keep the former index
# inplace = True modifies the original dataframe

In [38]:
df_station_NOAA

Unnamed: 0,NAME,LATITUDE,LONGITUDE
0,"MONTEPUEZ, MZ",-13.133,39.033
1,"CHIMOIO, MZ",-19.117,33.467
2,"TETE, MZ",-16.183,33.583
3,"CHANGALANE, MZ",-26.283,32.183
4,"MOCIMBOA DA PRAIA, MZ",-11.362,40.355
5,"VILANKULO, MZ",-22.018,35.313
6,"PANDA INHAMBANE, MZ",-24.05,34.05
7,"PAFURI, SF",-22.45,31.317
8,"LICHINGA, MZ",-13.3,35.233
9,"PEMBA, MZ",-12.983,40.533


In [87]:
name_closest_station_to_project = [] # create an empty list to contain the name of the closest station to each project
index_closest_station_to_project = []
for i in np.arange(0,len(name_projects)):
    # calculate difference between the different coordinates
    df_station_NOAA['Diff latitude project '+str(i)] = abs(abs(df_station_NOAA['LATITUDE']) - abs(lat_projects[i]))
    df_station_NOAA['Diff longitude project '+str(i)] = abs(abs(df_station_NOAA['LONGITUDE']) - abs(lon_projects[i]))
    df_station_NOAA['Diff coordinates project '+str(i)] = df_station_NOAA['Diff latitude project '+str(i)]+df_station_NOAA['Diff longitude project '+str(i)]
    # register the name of the stations that are the closest to the projects and the index in df_station_NOAA corresponding to those closest stations
    name_closest_station_to_project.append(df_station_NOAA['NAME'].iloc[np.where(df_station_NOAA['Diff coordinates project '+str(i)]==min(df_station_NOAA['Diff coordinates project '+str(i)]))[0][0]])
    index_closest_station_to_project.append(np.where(df_station_NOAA['Diff coordinates project '+str(i)]==min(df_station_NOAA['Diff coordinates project '+str(i)]))[0][0])

# take off the duplicates from the list of name of station which are the closest to our projects and the indexes in the dataframe of those corresponding stations
name_closest_station_to_project_without_duplicates=list(set(name_closest_station_to_project))
index_closest_station_to_project_without_duplicates=list(set(index_closest_station_to_project))

In [104]:
for k in np.arange(len(index_closest_station_to_project_without_duplicates)):
    print('Name '+df_station_NOAA['NAME'][index_closest_station_to_project_without_duplicates[k]])
    print('Longitude '+str(df_station_NOAA['LONGITUDE'][index_closest_station_to_project_without_duplicates[k]]))
    print('Latitude '+str(df_station_NOAA['LATITUDE'][index_closest_station_to_project_without_duplicates[k]]))
    print('\n')

Name CHIMOIO, MZ
Longitude 33.467
Latitude -19.117


Name BEIRA, MZ
Longitude 34.9
Latitude -19.8


Name PEMBA, MZ
Longitude 40.533
Latitude -12.983




In [71]:
data_obs_NOAA_station_of_interest = data_obs_NOAA.loc[(data_obs_NOAA['NAME']==name_closest_station_to_project[0]) or (data_obs_NOAA['NAME']==name_closest_station_to_project[1]) or (data_obs_NOAA['NAME']==name_closest_station_to_project[2]) or (data_obs_NOAA['NAME']==name_closest_station_to_project[3])]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [84]:
data_obs_NOAA[(data_obs_NOAA['NAME']==name_closest_station_to_project[2]) | (data_obs_NOAA['NAME']==name_closest_station_to_project[1])]

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,PRCP_ATTRIBUTES,TAVG,TAVG_ATTRIBUTES,TMAX,TMAX_ATTRIBUTES,TMIN,TMIN_ATTRIBUTES
3019,MZ000067295,"CHIMOIO, MZ",-19.117,33.467,732.0,1970-01-01,0.4,",,Q",,,30.0,",,Q",20.7,",,Q"
3020,MZ000067295,"CHIMOIO, MZ",-19.117,33.467,732.0,1970-01-02,0.0,",,Q",,,30.5,",,Q",21.2,",,Q"
3021,MZ000067295,"CHIMOIO, MZ",-19.117,33.467,732.0,1970-01-03,0.0,",,Q",,,28.7,",,Q",20.5,",,Q"
3022,MZ000067295,"CHIMOIO, MZ",-19.117,33.467,732.0,1970-01-04,0.0,",,Q",,,28.8,",,Q",19.2,",,Q"
3023,MZ000067295,"CHIMOIO, MZ",-19.117,33.467,732.0,1970-01-05,0.0,",,Q",,,30.1,",,Q",20.2,",,Q"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161770,MZ000067297,"BEIRA, MZ",-19.800,34.900,16.0,2020-12-27,,,28.8,"H,,S",32.3,",,S",,
161771,MZ000067297,"BEIRA, MZ",-19.800,34.900,16.0,2020-12-28,,,29.4,"H,,S",,,25.0,",,S"
161772,MZ000067297,"BEIRA, MZ",-19.800,34.900,16.0,2020-12-29,,,29.6,"H,,S",,,26.0,",,S"
161773,MZ000067297,"BEIRA, MZ",-19.800,34.900,16.0,2020-12-30,24.9,",,S",28.3,"H,,S",31.2,",,S",25.7,",,S"


# Data at the same emplacement coming from NEX GDDP CMIP6
 ADD LINK NEX GDDP CMIP6 AND TECHNICAL NOTE

In [105]:
path_NEX_GDDP_CMIP6_EmplacementStation = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\NEX-GDDP-CMIP6-AllMoz\csv_file\pr\pr_mm_per_day_day_1970-2014\EmplacementStationNOAA_pr_1970-2014_projectsMoz.csv'

In [106]:
data_NEX_GDDP_CMIP6_EmplacementStation = pd.read_csv(path_NEX_GDDP_CMIP6_EmplacementStation)

In [107]:
data_NEX_GDDP_CMIP6_EmplacementStation

Unnamed: 0,Name station,Experiment,Model,Latitude,Longitude,Date,Mean of the daily precipitation rate mm/day
0,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,01-01-1970,12.524136
1,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,02-01-1970,8.813054
2,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,03-01-1970,15.381735
3,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,04-01-1970,4.983678
4,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,05-01-1970,2.094941
...,...,...,...,...,...,...,...
739615,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,27-12-2014,3.778890
739616,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,28-12-2014,5.969581
739617,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,29-12-2014,3.112424
739618,"BEIRA, MZ",historical,TaiESM1,-19.875,34.875,30-12-2014,0.000000


## Compare Pemba station

In [143]:
# meteorological data from NOAA
# unit of precipitation is mm
pr_obs_NOAA=data_obs_NOAA[['DATE','PRCP']][data_obs_NOAA['NAME']=='PEMBA, MZ'].reset_index(drop=True)
pr_obs_NOAA

Unnamed: 0,DATE,PRCP
0,1973-01-20,
1,1973-02-07,
2,1973-02-08,
3,1973-02-16,
4,1973-04-04,
...,...,...
11363,2020-12-27,0.0
11364,2020-12-28,
11365,2020-12-29,
11366,2020-12-30,


In [139]:
# trier par date pour selectionner seulement les lignes d'interet dans les deux tableaux
pr_obs_NOAA['Year']
pr_obs_NOAA[['DATE']]#.iloc[0][0][0:4]

Unnamed: 0,DATE
0,1973-01-20
1,1973-02-07
2,1973-02-08
3,1973-02-16
4,1973-04-04
...,...
11363,2020-12-27
11364,2020-12-28
11365,2020-12-29
11366,2020-12-30


In [None]:
pr_obs_NOAA[pr_obs_NOAA[['DATE']].iloc[0][0][0:4]]

In [109]:
# data from NEX GDDP CMIP6 at the emplacement of the station Pemba
data_NEX_GDDP_CMIP6_EmplacementStation[data_NEX_GDDP_CMIP6_EmplacementStation['Name station']=='PEMBA, MZ']

Unnamed: 0,Name station,Experiment,Model,Latitude,Longitude,Date,Mean of the daily precipitation rate mm/day
0,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,01-01-1970,12.524136
1,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,02-01-1970,8.813054
2,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,03-01-1970,15.381735
3,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,04-01-1970,4.983678
4,"PEMBA, MZ",historical,ACCESS-CM2,-12.875,40.625,05-01-1970,2.094941
...,...,...,...,...,...,...,...
246535,"PEMBA, MZ",historical,TaiESM1,-12.875,40.625,27-12-2014,0.000000
246536,"PEMBA, MZ",historical,TaiESM1,-12.875,40.625,28-12-2014,0.000000
246537,"PEMBA, MZ",historical,TaiESM1,-12.875,40.625,29-12-2014,0.000000
246538,"PEMBA, MZ",historical,TaiESM1,-12.875,40.625,30-12-2014,0.179064


In [142]:
data_NEX_GDDP_CMIP6_EmplacementStation['Date'][0]

'01-01-1970'

In [None]:
# do box plot for different model

In [None]:
# plot temporal evolution accross years for each models