# User input

In [1]:
# Analysis windows, expressed as (first year, last year) pairs.
# "past" is the reference period and "future" the period it is compared with.
start_year_past, stop_year_past = 2021, 2040
start_year_future, stop_year_future = 2041, 2060

# Import package

In [2]:
from scipy import stats
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from scipy.stats import gumbel_r
from scipy.stats import gumbel_l
import os
import os.path
import math

from Functions_Indicators_precipitation import changes_in_indicators

# Import data

In [3]:
# Root folder of the NEX-GDDP-CMIP6 extracts and path of the daily precipitation CSV.
out_path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\NEX-GDDP-CMIP6'
csv_path = os.path.join(
    out_path,
    'csv_file',
    'pr_day_2021-2060',
    'All_projects_moz_precipitation_2021-2060_without_month_year.csv',
)
# The CSV has two header rows and five index columns.
# Values are converted to mm/day: 1 kg/m2/s = 86400 mm/day
# source: https://www.researchgate.net/post/How-do-I-convert-ERA-Interim-precipitation-estimates-from-kg-m2-s-to-mm-day
precipitation_2021_2060 = pd.read_csv(csv_path, header=[0, 1], index_col=[0, 1, 2, 3, 4]).mul(86400)
# Every calendar day of the covered period, formatted as 'dd-mm-YYYY' strings.
list_time_2021_2060 = pd.date_range('01-01-2021', '31-12-2060', freq='D').strftime('%d-%m-%Y').tolist()

In [4]:
precipitation_2021_2060_copy = precipitation_2021_2060.copy(deep=True) # make a copy that can be modified without changing the original

In [5]:
precipitation_2021_2060_copy

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Longitude,Longitude,Longitude,Longitude
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,33.625,39.875,36.875,40.375
Name project,Experiment,Model,Date,Latitude,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
PT_Revubue_2_Rev_2_01,ssp245,ACCESS-CM2,01-01-2021,-16.125,5.057179,,,
PT_Revubue_2_Rev_2_01,ssp245,ACCESS-CM2,02-01-2021,-16.125,17.624178,,,
PT_Revubue_2_Rev_2_01,ssp245,ACCESS-CM2,03-01-2021,-16.125,7.632648,,,
PT_Revubue_2_Rev_2_01,ssp245,ACCESS-CM2,04-01-2021,-16.125,4.736492,,,
PT_Revubue_2_Rev_2_01,ssp245,ACCESS-CM2,05-01-2021,-16.125,4.981353,,,
...,...,...,...,...,...,...,...,...
PT__Dumping_Site,ssp370,TaiESM1,27-12-2060,-13.125,,,,0.000000
PT__Dumping_Site,ssp370,TaiESM1,28-12-2060,-13.125,,,,0.435942
PT__Dumping_Site,ssp370,TaiESM1,29-12-2060,-13.125,,,,6.300328
PT__Dumping_Site,ssp370,TaiESM1,30-12-2060,-13.125,,,,8.168522


In [None]:
# Remove the model 'NESM3', which does not have any value.
# Index level 2 is the 'Model' level. `drop` returns a new frame, so the cell can be
# re-run safely, and errors='ignore' keeps it working when the model is already absent.
precipitation_2021_2060_copy = precipitation_2021_2060.drop('NESM3', level=2, axis=0, errors='ignore')

# Explicit check instead of relying on a caught KeyError as the success signal.
# `get_level_values` is used because `index.levels` still lists dropped labels.
assert 'NESM3' not in precipitation_2021_2060_copy.index.get_level_values(2), \
    "model 'NESM3' is still present after the drop"

In [None]:
# Reshape the raw frame into one precipitation column per (project, experiment, model, date).
precipitation_column = 'Precipitation mm/day period 2021-2060'

df_years = (
    precipitation_2021_2060_copy
    .droplevel(0, axis=1)            # drop first level of column name, keep the longitude values
    .fillna(0)                       # missing values count as 0 in the sum below
    .sum(axis=1)                     # add up all longitude columns, however many there are
    .to_frame(precipitation_column)
    .droplevel(level=4)              # drop latitude index
    .reset_index()
)

# 'Date' is a 'dd-mm-YYYY' string: characters 6-9 are the year.
# Vectorised extraction gives an integer column instead of an object-dtype one filled row by row.
df_years['Year'] = df_years['Date'].str[6:10].astype(int)
df_years

In [None]:
# Keep the rows of the past period: start_year_past <= Year <= stop_year_past
df_past = df_years[df_years['Year'].between(start_year_past, stop_year_past)]
df_past

In [None]:
# Keep the rows of the future period: start_year_future <= Year <= stop_year_future
df_future = df_years[df_years['Year'].between(start_year_future, stop_year_future)]
df_future

## Average yearly precipitation

In [None]:
from Functions_Indicators_precipitation import yearly_avg_pr

In [None]:
# Past period: the column title carries the period so the result is self-describing
title_column = f'Yearly average precipitation mm/year period {start_year_past}-{stop_year_past}'
df_years_avg_past_distribution = yearly_avg_pr(df_past, title_column)

In [None]:
# Future period: same indicator as for the past period, computed on df_future
title_column = f'Yearly average precipitation mm/year period {start_year_future}-{stop_year_future}'
df_years_avg_future_distribution = yearly_avg_pr(df_future, title_column)

In [None]:
# Comparison between past and future period

In [None]:
# Table of changes between the past and the future period; later cells merge one
# indicator at a time into it.
changes_past_future_indicator1 = changes_in_indicators(df_years_avg_past_distribution,df_years_avg_future_distribution,'Average yearly precipitation','mm/year','Incremental rainfall change')
# Start the table from a copy of the first indicator rather than concatenating onto an
# empty DataFrame, which recent pandas versions deprecate.
# NOTE(review): assumes changes_in_indicators returns a DataFrame - confirm.
changes_past_future_indicator = changes_past_future_indicator1.copy()
changes_past_future_indicator

In [None]:
df_past

In [None]:
# Boxplot of the yearly precipitation totals of one project, one box per scenario.
project_name = 'PT_Revubue_2_Rev_2_01'
yearly_column = 'Yearly precipitation mm/year'

df_yearly_avg = df_past.drop(labels='Date', axis=1)
# After dropping 'Date', the column at position 3 is the daily precipitation.
df_yearly_avg = df_yearly_avg.rename(columns={df_yearly_avg.columns[3]: yearly_column})
df_yearly_avg = df_yearly_avg.groupby(['Name project','Experiment','Model','Year']).sum() # sum per year

# One 1-D array per scenario (all models and years pooled). Each array keeps its own
# length; the previous code reshaped the ssp585 values with the ssp126 length.
scenarios = df_yearly_avg.index.levels[1].tolist()
yearly_totals_per_scenario = [
    df_yearly_avg.loc[(project_name, scenario), yearly_column].to_numpy()
    for scenario in scenarios
]

# plt.boxplot draws the figure and returns its components in dict_boxplot.
# 'notch' is True to highlight where the median is
# 'whis' gives the percentiles of the whiskers; data outside that range are drawn as points
# 'widths' sets the width of the boxes
# 'patch_artist' colours the boxes
# 'labels' names each box, here with the scenario it represents
dict_boxplot = plt.boxplot(yearly_totals_per_scenario, notch=True, whis=(10,90), widths=0.15, patch_artist=True, labels=scenarios)

plt.xlabel('Scenarios')
plt.ylabel('Yearly precipitation mm/year for project PT_Revubue_2_Rev_2_01')
# To save the figure, call plt.savefig(<path>, format='png') before plt.show().
plt.show()

In [None]:
# Average the yearly totals over the years for each (project, experiment, model)
# and give the single value column a descriptive name.
title_column = 'Yearly average precipitation mm/year'
df_yearly_avg = (
    df_yearly_avg
    .groupby(['Name project','Experiment','Model'])
    .mean()
    .pipe(lambda frame: frame.rename(columns={frame.columns[0]: title_column}))
)
df_yearly_avg

In [None]:
# Boxplot of the average yearly precipitation, one box per project
# (all scenarios and models pooled). The projects are read from the index
# instead of being hard-coded to exactly seven entries.
project_names = df_yearly_avg.index.levels[0].tolist()
yearly_average_per_project = [
    df_yearly_avg.loc[project, title_column].to_numpy()
    for project in project_names
]

# plt.boxplot draws the figure and returns its components in dict_boxplot.
# 'notch' is True to highlight where the median is
# 'whis' gives the percentiles of the whiskers; data outside that range are drawn as points
# 'widths' sets the width of the boxes
# 'patch_artist' colours the boxes
# 'labels' names each box, here with the project it represents
dict_boxplot = plt.boxplot(yearly_average_per_project, notch=True, whis=(10,90), widths=0.15, patch_artist=True, labels=project_names)

plt.xlabel('Projects')
# The figure shows every project, so the axis label no longer names a single one.
plt.ylabel('Average yearly precipitation mm/year')
# To save the figure, call plt.savefig(<path>, format='png') before plt.show().
plt.show()

## Seasonal average precipitation

In [None]:
from Functions_Indicators_precipitation import avg_dry_season_precipitation

In Mozambique, the dry season runs from April to September and the humid season from October to March.

In [None]:
# historic

In [None]:
# Dry-season average for the past period. The title uses the same wording as the
# future period ('mm/dry season') so both columns are labelled consistently.
title_column1 = 'Yearly average precipitation mm/dry season for the period '+str(start_year_past)+'-'+str(stop_year_past)
pr_past_dry_season_mean_distribution = avg_dry_season_precipitation(df_past,title_column1)

In [None]:
# future

In [None]:
# Dry-season average for the future period
title_column1 = f'Yearly average precipitation mm/dry season for the period {start_year_future}-{stop_year_future}'
pr_future_dry_season_mean_distribution = avg_dry_season_precipitation(df_future, title_column1)

In [None]:
pr_future_dry_season_mean_distribution

In [None]:
# Comparison between past and future period

In [None]:
# Change in dry-season precipitation between the two periods, appended to the
# summary table by an outer join on the index.
changes_past_future_indicator1 = changes_in_indicators(
    pr_past_dry_season_mean_distribution,
    pr_future_dry_season_mean_distribution,
    'Average dry season precipitation',
    'mm/(months of dry season)',
    'Incremental rainfall change',
)
changes_past_future_indicator = changes_past_future_indicator.merge(
    changes_past_future_indicator1, left_index=True, right_index=True, how='outer'
)

In [None]:
changes_past_future_indicator

## Return period

In [None]:
 #INDICATOR NOT FINISHED
#from Functions_Indicators_precipitation import threshold_coresponding_to_return_period
from Functions_Indicators_precipitation import dataframe_threshold_coresponding_to_return_period
#from Functions_Indicators_precipitation import dataframe_future_return_period_of_1_day_event

In [None]:
# past

In [None]:
# Result of the project helper for the past period. A later cell reads its
# 'Value for return period 100 years mm/day' column per (project, experiment, model).
return_period_past=dataframe_threshold_coresponding_to_return_period(df_past)
return_period_past

In [None]:
# Future return period of the daily rainfall that currently has a 100-year return period.
# For every (project, experiment, model): fit a Gumbel distribution to the annual maxima of
# the future period, then evaluate how often the past 100-year value is exceeded under it.
# NOTE: the past side of this indicator is 100 years for every model, so its median, p10
# and p90 are identical; only the future side has a spread.
df_copy = df_future.drop(labels='Date', axis=1)
df_max_future = df_copy.groupby(['Name project','Experiment','Model','Year']).max() # maximum

cols = ['Return period years of current 100 year return period']
past_value_column = 'Value for return period 100 years mm/day'

combinations = []
future_return_periods = []
# Iterating over the groups that actually exist gives a deterministic order (no set())
# and does not assume every combination has the same number of years.
for (name_p, ssp, model), annual_maxima in df_max_future.groupby(level=['Name project','Experiment','Model']):
    Z = annual_maxima.to_numpy().ravel()
    (loc1, scale1) = stats.gumbel_r.fit(Z) # location and scale of the fitted Gumbel distribution
    # NOTE(review): assumes this lookup returns a single value per combination - confirm.
    past_value = return_period_past.loc[(name_p, ssp, model), past_value_column]
    # Return period = 1 / probability of exceedance. The survival function is 1 - cdf,
    # evaluated here with the fitted parameters (the previous code used an undefined `scale`).
    p_exceedance = stats.gumbel_r.sf(past_value, loc=loc1, scale=scale1)
    new_return_period = np.divide(1.0, p_exceedance) # inf if the value is never exceeded
    combinations.append((name_p, ssp, model))
    future_return_periods.append(new_return_period)
    print('former value corresponding to 100 years return period '+str(past_value))
    print('p non exceedance '+str(stats.gumbel_r.cdf(past_value, loc=loc1, scale=scale1)))
    print('New return period '+str(new_return_period))

midx = pd.MultiIndex.from_tuples(combinations, names=['Name project','Experiment', 'Model'])
return_period_future = pd.DataFrame({cols[0]: future_return_periods}, index=midx)

In [None]:
return_period_future

In [None]:
# Per-project summary statistics (10th, 50th and 90th percentiles included) over all
# experiments and models. Values are cast to int before describe().
return_period_future_distribution = return_period_future.astype(int).groupby(['Name project']).describe(percentiles=[.1, .5, .9])
return_period_future_distribution

In [None]:
# Reference frame for the past period: same index as return_period_future,
# with the return period of interest fixed to 100 years on every row.
return_period_past_100 = pd.DataFrame(
    100,
    index=return_period_future.index,
    columns=['Current return period of interest years'],
)
return_period_past_100

In [None]:
# Same per-project summary as for the future frame, so both have the same layout
# when passed to changes_in_indicators. Every value is 100 here.
return_period_past_100_distribution = return_period_past_100.astype(int).groupby(['Name project']).describe(percentiles=[.1, .5, .9])
return_period_past_100_distribution

In [None]:
# Comparison between past and future period for the return-period indicator,
# appended to the summary table by an outer join on the index.
changes_past_future_indicator1 = changes_in_indicators(
    return_period_past_100_distribution,
    return_period_future_distribution,
    'Future return period of one day with a current return period of 100 years',
    '[years]',
    'Extreme rainfall change',
)
changes_past_future_indicator = changes_past_future_indicator.merge(
    changes_past_future_indicator1, left_index=True, right_index=True, how='outer'
)
changes_past_future_indicator

## N-day event

### 1-day event

In [None]:
from Functions_Indicators_precipitation import dataframe_1_day_event

In [None]:
# past period: 1-day event indicator computed by the project helper on df_past
df_1_day_event_past = dataframe_1_day_event(df_past)
df_1_day_event_past

In [None]:
# future period: 1-day event indicator computed by the project helper on df_future
df_1_day_event_future = dataframe_1_day_event(df_future)
df_1_day_event_future

In [None]:
# Per-project summary statistics for both periods, computed the same way.
# astype(int) is necessary to have stats min, max and percentiles.
df_1_day_event_past_distribution, df_1_day_event_future_distribution = (
    frame.astype(int).groupby(['Name project']).describe(percentiles=[.1, .5, .9])
    for frame in (df_1_day_event_past, df_1_day_event_future)
)

In [None]:
# Comparison between past and future period for the 1-day event,
# appended to the summary table by an outer join on the index.
changes_past_future_indicator1 = changes_in_indicators(
    df_1_day_event_past_distribution,
    df_1_day_event_future_distribution,
    'Maximum one day rainfall',
    'mm',
    'Extreme rainfall change',
)
changes_past_future_indicator = changes_past_future_indicator.merge(
    changes_past_future_indicator1, left_index=True, right_index=True, how='outer'
)
changes_past_future_indicator

### 5-day event

In [None]:
from Functions_Indicators_precipitation import dataframe_n_day_event
from Functions_Indicators_precipitation import df_to_csv

In [None]:
# file of 5 day event
# The result is cached as a CSV under out_path: it is computed only when the file
# does not exist yet, otherwise it is read back from disk.
title_file = 'pr_5_day_event_'+str(start_year_past)+'-'+str(stop_year_future)+'.csv'
path_for_csv = os.path.join(out_path,'csv_file','pr_5_day_event')
path_to_csv_5_day_event = os.path.join(path_for_csv,title_file)
if not os.path.isfile(path_to_csv_5_day_event):
    print('The file has not been computed')
    df_5_day_event=dataframe_n_day_event(precipitation_2021_2060_copy,5)
    # first parameter is a dataframe, second is the number of days
    # NOTE(review): df_5_day_event is not passed to df_to_csv here - confirm against the
    # helper's signature that the computed frame really gets written to the cache file.
    path_to_csv_5_day_event = df_to_csv(path_for_csv,title_file)
else:
    print('The file has already been computed')
    # The cached file is read with one header row and four index columns.
    df_5_day_event = pd.read_csv(path_to_csv_5_day_event,header=[0],index_col=[0,1,2,3])
df_5_day_event

In [None]:
# Move the index levels back to columns and derive the year of each row.
df_5_day_event = df_5_day_event.reset_index()

# 'Date' is a 'dd-mm-YYYY' string: characters 6-9 are the year.
# Vectorised extraction gives an integer column instead of an object-dtype one filled row by row.
df_5_day_event['Year'] = df_5_day_event['Date'].str[6:10].astype(int)
df_5_day_event

In [None]:
# Annual maximum per (project, experiment, model, year); the 'Date' column is then removed.
df_5_day_event = (
    df_5_day_event
    .groupby(['Name project', 'Experiment', 'Model', 'Year'])
    .max()
    .drop(columns='Date')
)
df_5_day_event

In [None]:
# Flatten the index, then split the annual maxima into the past and future periods
# (both year bounds included).
df_5_day_event_bis = df_5_day_event.reset_index()
df_5_day_event_past = df_5_day_event_bis[df_5_day_event_bis['Year'].between(start_year_past, stop_year_past)]
df_5_day_event_future = df_5_day_event_bis[df_5_day_event_bis['Year'].between(start_year_future, stop_year_future)]

In [None]:
df_5_day_event_bis

In [None]:
df_5_day_event_past

In [None]:
df_5_day_event_future

In [None]:
# Past rows whose 5-day maximum is missing, and the distinct models they belong to
NaN_values = df_5_day_event_past[df_5_day_event_past['Maximum 5 days rainfall mm'].isna()]
model_to_delete = list(set(NaN_values['Model']))

In [None]:
df_5_day_event_past.Model

In [None]:
# Remove every model that has at least one missing value in the past period.
# NOTE(review): only the past frame is filtered here; df_5_day_event_future keeps these
# models - confirm that this is intended.
df_5_day_event_past = df_5_day_event_past[~df_5_day_event_past['Model'].isin(model_to_delete)]
df_5_day_event_past

In [None]:
# Put (project, experiment, model, year) back in the index of both period frames.
df_5_day_event_past, df_5_day_event_future = (
    frame.groupby(['Name project', 'Experiment', 'Model', 'Year']).max()
    for frame in (df_5_day_event_past, df_5_day_event_future)
)

In [None]:
df_5_day_event_future

In [None]:
# Per-project summary statistics for both periods, computed the same way.
# astype(int) is necessary to have stats min, max and percentiles.
df_5_day_event_past_distribution, df_5_day_event_future_distribution = (
    frame.astype(int).groupby(['Name project']).describe(percentiles=[.1, .5, .9])
    for frame in (df_5_day_event_past, df_5_day_event_future)
)
df_5_day_event_future_distribution

In [None]:
# Comparison between past and future period for the 5-day event,
# appended to the summary table by an outer join on the index.
changes_past_future_indicator1 = changes_in_indicators(
    df_5_day_event_past_distribution,
    df_5_day_event_future_distribution,
    'Maximum 5 days rainfall',
    'mm',
    'Extreme rainfall change',
)
changes_past_future_indicator = changes_past_future_indicator.merge(
    changes_past_future_indicator1, left_index=True, right_index=True, how='outer'
)
changes_past_future_indicator

In [None]:
changes_past_future_indicator['Extreme rainfall change']

# Exposure level

In [None]:
from Functions_Indicators_precipitation import level_exposure
# Are the projects vulnerable? Exposure level computed by the project helper from the
# table of changes between the past and the future period.
Exposure=level_exposure(changes_past_future_indicator)
Exposure