Color map for well distributed colors

In [12]:
# Shared colour registry: every plot looks a country up here so that the
# same country is always drawn with the same colour.
colorMapWorld = {}


def colorMap(states):
    """Give every country in ``states`` a random, fully opaque RGBA colour.

    Parameters:
    states -> list of the names of the countries

    Nothing is returned: the module-level ``colorMapWorld`` dictionary is
    filled in place, mapping each name to a ``[r, g, b, 1]`` list. A name
    that is already present receives a freshly drawn colour.
    """
    for state in states:
        # three random channels in [0, 1) followed by an alpha of 1
        rgba = [*np.random.random(size=3), 1]
        colorMapWorld[state] = rgba
   

Function to plot the growth of a value starting from a specific number of cases. The countries whose values are plotted are passed to the function as a list.

In [3]:
def plotGrowthDataFromFirstCases(dataset, countries_list, column_reference,
                               graph_title, starting_number, y_axis_max_limit = -1):
    """Plot, for each requested country, how a column grows once it has
        reached a given starting value.
        Parameters:
        dataset -> dataset the data are read from; it is filtered on its
            'Country' column
        countries_list -> countries whose curves are drawn
        column_reference -> name of the column under analysis
        graph_title -> title shown above the graph
        starting_number -> only rows whose value is at least this number
            are plotted
        y_axis_max_limit -> upper bound of the y axis; the default -1 (or any
            non-positive value) leaves the range to matplotlib
    """
    plt.figure(figsize=(15, 7.5))
    for country in countries_list:
        country_rows = dataset[dataset['Country'] == country]
        reached = country_rows[column_reference] >= starting_number
        growth = country_rows.loc[reached, column_reference]
        # x axis is simply the number of rows since the threshold was reached
        elapsed = range(len(growth))
        plt.plot(elapsed, growth, color=colorMapWorld[country], label=country)
    plt.xticks(rotation=90)
    plt.legend()

    plt.title(graph_title)
    if y_axis_max_limit > 0:
        plt.ylim(0, y_axis_max_limit)
    plt.xlabel("Days from the starting point")
    plt.ylabel("Total number")
    plt.show()

Define a function to evaluate the number of cases per million of inhabitants.

In [13]:
def evaluateRatePerNumberOfMillion(dataset, ref_column, new_column_name):
    """Add a column holding the reference column expressed per million
       inhabitants.
       Parameters:
       dataset -> input dataset; it must contain a "Population" column with
           the total number of inhabitants. It is modified in place.
       ref_column -> column whose values are divided by the population
       new_column_name -> name of the column that receives the result
       Returns the same dataset object, now carrying the new column.
    """
    per_inhabitant = dataset[ref_column].div(dataset['Population'])
    dataset[new_column_name] = per_inhabitant.mul(1000000)
    return dataset

Define a function that creates a new dataframe containing the growth rate for each country. It is possible to specify the time interval to consider, expressed in days.

In [15]:
def evaluateGrowthRate(dataset, number_of_days, reference_column):
    """Build a date x country table of growth values for one column.

       The dates go from the minimum to the maximum value of the 'Date'
       column of the input, sampled every ``number_of_days`` days. For each
       sampled date and each country the cell holds

           (value at this date - value at the previous sampled date)
               / (value in the country's last row) * 100

       i.e. the increment is expressed as a percentage of the value found
       in the last row stored for that country, not of the previous value.
       A cell is left at 0 when:
         * it belongs to the first sampled date (nothing to compare with),
         * the country has no row for the current or the previous date,
         * the previous value is not greater than 0,
         * the value in the country's last row is 0 (the division would
           otherwise produce inf/nan).
       Parameters:
       dataset -> dataset to gather the data from; it needs the columns
           'Date', 'Country' and ``reference_column``. It is not modified.
       number_of_days -> sampling step, in days
       reference_column -> column of the dataset used to evaluate the growth
       Returns a new dataframe indexed by the sampled dates with one column
       per country.
    """
    minimum_date = dataset['Date'].min()
    maximum_date = dataset['Date'].max()
    list_of_country = dataset['Country'].unique()
    date_list = pd.date_range(start = minimum_date, end = maximum_date,
                              freq = str(number_of_days) + 'D')
    # start from an all-zero table: cells that cannot be evaluated stay 0
    zero_data = np.zeros(shape=(len(date_list), len(list_of_country)))
    df_growth = pd.DataFrame(zero_data, index = date_list, columns = list_of_country)

    # 'YYYY-MM-DD' labels, used to match the rows of the input on their date
    date_labels = list(date_list.strftime('%Y-%m-%d'))
    # (previous label, current label, current timestamp) for every sampled
    # date except the first one, which has no predecessor
    date_steps = list(zip(date_labels[:-1], date_labels[1:], date_list[1:]))

    for country in list_of_country:
        # the rows of a country do not depend on the date: select them once
        country_values = dataset.loc[dataset['Country'] == country]
        total_value = country_values[reference_column].iloc[-1]
        if total_value == 0:
            # nothing can be expressed as a share of a zero total
            continue
        for previous_date, current_date, current_stamp in date_steps:
            current_rows = country_values.loc[
                country_values['Date'] == current_date, reference_column]
            previous_rows = country_values.loc[
                country_values['Date'] == previous_date, reference_column]
            if len(current_rows) == 0 or len(previous_rows) == 0:
                continue
            current_value = current_rows.values[0]
            previous_value = previous_rows.values[0]
            if previous_value > 0:
                growth_value = (current_value - previous_value) / total_value * 100
                df_growth.at[current_stamp, country] = growth_value
    return df_growth

Define a function to create a matrix of values (one row per date, one column per country).

In [5]:
def get_matrix_dataset(dataset, column_to_consider):
    """
    Produce a new dataframe where every column is a country, every row
    is a date (one per day between the minimum and the maximum 'Date' of
    the input) and every cell is the value of 'column_to_consider' for
    that date and country. Cells with no matching row stay at 0.
    Parameters:
    dataset -> dataset organized in rows, with the columns 'Date',
        'Country' and 'column_to_consider'. It is left untouched, so the
        function can be called repeatedly on the same dataframe.
    column_to_consider -> column whose values fill the matrix
    """
    minimum_date = dataset['Date'].min()
    maximum_date = dataset['Date'].max()
    country_list = dataset['Country'].unique()
    # work on a date-indexed copy instead of re-indexing the caller's frame
    by_date = dataset.set_index('Date')
    date_list = pd.date_range(start = minimum_date, end = maximum_date,
                              freq = '1D')
    # NOTE(review): the matrix starts as integers; confirm the callers only
    # pass integer-valued columns.
    zero_data = np.zeros(dtype=int, shape=(len(date_list), len(country_list)))
    df = pd.DataFrame(zero_data, index = date_list, columns = country_list)
    for country in country_list:
        # the rows of a country do not depend on the date: select them once
        country_rows = by_date.loc[by_date['Country'] == country]
        for date in date_list:
            date = str(date)[:10]
            try:
                value = country_rows.loc[date, column_to_consider]
                df.loc[date, country] = value
            except (KeyError, ValueError, TypeError):
                # no row for this date/country, or several rows for it that
                # cannot be stored in a single cell: keep the 0 placeholder
                continue
    return df

Define a function to evaluate the moving average.

In [13]:
def evaluate_moving_average(dataset, period, time_series = True):
    '''
    Compute the moving average of every column of a date x country matrix.

    Each cell of the result is the mean of the current value and of the
    previous period-1 values of the same column. The first 'period' rows
    of the input have no counterpart in the result.
    Parameters:
    dataset -> input dataset. With time_series=True the dates are its
        index; otherwise they must be in a column named 'Date'. The input
        is not modified.
    period -> number of samples in each average, at least 1.
    time_series -> tells whether the dates are the index of the dataset.
    Returns a new dataframe indexed by 'Date' with the same columns.
    Raises ValueError when period is smaller than 1.
    '''
    if period < 1:
        raise ValueError("period must be a positive integer")

    if time_series:
        # move the dates into a 'Date' column (an unnamed index comes out
        # of reset_index as 'index')
        frame = dataset.reset_index().rename(columns = {'index': 'Date'})
    else:
        frame = dataset
    frame = frame.set_index('Date')

    # min_periods=1 makes missing values be skipped, like a plain mean();
    # every window kept below is 'period' rows long anyway
    averages = frame.rolling(window = period, min_periods = 1).mean()
    return averages.iloc[period:]

def moving_mean(data,window_size):
    '''
    Compute the simple moving average of a sequence.

    Input:
    data-> sequence of numbers (anything supporting len() and slicing)
    window_size-> The size of the moving mean, at least 1

    Output:
    A list with one average per complete window, i.e.
    len(data) - window_size + 1 values; it is empty when the data are
    shorter than the window.
    Raises ValueError when window_size is smaller than 1.
    '''
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    window_count = len(data) - window_size + 1
    # every element of the window takes part in the average
    return [sum(data[start:start + window_size]) / window_size
            for start in range(window_count)]


Plot for the lockdown moving average

In [11]:
def peakChecker(column,peakIndex):
    '''
    Tell whether a candidate peak is followed by another observation.

    Inputs:
    column-> the dataset column we are analyzing. Ex: Afghanistan,Italy ecc...
    peakIndex-> index label (a date) of the supposed peak

    Output->
    peakIndex itself when the day after it is present in the index of the
    column, otherwise 0 (the peak is the latest observation).
    '''
    following_day = peakIndex + datetime.timedelta(days=1)
    return peakIndex if following_day in column.index else 0



      
def plotWithLockdown(top_ten,df_lockdown_states,df,name):
    '''
    Draw one graph per country, two per row, each with a red vertical line
    on the lockdown date and two day counters taken from statTable.

    Inputs:
    top_ten-> list of the countries to draw
    df_lockdown_states->dataset with the lockdown information (columns
        'Country' and 'Beginning Date')
    df-> dataset with one column per country, indexed by date
    name-> label of the Y axis of the graphs

    Output->
    Nothing is returned: the figure is built with matplotlib.
    Every country must be among those returned by statTable, otherwise
    the lookup of its counters raises ValueError.
    '''
    rows = (len(top_ten) + 1) // 2  # two graphs per row, rounded up
    # squeeze=False keeps axs two-dimensional even when there is one row
    fig, axs = plt.subplots(rows, 2, figsize=(15, 20), squeeze=False)
    if len(top_ten) % 2 == 1:
        # odd number of countries: drop the unused last cell of the grid
        fig.delaxes(axs[rows - 1, 1])
    fig.tight_layout(pad=7.0)  # leave room between the graphs
    country, peaks, ndaysPeak, lockdown, ndaysLock = statTable(df, df_lockdown_states)
    for position, state in enumerate(top_ten):
        lockdown_date = df_lockdown_states['Beginning Date'][df_lockdown_states['Country'] == state]

        index = country.index(state)
        if peaks[index] != 0:
            day_counter = ndaysPeak[index]
        else:
            day_counter = 'Peak not reached yet'
        if lockdown[index] != 0:
            lock_counter = ndaysLock[index]
        else:
            lock_counter = 'Lockdown not present'

        # graphs fill the grid left to right, top to bottom
        ax = axs[divmod(position, 2)]
        ax.plot(df.index, df[state], color=colorMapWorld[state], label='')
        ax.set_title(state)
        ax.set(xlabel='Dates', ylabel=name)
        ax.tick_params(labelrotation=45)
        ax.annotate('Counter from the 200 to the lockdown: ' + str(lock_counter),
                    xy=(10, 170), xycoords='axes points',
                    size=10, ha='left', va='top')
        ax.annotate('Counter from the 200 to the peak: ' + str(day_counter),
                    xy=(10, 150), xycoords='axes points',
                    size=10, ha='left', va='top')
        try:
            ax.axvline(x=lockdown_date.iloc[0], color='red')
        except (IndexError, TypeError, ValueError):
            # no lockdown row for this country, or a date that cannot be
            # placed on the axis: the graph is simply left without the line
            pass

def statTable(df_matrix,df_lockdown,threshold=200,reference_date='2020-05-20'):
    '''
    For every country that exceeds the threshold, count the days from the
    first date above the threshold to the peak and to the lockdown.

    Inputs:
    df_matrix-> one column per country, indexed by date
    df_lockdown-> dates of the lockdowns (columns 'Country' and
        'Beginning Date')
    threshold-> value a country must exceed to be taken into account
        (default 200)
    reference_date-> 'YYYY-MM-DD' date the days are counted up to when the
        peak or the lockdown is missing (default '2020-05-20')

    Output-> five lists aligned on the same positions
        country_list-> countries that exceeded the threshold
        peaks-> 1 if the peak is reached, 0 otherwise. The peak counts as
            reached when every date holding the maximum is followed by
            another date in the index of df_matrix.
        ndaysPeak-> days from the first date above the threshold to the
            first peak date, or to reference_date when peaks is 0
        lockdown-> 1 if a lockdown date is available, 0 otherwise
        ndaysLock-> days from the first date above the threshold to the
            lockdown, or to reference_date when lockdown is 0
    '''
    fallback_end = datetime.datetime.strptime(reference_date, "%Y-%m-%d")
    country_list=[]
    peaks=[]
    ndaysPeak=[]
    lockdown=[]
    ndaysLock=[]
    for country in df_matrix.columns:
        series = df_matrix[country]
        dates_above = series.index[(series > threshold).to_numpy()]
        if len(dates_above) == 0:
            # the country never exceeds the threshold: it is left out
            continue
        beg_date = dates_above[0]
        country_list.append(country)

        # the maximum may be held by more than one date
        peak_dates = series.index[(series == series.max()).to_numpy()]
        day_after_peaks = peak_dates + timedelta(days=1)
        peak = 1 if day_after_peaks.isin(df_matrix.index).all() else 0
        peaks.append(peak)
        if peak == 1:
            ndaysPeak.append((peak_dates[0] - beg_date).days)
        else:
            ndaysPeak.append((fallback_end - beg_date).days)

        try:
            lock_rows = df_lockdown[df_lockdown['Country'] == country]
            lock_date = lock_rows['Beginning Date'].iloc[0]
        except (KeyError, IndexError):
            # missing column, or no lockdown row for this country
            lock_date = None
        # only a real date counts; a missing date (NaT) is the one
        # datetime value that is not equal to itself
        is_date = isinstance(lock_date, datetime.datetime) and lock_date == lock_date
        lock = 1 if is_date else 0
        lockdown.append(lock)
        if lock == 1:
            ndaysLock.append((lock_date - beg_date).days)
        else:
            ndaysLock.append((fallback_end - beg_date).days)
    return country_list,peaks,ndaysPeak,lockdown,ndaysLock



def peakDays(countries,df_lockdown_states,df):
    '''
    Count, for every country, the observations between its lockdown date
    and its peak.

    Input:
    countries-> list of all countries
    df_lockdown_states->dataset with the lockdown information (columns
        'Country' and 'Beginning Date')
    df-> dataset with the cases of every country, one column per country

    Output->List with one entry per country: the number of rows of the
    country column between the lockdown date and the first date holding
    the maximum, or 0 when peakChecker reports that the peak is not
    reached yet.
    '''
    days = []
    for state in countries:
        lockdown_date = df_lockdown_states['Beginning Date'][df_lockdown_states['Country'] == state]
        series = df[state]
        # rows holding the maximum value; the first one is taken as the peak
        peak = series[series == series.max()]
        first_peak = peak.index[0]

        if peakChecker(series, first_peak) != 0:
            span = series[lockdown_date.iloc[0]:first_peak]
            days.append(len(span))
        else:
            days.append(0)
    return days

Autocorrelation

In [2]:
def autocorrelationPlot(name,values_,lag):
    '''
    Draw the autocorrelation and the partial autocorrelation of one
    country (or region) side by side.

    Inputs:
    name-> name of the column to analyze; it is also used to look up the
        colour, first in colorMapWorld and then in colorMapRegions
    values_-> dataset with the data of every country
    lag-> number of lags of the two plots

    Output->
    Nothing is returned: a figure with the two graphs is built.
    A failure of the plotting functions is propagated as it is.
    '''
    values = values_.copy()
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))  # one row, two graphs
    fig.tight_layout(pad=7.0)  # leave room between the graphs
    values = datasetAlignment(values[name])

    # choose the colour before plotting, so that only a missing entry in
    # colorMapWorld (and not an error raised while plotting) triggers the
    # fallback on colorMapRegions
    try:
        color = colorMapWorld[name]
    except KeyError:
        color = colorMapRegions[name]

    plot_acf(values, color=color, label='', lags=lag, ax=axs[0],
             title=name + " autocorrelation")
    plot_pacf(values, color=color, label='', lags=lag, ax=axs[1],
              title=name + " partial autocorrelation")
       
              
        

def decompositionPlot(name, values_):
    '''
    Plot the additive seasonal decomposition of one country.

    Inputs:
    name-> name of the column to decompose; it is also the key used to
        look up the colour in colorMapWorld
    values_-> dataset with the data of every country

    Output->
    Nothing is returned: a figure with four stacked graphs (observed,
    trend, seasonal, residual) is built. The default figure size in
    rcParams is set to 18 x 8 as a side effect.
    '''
    series = datasetAlignment(values_.copy()[name])

    rcParams['figure.figsize'] = 18, 8
    half_length = int((len(series) - 1) / 2)
    decomposition = seasonal_decompose(series, model='additive',
                                       period=half_length,
                                       extrapolate_trend='freq')

    fig, axes = plt.subplots(4, 1, sharex=True)
    panels = (
        ('observed', 'Observed'),
        ('trend', 'Trend'),
        ('seasonal', 'Seasonal'),
        ('resid', 'Residual'),
    )
    # one component per row, all drawn with the colour of the country
    for axis, (component, label) in zip(axes, panels):
        getattr(decomposition, component).plot(ax=axis, legend=False,
                                               color=colorMapWorld[name])
        axis.set_ylabel(label)
        
       
        
def datasetAlignment(matrix):
    '''
    Drop the leading values that are not positive, so that the data start
    where the new deaths/cases etc. grow for the first time.

    Inputs:
    matrix-> values of one country: a pandas Series or a plain sequence

    Output->
    the input from its first value greater than 0 onwards, or None when
    no value is greater than 0
    '''
    # pandas objects are read by position through .iloc, so the result
    # does not depend on the kind of index (dates, integers, ...)
    positional = getattr(matrix, 'iloc', matrix)
    for position in range(len(matrix)):
        if positional[position] > 0:
            return positional[position:]
    return None
        
    

Linear regression

In [1]:
def linearRegression(x,y,labelx,labely):
    '''
    Fit a one-variable linear regression on a random 80/20 split and show
    how it behaves on the held-out part.

    Input:
    x-> independent variable (must provide to_numpy())
    y-> dependent variable
    labelx and labely-> labels of the two axes of the plot

    output:
    Nothing is returned. The coefficients, the mean squared error and the
    R^2 on the test part are printed, and the test points are plotted in
    red together with the regression line in blue.
    '''
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    # the model expects one column per feature
    train_features = x_train.to_numpy().reshape(-1, 1)
    regr = LinearRegression().fit(train_features, y_train)

    test_features = x_test.to_numpy().reshape(-1, 1)
    predicted = regr.predict(test_features)

    print('Coefficients: \n', regr.coef_)
    mse = mean_squared_error(y_test, predicted)
    print('Mean squared error: %.2f'
          % mse)
    # 1 is perfect prediction
    r2 = r2_score(y_test, predicted)
    print('Coefficient of determination: %.2f'
          % r2)

    plt.figure(figsize=(15, 7.5))
    plt.scatter(test_features, y_test, color='red')
    plt.plot(test_features, predicted, color='blue', linewidth=3)
    plt.xlabel(labelx)
    plt.ylabel(labely)
    plt.show()

Dickey-Fuller test

In [2]:
def dickeyFuller (series):
    '''
    Run the augmented Dickey-Fuller test and print its main figures.

    Input:
    series->the time series in input

    output:
    the result of adfuller as it is; the statistic, the p-value and the
    critical values are also printed
    '''
    adf = adfuller(series)
    statistic, p_value, critical_values = adf[0], adf[1], adf[4]
    print('ADF Statistic: {}'.format(statistic))
    print('p-value: {}'.format(p_value))
    print('Critical Values:')
    for level in critical_values:
        print('\t{}: {}'.format(level, critical_values[level]))
    return adf

Linear regression for the part after the peak of the pandemic

In [4]:
'''
It prints a plot to show how the data are flowing after the peak

Input:
dates-> list of all the dates from the beginning of the pandemic until today
data-> the features like new cases or deaths
country_name-> name of the country to plot in the graph title

output:
a plot with the linear regression of the curve 
X-> list of the number of days (a range)
y-> the features starting from the same data of the first element of X
starting date-> the number conversion of the data when the peak was reached
linear_regr-> linear regression of the curve
'''
def curveAfterPeak(dates,data, country_name):
    """Fit and plot a linear regression on the data that follow the peak.

    The peak is the first position holding the maximum of ``data``. When
    the maximum is also the last value, 'Peak not reached yet' is printed
    and the function ends without an explicit return (the caller gets None).

    Parameters:
    dates -> sequence of dates; only its length, after dropping the first
        ``window - 2`` entries, is used
    data -> values to model (needs ``max()``, ``iloc`` and integer indexing)
    country_name -> name shown in the title of the plot

    Returns (only when the peak is reached):
    X -> day numbers, as one-element lists, from the peak onwards
    y -> the data from the peak onwards
    starting_date -> position of the peak in ``data``
    linear_regr -> the fitted regression

    NOTE(review): ``window`` is not a parameter and is not defined in this
    function; it is presumably a notebook-level variable - confirm it is
    set before calling.
    """
    # date format is not suitable for modeling, so it's better to transform it into a number.
    # For example, the first day of the peak in Italy was reached around mid March, which is the 23rd day from when the pandemic
    # started in Italy. Here we are doing that conversion
    if(data.max()!=data.iloc[-1]):

        # position of the first value equal to the maximum
        for i in range(len(data)):
            if(data[i]==data.max()):
                starting_date=i
                break

        day_numbers = []
        plt.figure(figsize=(15,7.5))
        # day numbers 1, 2, ... each wrapped in a list (one feature per sample)
        for i in range(1, len(dates[window-2:])):
            day_numbers.append([i])
        X = day_numbers
        # let's train our model only with data after the peak
        # NOTE(review): X and y must end up with the same length for fit();
        # that depends on len(dates), len(data) and window - verify against caller
        X = X[starting_date:]
        y = data[starting_date:]
        # Instantiate Linear Regression
        linear_regr = linear_model.LinearRegression()
        # Train the model using the training sets
        linear_regr.fit(X, y)
        plt.scatter(X,y)
        plt.xlabel("Number of days from the first day with 200 new confirmed cases")
        plt.ylabel("Number of new confirmed cases")
        plt.title(country_name + " - Linear regression model")
        plt.plot(X, linear_regr.predict(X), color ='red')
        print ("Linear Regression Model Score:", "{:.2f}".format((linear_regr.score(X, y))))
        return X,y,starting_date, linear_regr
    else:
        # the maximum is the latest value: nothing to fit yet
        print('Peak not reached yet')

In [3]:
def predictTrain(X, Y, TrainX,TrainY,starting_date,listDate, linear_regr,y_test,prediction_days, country_name):
    '''
    Plot, in logarithmic scale, the prediction of a fitted linear
    regression for the days that follow the peak, together with a band
    built from the maximum error measured on the training data.

    Input:
    X-> list of the number of days (a range), drawn as scatter points
    Y-> the features matching X, drawn as scatter points
    TrainX-> day numbers used to measure the error of the prediction
    TrainY-> the actual values matching TrainX
    starting_date-> the number conversion of the date when the peak was
        reached; it is the left edge of the plot
    listDate-> the list of dates in their usual format; the entry at
        position starting_date is the date of the first tick
    linear_regr-> the fitted linear regression
    y_test-> the actual values of the future days, used for the score
    prediction_days-> number of days covered by the plot
    country_name-> name of the country to put in the graph title

    output:
    Nothing is returned. The plot is shown and the R^2 of the prediction
    against y_test is printed.
    '''
    future_days = prediction_days
    # date of the epidemic peak: the label of the first tick
    date_zero = listDate[starting_date]

    # one tick every 'step' days, starting on the peak; the tick positions
    # are day numbers, like the rest of the x axis, and the labels are the
    # matching dates written as day/month
    step = 5
    tick_count = int(future_days / step)
    x_ticks = [starting_date + tick * step for tick in range(tick_count)]
    date_prev = []
    for tick in range(tick_count):
        tick_day = date_zero + timedelta(days=tick * step)
        date_prev.append(str(tick_day.day) + "/" + str(tick_day.month))

    plt.figure(figsize=(15,7.5))
    # largest error on the training data: half-width of the band
    y_pred = linear_regr.predict(TrainX)
    error = max_error(TrainY, y_pred)

    X_test = [[day] for day in range(starting_date, starting_date + future_days)]
    y_pred_linear = linear_regr.predict(X_test)
    y_pred_max = [value + error for value in y_pred_linear]
    y_pred_min = [value - error for value in y_pred_linear]

    plt.grid()
    plt.scatter(X,Y)
    # plot linear regression prediction
    plt.plot(X_test, y_pred_linear, color='green', linewidth=2)
    # plot maximum error
    plt.plot(X_test, y_pred_max, color='red', linewidth=1, linestyle='dashed')
    # plot minimum error
    plt.plot(X_test, y_pred_min, color='red', linewidth=1, linestyle='dashed')
    plt.xlim(starting_date, starting_date + future_days)
    plt.xticks(x_ticks, date_prev)

    print("Prediction score:", "{:.2f}".format(r2_score(y_test,y_pred_linear[len(TrainY):len(TrainY)+len(y_test)])))

    plt.xlabel("Dates")
    plt.ylabel("Number of new confirmed cases")
    plt.title(country_name + " - 0 new confirmed cases prediction")
    plt.yscale("log")
    # NOTE(review): a lower limit of 0 cannot be shown on a logarithmic
    # axis - confirm how matplotlib treats it here
    plt.ylim(0, max(TrainY))
    plt.show()

Time series analysis

In [1]:
def test_stationarity(df_timeseries):
    """
    Plot the rolling mean and standard deviation (window of 12) of a time
    series and run the augmented Dickey-Fuller test on it.
    Pass a np.array or a pd.Series holding the TS data only.
    Returns the raw adfuller result and a pd.Series with its figures
    labelled, or (None, None) when anything goes wrong; in that case the
    reason is printed.
    """
    # a plain numpy array is wrapped in a pandas Series
    if type(df_timeseries) is np.ndarray:
        df_timeseries = pd.Series(df_timeseries)

    try:
        rolling_window = df_timeseries.rolling(window=12)
        rolmean = rolling_window.mean()
        rolstd = rolling_window.std()

        plt.plot(df_timeseries, color='blue',label='Original')
        plt.plot(rolmean, color='red', label='Rolling Mean')
        plt.plot(rolstd, color='black', label = 'Rolling Std')
        plt.legend(loc='best')
        plt.title('Rolling Mean & Standard Deviation')
        plt.show(block=False)

        print('Results of Dickey-Fuller Test:')

        dftest = adfuller(df_timeseries, autolag='AIC')
        labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
        dfoutput = pd.Series(dftest[0:4], index=labels)
        # add one entry per critical value
        for key, value in dftest[4].items():
            dfoutput['Critical Value (%s)'%key] = value

        return dftest, dfoutput
    except Exception as message:
        print(f"Impossible to calc the stationariery of your TS: {message}")
        return None, None