In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

import time

import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

from itertools import combinations

from scipy.stats import kendalltau, pearsonr, spearmanr

# Setting up the data

In [None]:
# Load the pre-filled WDI/GFDI panel from the local Excel workbook.
WDI_GFDI_filled = pd.read_excel(r"C:\Users\klyukin\Documents\Python Scripts\phd\_code\research_paper\WDI_GFDI_filled.xlsx")

# Linearly interpolate the traded-value series for China only.
# .copy() makes the China subset an independent frame, so the column assignment
# below does not raise pandas' SettingWithCopyWarning.
# NOTE(review): interpolation follows row order - presumably rows are sorted by year; confirm.
WDI_GFDI_china = WDI_GFDI_filled[WDI_GFDI_filled['Country Code'].isin(['CHN'])].copy()
WDI_GFDI_china['Stocks traded, total value (current US$)'] = WDI_GFDI_china['Stocks traded, total value (current US$)'].interpolate(method='linear')
WDI_GFDI_not_china = WDI_GFDI_filled[~WDI_GFDI_filled['Country Code'].isin(['CHN'])]

# Recombine the two parts; the China rows now come first.
WDI_GFDI_filled = pd.concat([WDI_GFDI_china, WDI_GFDI_not_china])

# Build a one-year-lagged copy of GDP per capita: shifting Year forward by one
# means that, after the merge, each (country, year) row carries the previous
# year's value in the "... LAST" column.
WDI_GFDI_filled_2 = (
    WDI_GFDI_filled[['Country Code', 'Year', 'GDP per capita, PPP (constant 2021 international $)']]
    .rename(columns={
        'GDP per capita, PPP (constant 2021 international $)': 'GDP per capita, PPP (constant 2021 international $) LAST',
    })
    .assign(Year=lambda lagged: lagged['Year'] + 1)
)

WDI_GFDI_filled = WDI_GFDI_filled.merge(WDI_GFDI_filled_2, how='left', on=['Year', 'Country Code'])

# NOTE(review): despite the column name, this is the ratio current / previous
# (e.g. 1.03 for 3% growth), not a percentage - confirm that is intended.
WDI_GFDI_filled['GDP per capita, PPP (constant 2021 international $) (annual growth rate %)'] = (
    WDI_GFDI_filled['GDP per capita, PPP (constant 2021 international $)']
    .div(WDI_GFDI_filled['GDP per capita, PPP (constant 2021 international $) LAST'])
)

# Derived stock-market indicators, each expressed as a percentage of its denominator.
# Traded value relative to GDP.
WDI_GFDI_filled['Value-traded ratio (%)'] = (
    WDI_GFDI_filled['Stocks traded, total value (current US$)']
    .div(WDI_GFDI_filled['GDP (current US$)'])
    .mul(100)
)
# Traded value relative to market capitalization.
WDI_GFDI_filled['Stock market turnover ratio (%)'] = (
    WDI_GFDI_filled['Stocks traded, total value (current US$)']
    .div(WDI_GFDI_filled['Market capitalization of listed domestic companies (current US$)'])
    .mul(100)
)
# Value-traded ratio scaled by stock price volatility (uses the column computed above).
WDI_GFDI_filled['Value-traded ratio to stock price volatility'] = (
    WDI_GFDI_filled['Value-traded ratio (%)']
    .div(WDI_GFDI_filled['Stock price volatility'])
)
# Market capitalization relative to GDP.
WDI_GFDI_filled['Market capitalization to GDP'] = (
    WDI_GFDI_filled['Market capitalization of listed domestic companies (current US$)']
    .div(WDI_GFDI_filled['GDP (current US$)'])
    .mul(100)
)

# Keep only observations from 1996 through 2020 (both bounds inclusive).
WDI_GFDI_filled = WDI_GFDI_filled.loc[WDI_GFDI_filled['Year'].between(1996, 2020)]

# Attach the columns of the country list by country code (left join keeps every panel row).
country_names = pd.read_excel(r"C:\Users\klyukin\Documents\Python Scripts\phd\_code\research_paper\research_countries.xlsx")
WDI_GFDI_filled = WDI_GFDI_filled.merge(country_names, how='left', on='Country Code')

# Keep the identifier columns plus the indicators used downstream, in this order.
WDI_GFDI_filled = WDI_GFDI_filled[[
    'Country Name',
    'Country Code',
    'Year',
    'Bank deposits to GDP (%)',
    'Broad money (% of GDP)',
    'Central bank assets to GDP (%)',
    'Domestic credit to private sector (% of GDP)',
    'Financial system deposits to GDP (%)',
    'GDP (current LCU)',
    'GDP (current US$)',
    'GDP per capita (current LCU)',
    'GDP per capita, PPP (constant 2021 international $)',
    'Market capitalization of listed domestic companies (current US$)',
    'Stock price volatility',
    'Stocks traded, total value (current US$)',
    'GDP per capita, PPP (constant 2021 international $) (annual growth rate %)',
    'GDP, PPP (current international $)',
    'Value-traded ratio (%)',
    'Stock market turnover ratio (%)',
    'Value-traded ratio to stock price volatility',
    'Market capitalization to GDP',
]]

# Weighted-average helper
def weighted_average(dataframe, value, weight):
    """Return the weighted mean of column ``value`` using column ``weight``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing both columns.
    value : str
        Name of the column holding the values to average.
    weight : str
        Name of the column holding the weights.

    Returns
    -------
    float
        ``sum(value * weight) / sum(weight)`` over the rows where both the
        value and the weight are present.
    """
    val = dataframe[value]
    wt = dataframe[weight]
    # Restrict to rows where both entries are present. Otherwise ``sum()`` would
    # skip a missing value in the numerator while its weight still inflated the
    # denominator, biasing the average towards zero.
    valid = val.notna() & wt.notna()
    val = val[valid]
    wt = wt[valid]
    return (val * wt).sum() / wt.sum()

# K-means clustering helper
def doKmeans(X, nclust, random_state=None):
    """Fit K-means on ``X`` and return the labels, centroids and inertia.

    Parameters
    ----------
    X : array-like
        Samples to cluster; passed unchanged to ``KMeans.fit`` / ``KMeans.predict``.
    nclust : int
        Number of clusters.
    random_state : int or None, optional
        Seed forwarded to ``KMeans``. The default ``None`` leaves the
        initialisation unseeded; pass an integer for reproducible clusters.

    Returns
    -------
    tuple
        ``(clust, cent, ss)``: the predicted cluster label of every sample,
        the fitted ``cluster_centers_`` and the fitted ``inertia_``.
    """
    model = KMeans(nclust, max_iter=1000, random_state=random_state)
    model.fit(X)
    clust = model.predict(X)
    cent = model.cluster_centers_
    ss = model.inertia_
    return (clust, cent, ss)

# Indicator-only view of the panel (no identifier columns).
wh1 = WDI_GFDI_filled[[
    'Stock price volatility',
    'Value-traded ratio (%)',
    'Stock market turnover ratio (%)',
    'Market capitalization to GDP',
    'Bank deposits to GDP (%)',
    'Broad money (% of GDP)',
    'Central bank assets to GDP (%)',
    'Domestic credit to private sector (% of GDP)',
    'GDP per capita, PPP (constant 2021 international $) (annual growth rate %)',
]]

# Frame used for the correlation analysis and plots: country name plus the
# indicators under short display names. rename() returns a new frame here
# instead of mutating a column slice in place, which avoids pandas'
# SettingWithCopyWarning and keeps the cell safe to re-run.
WDI_GFDI_corr_df = WDI_GFDI_filled[[
    'Country Name',
    'GDP per capita, PPP (constant 2021 international $) (annual growth rate %)',
    'Stock price volatility',
    'Value-traded ratio (%)',
    'Stock market turnover ratio (%)',
    'Market capitalization to GDP',
    'Bank deposits to GDP (%)',
    'Broad money (% of GDP)',
    'Central bank assets to GDP (%)',
    'Domestic credit to private sector (% of GDP)',
]].rename(columns={
    'GDP per capita, PPP (constant 2021 international $) (annual growth rate %)': 'Growth rate',
    'Stock price volatility': 'Volatility',
    'Value-traded ratio (%)': 'Value-traded',
    'Stock market turnover ratio (%)': 'Turnover',
    'Market capitalization to GDP': 'MarCap to GDP',
    'Bank deposits to GDP (%)': 'Deposits to GDP',
    'Broad money (% of GDP)': 'Broad money',
    'Central bank assets to GDP (%)': 'Central bank assets',
    'Domestic credit to private sector (% of GDP)': 'Credit to private',
})

# Which countries are we studying?

In [None]:
# World map highlighting every country present in the correlation frame.
fig = px.choropleth(
    WDI_GFDI_corr_df['Country Name'].unique(),
    locations=WDI_GFDI_corr_df['Country Name'].unique(),
    locationmode='country names',   # locations are matched by country name
    projection='winkel tripel',
    width=1400,                     # figure width in pixels
    height=800,                     # figure height in pixels
)

# Render the map.
fig.show()

# pair plot

In [None]:
# Scatter-plot matrix of all indicators, with a dashed regression line fitted
# to every pair. To plot a subset instead, pass x_vars=[...] and
# y_vars=["Growth rate"] to pairplot.
sns.set_theme(style="ticks", font_scale=1.5)

sns.pairplot(
    WDI_GFDI_corr_df[[
        'Growth rate',
        # stock-market indicators
        'Volatility',
        'Value-traded',
        'Turnover',
        'MarCap to GDP',
        # banking / monetary indicators
        'Deposits to GDP',
        'Broad money',
        'Central bank assets',
        'Credit to private',
    ]],
    kind='reg',
    height=4,
    plot_kws={
        'line_kws': {'linestyle': '--', 'linewidth': 1},
        'scatter_kws': {'s': 25, 'alpha': 0.3},
    },
)


# correlation

In [None]:
def kendall_pval(x, y):
    """Return only the p-value (second result element) of ``kendalltau(x, y)``."""
    _, p_value = kendalltau(x, y)
    return p_value

def pearsonr_pval(x, y):
    """Return only the p-value (second result element) of ``pearsonr(x, y)``."""
    _, p_value = pearsonr(x, y)
    return p_value

def spearmanr_pval(x, y):
    """Return only the p-value (second result element) of ``spearmanr(x, y)``."""
    _, p_value = spearmanr(x, y)
    return p_value

In [None]:
# Spearman rank correlation between all numeric indicators (country name dropped).
cor = WDI_GFDI_corr_df.drop(columns=['Country Name'])
cor_value = cor.corr(method='spearman')

# Annotated heat map on a fixed [-1, 1] colour scale.
sns.set_theme(style="ticks", font_scale=0.9)
sns.heatmap(
    cor_value,
    annot=True,
    square=True,
    vmin=-1,
    vmax=1,
    linewidth=.5,
    fmt=".2f",
)

In [None]:
# Matrix of Spearman p-values: corr() calls the helper for every column pair.
cor_p_value = cor.corr(method=spearmanr_pval)

# With a callable method pandas always puts 1 on the diagonal, whatever the
# callable returns, so set the diagonal to 0 (a variable against itself).
# mask() builds a new frame instead of writing through ``.values``, which is
# not guaranteed to be a writable view (it is read-only under Copy-on-Write).
cor_p_value = cor_p_value.mask(np.eye(len(cor_p_value), dtype=bool), 0)

# Heat map of the p-values; the colour scale is capped at 0.1.
sns.heatmap(
    cor_p_value,
    annot=True,
    square=True,
    vmin=0,
    vmax=0.1,
    linewidth=.5,
    fmt=".2f",
    cmap='RdYlGn_r',
)

In [None]:
# Add the observation year as the second column (values are aligned on the index).
# DataFrame.insert raises ValueError when the column already exists, so the
# guard lets this cell be re-run safely.
if 'Year' not in WDI_GFDI_corr_df.columns:
    WDI_GFDI_corr_df.insert(1, 'Year', WDI_GFDI_filled['Year'])

In [None]:
# Display the year ten years before the latest year in the data.
max(WDI_GFDI_corr_df.loc[:, 'Year']) - 10

In [None]:
# One sequential colour per distinct year, used to shade the individual observations.
palette = sns.color_palette("YlOrRd", n_colors=WDI_GFDI_corr_df['Year'].unique().size)

# One figure per indicator: a box plot per country with the yearly observations
# overlaid as points. The slice skips the first two columns
# (presumably 'Country Name' and 'Year' - depends on the cells above having run in order).
for column in WDI_GFDI_corr_df.columns[2:]:
    print(column)
    sns.set_theme(style="ticks", font_scale=1.15)

    ax = sns.boxplot(
        data=WDI_GFDI_corr_df,
        x='Country Name',
        y=column,
        color='#99c2a2',
    )
    ax = sns.stripplot(
        data=WDI_GFDI_corr_df,
        x="Country Name",
        y=column,
        hue="Year",
        palette=palette,
        size=5,
        alpha=0.5,
    )

    # Country names are long, so rotate the x tick labels.
    plt.xticks(rotation=90)

    # Replace the automatic legend with a two-column one anchored outside the axes.
    ax.get_legend().remove()
    ax.legend(
        title='Year',
        ncols=2,
        handletextpad=1.5,
        labelspacing=1.25,
        title_fontsize=15,
        borderpad=1,
        fontsize=10,
        bbox_to_anchor=(1, 1),
    )

    plt.show()

# pair plot with countries

In [None]:
# Growth rate against a single indicator, one colour per country.
sns.set_theme(style="ticks", font_scale=1.8)

sns.pairplot(
    WDI_GFDI_corr_df[[
        'Country Name',
        'Growth rate',
        'Volatility',
        'Value-traded',
        'Turnover',
        'MarCap to GDP',
        'Deposits to GDP',
        'Broad money',
        'Central bank assets',
        'Credit to private',
    ]],
    # Swap in any other indicator here: 'Value-traded', 'Turnover',
    # 'MarCap to GDP', 'Deposits to GDP', 'Broad money',
    # 'Central bank assets', 'Credit to private'.
    x_vars=['Volatility'],
    y_vars=["Growth rate"],
    hue="Country Name",
    # Size the palette from the data instead of a hard-coded 17, so the number
    # of colours always matches the number of countries being plotted.
    palette=sns.color_palette("tab20", n_colors=WDI_GFDI_corr_df['Country Name'].nunique()),
    height=8,
    plot_kws={'s': 100},
)

In [None]:
# Copy of the correlation frame with every indicator except the growth rate
# replaced by its base-2 logarithm.
# NOTE(review): zero or negative inputs would become -inf / NaN here - confirm
# the data contains none.
WDI_GFDI_corr_df_log = WDI_GFDI_corr_df.assign(**{
    name: np.log2(WDI_GFDI_corr_df[name])
    for name in (
        # stock-market indicators
        'Volatility',
        'Value-traded',
        'Turnover',
        'MarCap to GDP',
        # banking / monetary indicators
        'Deposits to GDP',
        'Broad money',
        'Central bank assets',
        'Credit to private',
    )
})

In [None]:
# Scatter-plot matrix of the log2-transformed indicators with dashed
# regression lines. To plot a subset instead, pass x_vars=[...] and
# y_vars=["Growth rate"] to pairplot.
sns.set_theme(style="ticks", font_scale=1.5)

sns.pairplot(
    WDI_GFDI_corr_df_log[[
        'Country Name',
        'Growth rate',
        # stock-market indicators
        'Volatility',
        'Value-traded',
        'Turnover',
        'MarCap to GDP',
        # banking / monetary indicators
        'Deposits to GDP',
        'Broad money',
        'Central bank assets',
        'Credit to private',
    ]],
    kind='reg',
    height=4,
    plot_kws={
        'line_kws': {'linestyle': '--', 'linewidth': 1},
        'scatter_kws': {'s': 50, 'alpha': 0.3},
    },
)


In [None]:
# Pearson correlation between the columns of the log2-transformed frame
# (country name dropped).
# NOTE(review): if the cells ran top to bottom, this frame also carries the
# 'Year' column, so 'Year' appears in the matrix - confirm that is intended.
cor = WDI_GFDI_corr_df_log.drop(columns=['Country Name'])
cor_value = cor.corr(method='pearson')

# Annotated heat map on a fixed [-1, 1] colour scale.
sns.set_theme(style="ticks", font_scale=0.9)
sns.heatmap(
    cor_value,
    annot=True,
    square=True,
    vmin=-1,
    vmax=1,
    linewidth=.5,
    fmt=".2f",
)

In [None]:
# Matrix of Pearson p-values: corr() calls the helper for every column pair.
cor_p_value = cor.corr(method=pearsonr_pval)

# With a callable method pandas always puts 1 on the diagonal, whatever the
# callable returns, so set the diagonal to 0 (a variable against itself).
# mask() builds a new frame instead of writing through ``.values``, which is
# not guaranteed to be a writable view (it is read-only under Copy-on-Write).
cor_p_value = cor_p_value.mask(np.eye(len(cor_p_value), dtype=bool), 0)

# Heat map of the p-values; the colour scale is capped at 0.1.
sns.heatmap(
    cor_p_value,
    annot=True,
    square=True,
    vmin=0,
    vmax=0.1,
    linewidth=.5,
    fmt=".2f",
    cmap='RdYlGn_r',
)

In [None]:
# Growth rate against a single log2-transformed indicator, one colour per country.
sns.set_theme(style="ticks", font_scale=1.8)

sns.pairplot(
    WDI_GFDI_corr_df_log[[
        'Country Name',
        'Growth rate',
        'Volatility',
        'Value-traded',
        'Turnover',
        'MarCap to GDP',
        'Deposits to GDP',
        'Broad money',
        'Central bank assets',
        'Credit to private',
    ]],
    # Swap in any other indicator here: 'Volatility', 'Value-traded',
    # 'Turnover', 'MarCap to GDP', 'Deposits to GDP', 'Broad money',
    # 'Central bank assets'.
    x_vars=['Credit to private'],
    y_vars=["Growth rate"],
    hue="Country Name",
    # Size the palette from the data instead of a hard-coded 17, so the number
    # of colours always matches the number of countries being plotted.
    palette=sns.color_palette("tab20", n_colors=WDI_GFDI_corr_df_log['Country Name'].nunique()),
    height=8,
    plot_kws={'s': 100},
)


In [None]:
# One sequential colour per distinct year, used to shade the individual observations.
palette = sns.color_palette("YlOrRd", n_colors=WDI_GFDI_corr_df_log['Year'].unique().size)

# One figure per column of the log2-transformed frame: a box plot per country
# with the yearly observations overlaid as points. The slice skips the first
# two columns (presumably 'Country Name' and 'Year' - depends on the cells
# above having run in order).
for column in WDI_GFDI_corr_df_log.columns[2:]:
    print(column)
    sns.set_theme(style="ticks", font_scale=1.15)

    ax = sns.boxplot(
        data=WDI_GFDI_corr_df_log,
        x='Country Name',
        y=column,
        color='#99c2a2',
    )
    ax = sns.stripplot(
        data=WDI_GFDI_corr_df_log,
        x="Country Name",
        y=column,
        hue="Year",
        palette=palette,
        size=5,
        alpha=0.5,
    )

    # Country names are long, so rotate the x tick labels.
    plt.xticks(rotation=90)

    # Replace the automatic legend with a two-column one anchored outside the axes.
    ax.get_legend().remove()
    ax.legend(
        title='Year',
        ncols=2,
        handletextpad=1.5,
        labelspacing=1.25,
        title_fontsize=15,
        borderpad=1,
        fontsize=10,
        bbox_to_anchor=(1, 1),
    )

    plt.show()

In [None]:
## Restrict the correlation frame to a few countries for the per-country pair plots
WDI_GFDI_corr_df_select = WDI_GFDI_corr_df[
    WDI_GFDI_corr_df['Country Name'].isin([
        'China',
        'India',
        'United States',
        'Germany',
        # optional extras: 'Japan', 'Singapore', 'Hong Kong SAR, China'
    ])
]

In [None]:

# For every column after the first three, plot growth rate against that column
# for the selected countries (one regression line per country) and save the
# figure as a PNG.
for column in WDI_GFDI_corr_df_select.columns[3:]:

    print(column)

    sns.set_theme(style="ticks", font_scale=1.8)

    sns.pairplot(
        WDI_GFDI_corr_df_select[[
            'Country Name',
            'Growth rate',
            'Volatility',
            'Value-traded',
            'Turnover',
            'MarCap to GDP',
            'Deposits to GDP',
            'Broad money',
            'Central bank assets',
            'Credit to private',
        ]],
        x_vars=[column],
        y_vars=["Growth rate"],
        hue="Country Name",
        palette=(['#1f77b4', '#ff7f0e', '#98df8a', '#c7c7c7']),
        height=8,
        kind='reg',
        plot_kws={
            'line_kws': {'linestyle': '--', 'linewidth': 1},
            'scatter_kws': {'s': 100},
        },
    )

    # The pairplot figure is the current figure, so plt.savefig writes it out.
    plt.savefig(r'C:\Users\klyukin\Documents\Python Scripts\phd\_code\research_paper\dec 5th\select_countries\select_countries_' + column.replace(' ','_')+'.png', bbox_inches = 'tight')
    # Close the figure rather than just clearing it: plt.clf() leaves every
    # figure open, so they pile up in memory and show as blank output.
    plt.close()

In [None]:
## Full pair plot for the selected countries
WDI_GFDI_corr_df_select = WDI_GFDI_corr_df[
    WDI_GFDI_corr_df['Country Name'].isin([
        'China',
        'India',
        'United States',
        'Germany',
        # optional extras: 'Japan', 'Singapore', 'Hong Kong SAR, China'
    ])
]

sns.set_theme(style="ticks", font_scale=4)

# Scatter-plot matrix of all indicators, coloured by country, with one dashed
# regression line per country. To plot a subset instead, pass x_vars=[...] and
# y_vars=["Growth rate"] to pairplot.
sns.pairplot(
    WDI_GFDI_corr_df_select[[
        'Country Name',
        'Growth rate',
        # stock-market indicators
        'Volatility',
        'Value-traded',
        'Turnover',
        'MarCap to GDP',
        # banking / monetary indicators
        'Deposits to GDP',
        'Broad money',
        'Central bank assets',
        'Credit to private',
    ]],
    hue="Country Name",
    palette=(['#1f77b4', '#ff7f0e', '#98df8a', '#c7c7c7']),
    height=8,
    kind='reg',
    plot_kws={
        'line_kws': {'linestyle': '--', 'linewidth': 1},
        'scatter_kws': {'s': 100},
    },
)