In [24]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.linalg
import pickle
import pandas as pd
import sklearn 
import sys
import pandas as pd
import os
repo_dir = os.environ.get("REPO_DIR")
code_dir = os.path.join(repo_dir, "code/")
data_dir = os.path.join(repo_dir, "data/")
os.chdir(code_dir)


In [37]:
def df_to_demeaned_y_vars(task, 
                          df, 
                          method ="calc", 
                log_before_diff=False):
    
    hats = df.copy()
    hats.rename(columns = {task : "y_true"}, inplace=True)

    if method == "calc":
        country_means = hats.groupby("ISO_Code")["y_true"].mean().to_frame().rename(columns={"y_true" : "y_bar_country"})
        hats = hats.merge(country_means, "left", left_on = "ISO_Code", right_index=True)
    else:
        raise Exception("NotImplemented  - Invalid method input")
    
    hats["demeaned_y_true"] = hats.y_true - hats.y_bar_country
    
    if log_before_diff:
        hats["demeaned_y_true"] = np.log(hats.y_true) - np.log(hats.y_bar_country)
    return hats["demeaned_y_true"]

In [38]:
df = pd.read_pickle(data_dir + "int/GDL_HDI/HDI_indicators_and_indices_clean.p")

In [39]:
tasks = ['Sub-national HDI', 
         "Life expectancy", 
         "Mean years schooling", 
         "Expected years schooling",
         "GNI per capita in thousands of US$ (2011 PPP)"]

mat_df = df[tasks + ["ISO_Code"]]

mat_df[tasks] = mat_df[tasks].astype(float).copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [40]:
mat_df = mat_df.rename(columns = {"Sub-national HDI":"HDI","GNI per capita in thousands of US$ (2011 PPP)":"GNIpc" })

In [42]:
mat_df_countries = mat_df.pop("ISO_Code")

corr = mat_df.corr()

corr.columns = mat_df.columns
corr.index = mat_df.columns

corr = corr ** 2

In [43]:
print(corr.round(2).replace(np.nan, "").to_latex())

\begin{tabular}{lrrrrr}
\toprule
{} &   HDI &  Life expectancy &  Mean years schooling &  Expected years schooling &  GNIpc \\
\midrule
HDI                      &  1.00 &             0.79 &                  0.84 &                      0.82 &   0.62 \\
Life expectancy          &  0.79 &             1.00 &                  0.52 &                      0.57 &   0.46 \\
Mean years schooling     &  0.84 &             0.52 &                  1.00 &                      0.61 &   0.50 \\
Expected years schooling &  0.82 &             0.57 &                  0.61 &                      1.00 &   0.44 \\
GNIpc                    &  0.62 &             0.46 &                  0.50 &                      0.44 &   1.00 \\
\bottomrule
\end{tabular}



In [44]:

mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
corr[mask] = np.nan
a = (corr
 .style
 .background_gradient(axis=None, vmin=-1, vmax=1)
 .highlight_null(null_color='#f1f1f1')  # Color NaNs grey
 .set_precision(2))

  


In [45]:
corr = corr.round(2)

In [46]:
corr[corr.isnull()] = ""

In [47]:
corr

Unnamed: 0,HDI,Life expectancy,Mean years schooling,Expected years schooling,GNIpc
HDI,,,,,
Life expectancy,0.79,,,,
Mean years schooling,0.84,0.52,,,
Expected years schooling,0.82,0.57,0.61,,
GNIpc,0.62,0.46,0.5,0.44,


In [48]:
print(corr.to_latex())

\begin{tabular}{llllll}
\toprule
{} &   HDI & Life expectancy & Mean years schooling & Expected years schooling & GNIpc \\
\midrule
HDI                      &       &                 &                      &                          &       \\
Life expectancy          &  0.79 &                 &                      &                          &       \\
Mean years schooling     &  0.84 &            0.52 &                      &                          &       \\
Expected years schooling &  0.82 &            0.57 &                 0.61 &                          &       \\
GNIpc                    &  0.62 &            0.46 &                  0.5 &                     0.44 &       \\
\bottomrule
\end{tabular}



In [49]:
a

Unnamed: 0,HDI,Life expectancy,Mean years schooling,Expected years schooling,GNIpc
HDI,,,,,
Life expectancy,0.79,,,,
Mean years schooling,0.84,0.52,,,
Expected years schooling,0.82,0.57,0.61,,
GNIpc,0.62,0.46,0.5,0.44,


In [50]:
mat_df["ISO_Code"] = mat_df_countries

for task in mat_df.columns[:-1]:
    mat_df["Within-ADM0 " + task] = df_to_demeaned_y_vars(task, mat_df, method="calc")
    mat_df.pop(task)

In [51]:
mat_df_countries = mat_df.pop("ISO_Code")

corr = mat_df.corr()

corr.columns = mat_df.columns
corr.index = mat_df.columns

corr = corr ** 2

In [52]:

mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
corr[mask] = np.nan
b=(corr
 .style
 .background_gradient(axis=None, vmin=-1, vmax=1)
 .highlight_null(null_color='#f1f1f1')  # Color NaNs grey
 .set_precision(2))

  


In [53]:
corr = corr.round(2)
corr[corr.isnull()] = ""

In [54]:
corr

Unnamed: 0,Within-ADM0 HDI,Within-ADM0 Life expectancy,Within-ADM0 Mean years schooling,Within-ADM0 Expected years schooling,Within-ADM0 GNIpc
Within-ADM0 HDI,,,,,
Within-ADM0 Life expectancy,0.31,,,,
Within-ADM0 Mean years schooling,0.82,0.12,,,
Within-ADM0 Expected years schooling,0.65,0.1,0.46,,
Within-ADM0 GNIpc,0.17,0.03,0.1,0.07,


In [55]:
print(corr.to_latex())

\begin{tabular}{llllll}
\toprule
{} & Within-ADM0 HDI & Within-ADM0 Life expectancy & Within-ADM0 Mean years schooling & Within-ADM0 Expected years schooling & Within-ADM0 GNIpc \\
\midrule
Within-ADM0 HDI                      &                 &                             &                                  &                                      &                   \\
Within-ADM0 Life expectancy          &            0.31 &                             &                                  &                                      &                   \\
Within-ADM0 Mean years schooling     &            0.82 &                        0.12 &                                  &                                      &                   \\
Within-ADM0 Expected years schooling &            0.65 &                         0.1 &                             0.46 &                                      &                   \\
Within-ADM0 GNIpc                    &            0.17 &                        0.