## PCA

### Loading the transformed GHSI 2021 data and the 2019 data

In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import sys
sys.path.append('./functions')
from pcs_threshold import pcs_threshold

# Read the two input tables from the local data/ folder (paths are relative to
# the notebook's working directory).
ghsi_2019_df = pd.read_csv("data/ghsi_2019_rearanged.csv")
ghsi_2021_transformed = pd.read_csv("data/ghsi_2021_transformed.csv")


Before conducting PCA, variables were grouped based on their conceptual meaning and intercorrelations, enabling easier interpretation of the obtained components.

In [62]:
# Split the inputs into variable groups by column position (0-based, via .iloc).
# NOTE(review): the positions depend on the column order of the CSV files,
# which is not visible here - confirm they still match if the files change.
GHSI_2021 = ghsi_2021_transformed.iloc[:, 1:]  # every column except the first
GHSI_2019 = ghsi_2019_df.iloc[:, 1:8]  # columns 1-7 (slice end is exclusive)
prosperity = ghsi_2019_df.iloc[:, [10, 11, 13, 14, 15]]
age = ghsi_2019_df.iloc[:, [12, 18, 19]]
disease = ghsi_2019_df.iloc[:, [22, 23, -1]]  # -1 selects the last column


Putting all the groups into a list so that a single for loop can perform PCA on each group in turn.

In [63]:
# Keep every label next to its DataFrame so the two lists below cannot drift
# out of step; both lists follow the insertion order of the mapping.
groups_by_name = {
    'GHSI 2021': GHSI_2021,
    'GHSI 2019': GHSI_2019,
    'Prosperity': prosperity,
    'Age': age,
    'Disease': disease,
}
names = list(groups_by_name)
all_groups = list(groups_by_name.values())


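PCA: each group is standardized and decomposed separately; the number of retained components is chosen by `pcs_threshold` with a threshold of 85 on the explained-variance percentages.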

In [65]:
# Threshold handed to pcs_threshold; it is on the same 0-100 scale as `explained`.
# NOTE(review): presumably the cumulative explained-variance percentage that the
# retained components must reach - confirm against functions/pcs_threshold.
EXPLAINED_THRESHOLD = 85

# The labels and the groups are paired by position, so a length mismatch would
# silently attach results to the wrong (or no) label.
if len(names) != len(all_groups):
    raise ValueError(
        f"names has {len(names)} entries but all_groups has {len(all_groups)}"
    )

# Per-group PCA results, keyed by the group's label.
resultsPCA = {}
for name, group in zip(names, all_groups):

    # Standardize every column (zero mean, unit variance) so that no variable
    # dominates the components because of its scale.
    scaler = StandardScaler()
    normal_data = scaler.fit_transform(group)

    # Fit PCA with all components kept; truncation happens below.
    pca = PCA()
    score = pca.fit_transform(normal_data)
    explained = pca.explained_variance_ratio_ * 100  # percent of variance per PC
    latent = pca.explained_variance_  # variance of each PC

    # Number of leading components to keep for this group.
    nPCs = pcs_threshold(explained, EXPLAINED_THRESHOLD)

    # Store only the first nPCs components of every quantity.
    resultsPCA[name] = {
        'coeff': pca.components_[:nPCs, :].T,  # transposed: one column per PC
        'score': score[:, :nPCs],  # observations projected onto the kept PCs
        'latent': latent[:nPCs],
        'explained': explained[:nPCs],
        'nPCs': nPCs,
    }

In [84]:
# Build one score table per group, keep it in PCA_table and export it to data/.
PCA_table = {}
for name in names:
    group_result = resultsPCA[name]
    n_kept = group_result['nPCs']

    # One column per retained component, labelled "<group> PC<k>" starting at 1.
    df = pd.DataFrame(
        group_result['score'],
        columns=[f"{name} PC{i+1}" for i in range(n_kept)],
    )
    PCA_table[name] = df

    # Spaces in the group label become underscores in the file name; to_csv
    # also writes the row index as the first column (pandas default).
    csv_name = f"{name.replace(' ', '_')}_PCS.csv"
    df.to_csv(f"data/{csv_name}")

# Preview the first rows of one exported table.
print(PCA_table['GHSI 2021'].head())


   GHSI 2021 PC1  GHSI 2021 PC2  GHSI 2021 PC3
0      -2.970420       1.542626      -0.915482
1      -0.897239      -0.620626       0.384835
2       1.986107       1.343400       0.971290
3       4.426760       0.538022       0.042024
4       2.269115      -0.858056      -0.904512
