In [11]:
from pathlib import Path

import git
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

# Locate the Git repository root (searching parent directories) so the data
# path does not depend on the directory the notebook was launched from.
git_repo = git.Repo('.', search_parent_directories=True)

# Load the data. The path is assembled with pathlib so it resolves on every
# operating system; a hard-coded "\\" separator only works on Windows.
data_path = Path(git_repo.working_tree_dir) / "Data" / "data.csv"
df = pd.read_csv(data_path)
# Drop the 'Unnamed: 0' column by reassignment instead of mutating in place.
df = df.drop(columns='Unnamed: 0')

# Positions of the first ('A') and last ('T') columns of the span used below.
start_index = df.columns.get_loc('A')
end_index = df.columns.get_loc('T')
# Names of every column from 'A' through 'T' (inclusive) as a NumPy array.
ELEMENTS = np.array(df.columns[start_index:end_index + 1])

# PCA
# The columns from 'A' through 'T' (selected by position) are the PCA input;
# slice them once and reuse the same frame for both fit and transform.
pca_input = df.iloc[:, start_index:end_index + 1]
pca = PCA().fit(pca_input)

# Number of principal components actually fitted. n_components_ is used rather
# than n_features_in_: the latter counts input columns, and with the default
# settings PCA keeps min(n_samples, n_features) components, so the two differ
# whenever there are fewer rows than columns.
num_pc = pca.n_components_

# 'PC1', 'PC2', ... labels, used for the scree plot and as column/index names.
labels = [f'PC{k}' for k in range(1, num_pc + 1)]

# Coordinates of every row in principal-component space.
pca_data = pca.transform(pca_input)
# Reuse df's index so the concat below aligns row-for-row even when df does
# not carry a default 0..n-1 index.
pca_df = pd.DataFrame(pca_data, columns=labels, index=df.index)

# DataFrame holding the CLASS label followed by the principal-component scores.
df_complete = pd.concat([df[['CLASS']], pca_df], axis=1)

# Eigenvalues: explained variance of each component, rounded to 2 decimals.
per_var_eigen = pca.explained_variance_.round(2)
per_var_eigen_df = pd.DataFrame({'Eigenvalues': per_var_eigen}, index=labels)

# Explained-variance ratio of each component, expressed as a percentage.
per_var = (pca.explained_variance_ratio_ * 100).round(2)
per_var_df = pd.DataFrame({'Explained Variance (%)': per_var}, index=labels)

# Cumulative sum of the percentages (summed first, then rounded).
var_sum = np.round(np.cumsum(pca.explained_variance_ratio_ * 100), decimals=2)
var_sum_df = pd.DataFrame({'Accumulated Variance': var_sum}, index=labels)

# Loadings matrix: one row per principal component, one column per input feature.
loadings = pca.components_

# Build the loadings table in a single constructor call instead of growing a
# DataFrame with pd.concat inside a loop (which recopies the frame on every
# pass). Transposing puts the input columns (ELEMENTS) on the rows and the
# components on the columns. The column labels are sized from the matrix
# itself so they always match it. copy=True keeps the table independent of
# the array stored on the fitted PCA object.
df_loadings = pd.DataFrame(
    loadings.T,
    index=ELEMENTS,
    columns=[f'PC{i + 1}' for i in range(loadings.shape[0])],
    copy=True,
)

In [12]:
# Show the combined table: the CLASS column followed by the principal-component scores.
df_complete

Unnamed: 0,CLASS,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,...,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20
0,CLASS1,0.860769,-3.866766,1.929391,2.169356,1.080451,0.614672,1.925480,0.727798,0.230972,...,-0.485046,-0.308911,-0.154461,-0.071751,0.698060,-0.153416,-0.081683,0.162356,-0.262180,0.118198
1,CLASS1,0.972496,-3.896529,1.828709,1.129940,-0.041232,0.947140,2.425382,0.655589,0.008518,...,-0.674971,-0.547048,-0.118054,0.001259,1.236165,0.102783,0.222431,0.137066,-0.137522,0.149027
2,CLASS1,-2.316297,-4.261354,1.875725,0.401175,0.686471,0.041376,1.795758,-0.624037,-0.255581,...,-0.287262,-0.281997,-0.961600,-0.233126,1.276689,-0.331260,0.188867,0.377567,0.157464,0.139362
3,CLASS1,-1.770487,-4.326114,1.807570,0.541813,0.852959,0.555694,1.354586,-0.122412,-0.065748,...,-0.335390,-0.409056,-0.812781,-0.526097,1.461783,0.107684,0.027988,0.200562,0.021434,0.139505
4,CLASS1,1.368355,-4.538260,2.420986,0.851103,0.126369,-1.760576,1.621628,1.326649,0.078800,...,-0.812420,-0.221607,-0.296491,-0.468429,1.009561,-0.965536,0.171046,0.525166,0.059496,0.129792
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19011,CLASS9,0.346345,-0.770322,0.298561,0.275480,0.762620,1.128520,0.610760,-0.016280,0.085639,...,0.321350,0.057883,0.288556,0.146771,-0.573763,-0.037558,-0.343298,0.020653,-0.269085,0.000696
19012,CLASS9,0.229183,-0.909894,0.157672,0.446940,0.903342,0.993086,0.554921,-0.101541,0.227477,...,0.255882,-0.000470,0.393419,0.196276,-0.648246,-0.091611,-0.317294,-0.042135,-0.299622,-0.012695
19013,CLASS9,-0.125345,-0.464901,0.528063,0.216790,0.876640,1.174346,0.672115,-0.028889,0.042745,...,0.207981,0.040971,0.359276,0.151230,-0.568277,0.053789,-0.350647,0.092091,-0.214493,0.012375
19014,CLASS9,0.072178,-0.909929,0.330988,0.352800,0.577770,1.415850,0.334087,0.038787,-0.011450,...,-0.006198,0.037613,0.130477,0.117234,0.183406,0.390174,-0.356243,0.052883,-0.188776,0.029928
