# Análisis De Componentes Principales

Las tasas de rendimiento semanales de cinco acciones que cotizan en la Bolsa de Valores de Nueva York
se dan a continuación

In [1]:
import numpy as np

In [2]:
names = ("JPMorgan","Citibank","WellsFargo","RoyalDutchShell","ExxonMobil")
stock = np.genfromtxt('Stock-Price.txt', names=names)

In [3]:
import pandas as pd

In [4]:
data = pd.DataFrame(stock)

In [5]:
data.tail(8)

Unnamed: 0,JPMorgan,Citibank,WellsFargo,RoyalDutchShell,ExxonMobil
95,0.025683,0.052527,0.040696,-0.031662,-0.018848
96,-0.006062,0.008633,0.005841,0.044558,0.030594
97,0.021745,0.022964,0.029198,0.00844,0.03193
98,0.003374,-0.015306,-0.023824,-0.001674,-0.017227
99,0.003363,0.002902,-0.003051,-0.001219,-0.009701
100,0.017015,0.009506,0.018199,-0.016176,-0.007561
101,0.010393,-0.002661,0.004429,-0.002482,-0.01645
102,-0.012795,-0.014368,-0.01874,-0.004976,-0.016373


In [6]:
# Matriz de COVARIANZA

S = data.cov(); S

Unnamed: 0,JPMorgan,Citibank,WellsFargo,RoyalDutchShell,ExxonMobil
JPMorgan,0.000433,0.000276,0.000159,6.4e-05,8.9e-05
Citibank,0.000276,0.000439,0.00018,0.000181,0.000123
WellsFargo,0.000159,0.00018,0.000224,7.3e-05,6.1e-05
RoyalDutchShell,6.4e-05,0.000181,7.3e-05,0.000722,0.000508
ExxonMobil,8.9e-05,0.000123,6.1e-05,0.000508,0.000766


In [7]:
# Matriz de CORRELACIONES

R = data.corr(); R

Unnamed: 0,JPMorgan,Citibank,WellsFargo,RoyalDutchShell,ExxonMobil
JPMorgan,1.0,0.632288,0.510497,0.114602,0.154463
Citibank,0.632288,1.0,0.574142,0.322292,0.212675
WellsFargo,0.510497,0.574142,1.0,0.182499,0.146207
RoyalDutchShell,0.114602,0.322292,0.182499,1.0,0.683378
ExxonMobil,0.154463,0.212675,0.146207,0.683378,1.0


In [8]:
# Eigenvalores y vectores de S

autoval_S, autovec_S = np.linalg.eig(S)

autoval_S

array([0.00136768, 0.00070116, 0.0002538 , 0.0001426 , 0.00011889])

In [9]:
autovec_S

array([[ 0.22282283,  0.62522603, -0.32611218, -0.662759  ,  0.11765952],
       [ 0.30728997,  0.57039003,  0.24959014,  0.4140935 , -0.58860803],
       [ 0.1548103 ,  0.34450492,  0.03763929,  0.49704993,  0.78030428],
       [ 0.63896798, -0.24794753,  0.64249741, -0.30886888,  0.14845546],
       [ 0.65090441, -0.32184779, -0.64586064,  0.21637575, -0.09371777]])

In [10]:
# Eigenvalores y vectores de R

autoval_R, autovec_R = np.linalg.eig(R)

autoval_R

array([2.43727312, 1.40701266, 0.25516988, 0.50051275, 0.40003159])

In [11]:
autovec_R

array([[ 0.46908321,  0.36800696,  0.3841216 ,  0.60431522,  0.36302278],
       [ 0.53240549,  0.23646236, -0.49618794,  0.13610618, -0.6292079 ],
       [ 0.46516333,  0.3151795 ,  0.07116948, -0.7718281 ,  0.28896583],
       [ 0.38734594, -0.58503725,  0.59466408, -0.09336192, -0.38125151],
       [ 0.36068206, -0.60584628, -0.49755167,  0.10882629,  0.49341454]])

In [12]:
# Análisis de componentes principales

from sklearn.decomposition import PCA

In [13]:
# Elegimos número de componentes
n = data.shape[1]
pca = PCA(n_components=n)

In [14]:
model = pca.fit_transform(data)

# Función para PCA

In [103]:
def function_pca(datos, matriz):
    from sklearn.decomposition import PCA
    n = datos.shape[1]
    pc = []
    for i in range(1,n+1): 
        pc.append('P%s'%i)
        
    if matriz == "S":
        S = datos.cov()
        eigenval, eigenvec = np.linalg.eig(S)
        
        total_sum = sum(eigenval)
        var_exp = [(i/total_sum) for i in sorted(eigenval, reverse=True)]
        cum_var_exp = np.cumsum(var_exp)
        
        pca = PCA(n_components=n)
        model = pca.fit_transform(datos)
        sd = pd.DataFrame(model).std()
        eigenval=sorted(eigenval, reverse=True)
        
        results = pd.DataFrame({'PC':pc, 'Eigenvalues':eigenval, 'Standard deviation': sd, 'Proportion of Variance':var_exp, 
                                'Cumulative Proportion':cum_var_exp}).T
        PC = pd.DataFrame(eigenvec)
        
    if matriz == "R":
        R = datos.corr()
        eigenval, eigenvec = np.linalg.eig(R)
        
        total_sum = sum(eigenval)
        var_exp = [(i/total_sum) for i in sorted(eigenval, reverse=True)]
        cum_var_exp = np.cumsum(var_exp)
        
        from sklearn.preprocessing import StandardScaler
        datos_scale = StandardScaler().fit_transform(datos)
        pca = PCA(n_components=n)
        model = pca.fit_transform(datos_scale)
        sd = pd.DataFrame(model).std()
        eigenval=sorted(eigenval, reverse=True)
        
        results = pd.DataFrame({'PC':pc, 'Eigenvalues':eigenval, 'Standard deviation': sd, 'Proportion of Variance':var_exp, 
                                'Cumulative Proportion':cum_var_exp}).T
        PC = pd.DataFrame(eigenvec)
    
    return list([results, PC])

In [104]:
function_pca(data, 'S') # Usando la matriz de covarianza

[                               0         1         2         3         4
 PC                            P1        P2        P3        P4        P5
 Eigenvalues             0.001368  0.000701  0.000254  0.000143  0.000119
 Standard deviation      0.036982  0.026479  0.015931  0.011942  0.010904
 Proportion of Variance  0.529261  0.271333  0.098216  0.055184  0.046007
 Cumulative Proportion   0.529261  0.800594  0.898809  0.953993       1.0,
           0         1         2         3         4
 0  0.222823  0.625226 -0.326112 -0.662759  0.117660
 1  0.307290  0.570390  0.249590  0.414094 -0.588608
 2  0.154810  0.344505  0.037639  0.497050  0.780304
 3  0.638968 -0.247948  0.642497 -0.308869  0.148455
 4  0.650904 -0.321848 -0.645861  0.216376 -0.093718]

In [106]:
function_pca(data, 'R') # Usando la matriz de correlacion

[                               0         1         2         3         4
 PC                            P1        P2        P3        P4        P5
 Eigenvalues             2.437273  1.407013  0.500513  0.400032   0.25517
 Standard deviation      1.568811  1.191976  0.710929  0.635573  0.507614
 Proportion of Variance  0.487455  0.281403  0.100103  0.080006  0.051034
 Cumulative Proportion   0.487455  0.768857   0.86896  0.948966       1.0,
           0         1         2         3         4
 0  0.469083  0.368007  0.384122  0.604315  0.363023
 1  0.532405  0.236462 -0.496188  0.136106 -0.629208
 2  0.465163  0.315179  0.071169 -0.771828  0.288966
 3  0.387346 -0.585037  0.594664 -0.093362 -0.381252
 4  0.360682 -0.605846 -0.497552  0.108826  0.493415]