In [2]:
import pandas as pd
import os
from src.__special__ import indices_path
import numpy as np

file_path = os.path.join(indices_path, 'INDEXP.csv')

INDEXP = pd.read_csv(file_path)

In [3]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Seleccionar todas las columnas menos la primera
INDEXP_pca = INDEXP.iloc[:, 1:].dropna()  # Esto selecciona todas las columnas excepto la primera y elimina NaN

# Estandarizar datos (equivalente a matriz de correlación en PCA)
scaler = StandardScaler()
data_scaled = scaler.fit_transform(INDEXP_pca)

# Aplicar PCA
pca = PCA(n_components=5)
pca_result = pca.fit_transform(data_scaled)

# Convertir resultados en DataFrame
pca_df = pd.DataFrame(pca_result, index=INDEXP_pca.index, columns=[f'PC{i+1}' for i in range(5)])

In [4]:
# 6. Ver la varianza explicada por cada componente
explained_variance = pca.explained_variance_ratio_

# 7. Cargas factoriales (coeficientes de los componentes principales)
loadings = pd.DataFrame(pca.components_, columns=INDEXP_pca.columns, index=[f'PC{i+1}' for i in range(5)]).T

# Mostrar las cargas factoriales
loadings


Unnamed: 0,PC1,PC2,PC3,PC4,PC5
"@dlog(exp1,0,12)",0.120798,0.261696,0.175128,-0.271865,-0.251008
"@dlog(exp2,0,12)",0.188353,0.01811,-0.163118,0.112744,-0.054293
"@dlog(exp3,0,12)",0.174244,0.156857,-0.213879,-0.02449,-0.064256
"@dlog(exp4,0,12)",0.17901,0.125594,-0.210183,-0.062828,-0.061677
"@dlog(exp5,0,12)",0.142445,0.174117,-0.034352,-0.280597,0.481417
"@dlog(exp6,0,12)",0.181704,0.104289,-0.196224,-0.040104,-0.036218
"@dlog(exp7,0,12)",0.159469,0.205319,-0.241102,-0.000966,-0.049819
"@dlog(exp8,0,12)",0.169866,0.169373,-0.233973,-0.015436,-0.057274
"@dlog(exp9,0,12)",0.173799,0.153355,-0.227905,-0.012528,-0.048951
"@dlog(exp10,0,12)",0.181205,0.155679,0.00906,-0.192434,-0.092913


In [5]:
# 8. Guardar resultados
pca_df.to_excel("PCA_Result.xlsx", sheet_name="PCA")

# 9. Mostrar resultados
explained_variance

array([0.66880315, 0.13342479, 0.07517508, 0.03002344, 0.02588353])

In [6]:
fact_load = loadings.to_numpy()

fact_load

array([[ 1.20797534e-01,  2.61695834e-01,  1.75127618e-01,
        -2.71865419e-01, -2.51007691e-01],
       [ 1.88353376e-01,  1.81101179e-02, -1.63117790e-01,
         1.12744201e-01, -5.42929067e-02],
       [ 1.74244095e-01,  1.56856686e-01, -2.13879264e-01,
        -2.44901417e-02, -6.42558658e-02],
       [ 1.79010049e-01,  1.25593899e-01, -2.10182675e-01,
        -6.28275327e-02, -6.16765603e-02],
       [ 1.42444978e-01,  1.74117478e-01, -3.43515918e-02,
        -2.80596606e-01,  4.81416970e-01],
       [ 1.81703720e-01,  1.04288601e-01, -1.96223744e-01,
        -4.01040610e-02, -3.62180234e-02],
       [ 1.59469006e-01,  2.05319344e-01, -2.41102386e-01,
        -9.66388025e-04, -4.98192135e-02],
       [ 1.69866499e-01,  1.69373398e-01, -2.33973319e-01,
        -1.54363788e-02, -5.72737072e-02],
       [ 1.73798826e-01,  1.53354588e-01, -2.27904718e-01,
        -1.25278424e-02, -4.89508738e-02],
       [ 1.81204796e-01,  1.55678797e-01,  9.05968210e-03,
        -1.92433507e-01

In [7]:
original_values = pd.read_csv(os.path.join(os.path.dirname(indices_path), "variables_originales_exp.csv"))

original_values

Unnamed: 0,_date_,exp1,exp2,exp3,exp4,exp5,exp6,exp7,exp8,exp9,exp10,exp11,exp12
0,1995-01-01,5065287.362,,,2962.878,18261.007,30299.0,,,,43.49076,8.581366e+06,
1,1995-02-01,5625751.133,,,2873.870,19502.294,28024.0,,,,43.88784,9.535998e+06,
2,1995-03-01,6555991.556,,,3650.007,22124.574,32894.0,,,,43.69789,1.067358e+07,
3,1995-04-01,5628182.383,,,4573.721,19504.515,38988.0,,,,45.15942,9.031623e+06,
4,1995-05-01,5783096.492,,,5117.085,20737.614,44153.0,,,,43.50870,9.424005e+06,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
379,2026-08-01,,,,,,,,,,,,
380,2026-09-01,,,,,,,,,,,,
381,2026-10-01,,,,,,,,,,,,
382,2026-11-01,,,,,,,,,,,,


In [9]:
or_values = original_values.to_numpy()

or_values = or_values[:, 1:6]
or_values

array([[5065287.362, nan, nan, 2962.878, 18261.007],
       [5625751.133, nan, nan, 2873.87, 19502.294],
       [6555991.556, nan, nan, 3650.007, 22124.574],
       ...,
       [nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan]], shape=(384, 5), dtype=object)

In [10]:
print(np.shape(fact_load.T))
print(np.shape(or_values))

y = np.dot(fact_load.T, or_values)
# print(y.shape)

# y

(5, 36)
(384, 5)
