In [1]:
import pandas as pd
import numpy as np

In [2]:
combined_df = pd.read_csv("../data/stocks-COMBINED-Jan2020-Dec2024.csv")
combined_df.head(2)

Unnamed: 0.1,Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Year,EPS,Ticker,Price_Return,Log_Return,Volatility,Moving_Average,RSI,MACD,Sharpe_Ratio,Max_Drawdown
0,0,2024-11-01,141.86,144.44,144.54,141.32,39030000.0,-0.0153,2024,0.7,AMD,0.008173,0.00814,0.020197,143.83,411.212815,0.728616,-0.010539,-0.000235
1,1,2024-10-31,144.07,147.8,148.68,143.33,44390000.0,-0.0305,2024,0.7,AMD,0.015579,0.015459,0.018745,142.68,310.200927,0.792438,-0.010539,-0.000235


In [4]:
combined_df.shape

(4872, 19)

### 1 Standardize data to have mean of 0 and standard deviation of 1


In [9]:
combined_df.dtypes

Unnamed: 0          int64
Date               object
Price             float64
Open              float64
High              float64
Low               float64
Vol.              float64
Change %          float64
Year                int64
EPS               float64
Ticker             object
Price_Return      float64
Log_Return        float64
Volatility        float64
Moving_Average    float64
RSI               float64
MACD              float64
Sharpe_Ratio      float64
Max_Drawdown      float64
dtype: object

In [11]:
numerical_features_values

NameError: name 'numerical_features_values' is not defined

In [None]:
numerical_features = combined_df.select_dtypes(include=['float64', 'int64'])


In [18]:
numerical_features.drop('Unnamed: 0', axis=1, inplace=True)

In [21]:
mean = np.mean(numerical_features.values, axis=0)
std = np.std(numerical_features.values, axis=0)

standardized_numerical_features = (numerical_features.values - mean) / std

  x = asanyarray(arr - arrmean)
  standardized_numerical_features = (numerical_features.values - mean) / std


In [22]:
### 2 Compute covariance matrix
cov_matrix = np.cov(standardized_numerical_features.T)


### 3 Eigen Decomposition


In [25]:
print("Contains NaN values:", np.isnan(cov_matrix).any())
print("Contains Inf values:", np.isinf(cov_matrix).any())

Contains NaN values: True
Contains Inf values: False


In [29]:
cov_matrix = np.nan_to_num(cov_matrix, nan=np.nanmean(cov_matrix))


In [None]:
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

In [34]:
eigenvalues.shape

(16,)

In [38]:
feature_names = list(numerical_features.columns)
feature_names

['Price',
 'Open',
 'High',
 'Low',
 'Vol.',
 'Change %',
 'Year',
 'EPS',
 'Price_Return',
 'Log_Return',
 'Volatility',
 'Moving_Average',
 'RSI',
 'MACD',
 'Sharpe_Ratio',
 'Max_Drawdown']

In [39]:
eigenvalues

array([ 6.20731196e+00,  2.26949093e+00,  2.04280909e+00,  1.20609263e+00,
        9.92405406e-01,  1.02556844e+00,  6.76852462e-01, -2.15173333e-01,
        4.88032807e-01,  3.17628153e-01,  1.27824080e-01,  7.32463133e-05,
        2.90717070e-04,  3.90454296e-04,  1.28702134e-03,  1.00560592e-03])

In [41]:
feature_mapping = {
    feature: {
        "eigenvalue": eigenvalues[i],
        "eigenvector": eigenvectors[i]
    }
    for i, feature in enumerate(feature_names)
}

In [42]:
feature_mapping

{'Price': {'eigenvalue': 6.207311964969001,
  'eigenvector': array([-0.39409826,  0.01471869,  0.0236316 , -0.11637453, -0.02145233,
         -0.00442406, -0.16139493,  0.04194981, -0.01344508, -0.01264141,
          0.02984397, -0.53360909,  0.69014699, -0.1908252 , -0.05796664,
          0.00577897])},
 'Open': {'eigenvalue': 2.2694909262663594,
  'eigenvector': array([-0.39394528,  0.01388536,  0.02285068, -0.1155755 , -0.02030135,
         -0.02863204, -0.16239553,  0.05118574, -0.02107293, -0.01912106,
          0.02464463, -0.47901268, -0.63852422,  0.18278904, -0.35533646,
         -0.04744511])},
 'High': {'eigenvalue': 2.0428090884843955,
  'eigenvector': array([-0.39335836,  0.01662453,  0.02442086, -0.12166216, -0.01926727,
         -0.01923071, -0.16824471,  0.04433256, -0.01497578, -0.01564279,
          0.0274289 ,  0.50668161,  0.25848013,  0.64166352, -0.24211953,
         -0.05889459])},
 'Low': {'eigenvalue': 1.206092631914168,
  'eigenvector': array([-0.39470771,  0.

### 4 Sort feature map for descending eigenvalues


In [43]:
sorted_feature_map = dict(
    sorted(feature_mapping.items(), key=lambda item: item[1]['eigenvalue'], reverse=True)
)

In [44]:
sorted_feature_map

{'Price': {'eigenvalue': 6.207311964969001,
  'eigenvector': array([-0.39409826,  0.01471869,  0.0236316 , -0.11637453, -0.02145233,
         -0.00442406, -0.16139493,  0.04194981, -0.01344508, -0.01264141,
          0.02984397, -0.53360909,  0.69014699, -0.1908252 , -0.05796664,
          0.00577897])},
 'Open': {'eigenvalue': 2.2694909262663594,
  'eigenvector': array([-0.39394528,  0.01388536,  0.02285068, -0.1155755 , -0.02030135,
         -0.02863204, -0.16239553,  0.05118574, -0.02107293, -0.01912106,
          0.02464463, -0.47901268, -0.63852422,  0.18278904, -0.35533646,
         -0.04744511])},
 'High': {'eigenvalue': 2.0428090884843955,
  'eigenvector': array([-0.39335836,  0.01662453,  0.02442086, -0.12166216, -0.01926727,
         -0.01923071, -0.16824471,  0.04433256, -0.01497578, -0.01564279,
          0.0274289 ,  0.50668161,  0.25848013,  0.64166352, -0.24211953,
         -0.05889459])},
 'Low': {'eigenvalue': 1.206092631914168,
  'eigenvector': array([-0.39470771,  0.

In [54]:
# Top 5 componentss
k = 5

top_k_features = dict(
    list(sorted_feature_map.items())[:k]
)

for feature, eigen in top_k_features.items():
    print(f"Feature: {feature}")
    print(f"    Eigenvalue: {eigen['eigenvalue']:.3f}")
    print(f"    Eigenvector: {eigen['eigenvector']}")

Feature: Price
    Eigenvalue: 6.207
    Eigenvector: [-0.39409826  0.01471869  0.0236316  -0.11637453 -0.02145233 -0.00442406
 -0.16139493  0.04194981 -0.01344508 -0.01264141  0.02984397 -0.53360909
  0.69014699 -0.1908252  -0.05796664  0.00577897]
Feature: Open
    Eigenvalue: 2.269
    Eigenvector: [-0.39394528  0.01388536  0.02285068 -0.1155755  -0.02030135 -0.02863204
 -0.16239553  0.05118574 -0.02107293 -0.01912106  0.02464463 -0.47901268
 -0.63852422  0.18278904 -0.35533646 -0.04744511]
Feature: High
    Eigenvalue: 2.043
    Eigenvector: [-0.39335836  0.01662453  0.02442086 -0.12166216 -0.01926727 -0.01923071
 -0.16824471  0.04433256 -0.01497578 -0.01564279  0.0274289   0.50668161
  0.25848013  0.64166352 -0.24211953 -0.05889459]
Feature: Low
    Eigenvalue: 1.206
    Eigenvector: [-0.39470771  0.01138734  0.02087996 -0.11059511 -0.0220058  -0.01508265
 -0.15551793  0.04992458 -0.01988696 -0.01707986  0.0263563   0.47758325
 -0.13333295 -0.71219658 -0.21711369  0.04910445]
Feat