Este notebook se ha usado para analizar cuál es el PCA que mejor se adapta a las necesidades del problema.

Se ha comentado todo el análisis, dejando ejecutable únicamente la parte del código que crea los PCA correspondientes.

In [4]:
import sys
import warnings
import pickle
import os
from datetime import datetime
from dateutil.relativedelta import relativedelta

import pandas as pd
import numpy as np
import numba as nb
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# Call seaborn's `set()` with its defaults and suppress all warnings.
sns.set()
warnings.filterwarnings("ignore")

# Ensure the web-app root is on the import path before loading the
# tagging helper from the project's `tools` package.
app_path = '/Users/esanc147/Documents/business/bsm03/web_app'
if app_path not in sys.path:
    sys.path.append(app_path)
from tools.tags import create_tags

## Valores por defecto

In [5]:
# Column names applied to the technical-indicator CSV files: the two key
# columns followed by the indicator columns.
COLUMNS_TECH = [
    'symbol', 'date',
    'MACD_Signal', 'MACD_Hist', 'MACD',
    'SlowK', 'SlowD',
    'Chaikin A/D', 'OBV',
    'RSI21', 'ADX21', 'CCI21', 'Aroon Up21', 'Aroon Down21',
    'RSI28', 'ADX28', 'CCI28', 'Aroon Down28', 'Aroon Up28',
    'Real Lower Band28', 'Real Upper Band28', 'Real Middle Band28',
    'SMA50', 'RSI50', 'ADX50', 'CCI50', 'Aroon Up50', 'Aroon Down50',
]

# Column names applied to the price CSV files.
COLUMNS = ['symbol', 'date', 'close', 'volume', 'open', 'high', 'low']

# Feature columns: close and volume followed by every technical indicator
# (i.e. COLUMNS_TECH without its 'symbol' and 'date' key columns).
U_COLUMNS = ['close', 'volume'] + COLUMNS_TECH[2:]
# Root directory of the web-app data files.
FULL_PATH = "/Users/esanc147/Documents/business/bsm03/web_app/data"

# One symbol per entry of the `tech` directory: the text before the first
# '.csv' in the file name, skipping every entry whose name contains '.L'.
# NOTE(review): entries that are not CSV files are not filtered out here.
SYMBOLS = [
    file_name.partition('.csv')[0]
    for file_name in os.listdir(f"{FULL_PATH}/tech/")
    if file_name.find('.L') == -1
]

# Periods iterated when loading scalers and saving PCA models; each value is
# the `{day}` suffix of the corresponding pickle file names.
PERIOD = [7, 14, 21, 28]

## Carga de los datos

In [6]:
# Load the price and technical-indicator CSVs of every symbol, inner-join
# them on (symbol, date) and stack all symbols into `tot_dataframe`.
symbols = SYMBOLS

total_dataframes = []

# enumerate(start=1) replaces the hand-maintained counter; the progress
# message is still printed on every 1000th symbol.
for i, symbol in enumerate(symbols, start=1):
    if (i % 1000) == 0:
        print("+1000 symbols loaded")

    # Price data: the files are read without a header row, so column names
    # come from COLUMNS.
    path_close = f"{FULL_PATH}/close/{symbol}.csv"
    df_close = pd.read_csv(path_close, names=COLUMNS)
    df_close['date'] = pd.to_datetime(df_close['date'])
    df_close['volume'] = df_close['volume'].astype(float)

    # Technical indicators: column names come from COLUMNS_TECH.
    path_tech = f"{FULL_PATH}/tech/{symbol}.csv"
    df_tech = pd.read_csv(path_tech, names=COLUMNS_TECH)
    df_tech['date'] = pd.to_datetime(df_tech['date'])

    # Tagging step, disabled on purpose and kept only for reference:
    #    list_df_tagged = []
    #    for period in PERIOD:
    #        df_aux = create_tags(df_close, period)
    #        df_aux[f"pct_change_{period}"] = df_aux[f"pct_change_{period}"].astype(float)
    #        list_df_tagged.append(df_aux)
    #    df_tagged = pd.concat(list_df_tagged, axis=1)
    #    df_tagged.dropna(inplace=True)

    # Align both frames on the (symbol, date) index; the inner join keeps
    # only the dates present in both files.
    df_close = df_close.set_index(['symbol', 'date'])
    df_tech = df_tech.set_index(['symbol', 'date'])
    dataframe = pd.concat([df_close, df_tech], join='inner', axis=1)
    total_dataframes.append(dataframe)

tot_dataframe = pd.concat(total_dataframes)

+1000 symbols loaded
+1000 symbols loaded
+1000 symbols loaded
+1000 symbols loaded
+1000 symbols loaded
+1000 symbols loaded
+1000 symbols loaded


## Cargar los modelos

## Calcula las predicciones

## Aplicamos la reducción de dimensionalidad

## Guardamos los modelos de reducción de dimensionalidad

In [7]:
# Fit one 4-component PCA per period on the scaled feature columns and save
# each fitted model next to its scaler.
df_reset = tot_dataframe.reset_index()

# Keep rows whose year is after 2019 and whose month is January-June,
# re-index by (symbol, date) and order them newest first.
df = df_reset[(df_reset['date'].dt.year > 2019)
              & (df_reset['date'].dt.month <= 6)] \
         .set_index(['symbol', 'date']) \
         .sort_values(by='date', ascending=False)

for day in PERIOD:

    path_rs = f"{FULL_PATH}/model/robust_scaler_{day}.pkl"

    # Context managers close the file handles deterministically; the
    # previous bare open() calls left them open.
    # NOTE: unpickling can execute arbitrary code, so these model files
    # must come from a trusted source.
    with open(path_rs, 'rb') as scaler_file:
        scl = pickle.load(scaler_file)

    # Scale the feature matrix with the already-fitted scaler of this period.
    X = df[U_COLUMNS].values
    X_scl = scl.transform(X)

    # Fixed random_state keeps the fit reproducible between runs.
    pca = PCA(4, random_state=32)
    pca.fit(X_scl)

    with open(f"{FULL_PATH}/model/pca_{day}.pkl", 'wb') as pca_file:
        pickle.dump(pca, pca_file)