## Importar datos

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, mean_squared_error
import ta
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import f1_score

In [2]:
from sklearn.metrics import accuracy_score
import optuna

In [3]:
# CSV files to load. Each name is passed as-is to pd.read_csv by
# reading_files() and is also used as the key of the dict it returns.
list_of_equity = [
    "aapl_5m_train.csv",
    "aapl_project_1m_test.csv",
    "aapl_project_1m_train.csv",
    "aapl_project_test.csv",
    "aapl_project_train.csv",
    "btc_project_1m_test.csv",
    "btc_project_1m_train.csv",
    "btc_project_test.csv",
    "btc_project_train.csv"
]

In [4]:
def reading_files(list_of_files: list) -> dict:
    """
    Load several CSV files into memory.

    Parameters
    ----------
    list_of_files : list of str
        Paths of the CSV files to read.

    Returns
    -------
    dict
        Maps each path, exactly as given, to the DataFrame that
        ``pd.read_csv`` produced for it. Empty when ``list_of_files`` is empty.
    """
    return {file: pd.read_csv(file) for file in list_of_files}

In [5]:
# Read every listed CSV up front; the result is keyed by file name.
files = reading_files(list_of_equity)
# Working dataset: the AAPL training file.
data = files["aapl_project_train.csv"]
data.head()

## Indicadores

In [7]:
# Feature table built from the closing price alone. Every feature is assigned
# as a Series aligned on the index of `data`, rather than as a one-column
# DataFrame that pandas has to coerce into a column.
close = data["Close"]
data_clean = data.loc[:, ["Close"]].copy()

# Close 15 rows ahead. The last 15 rows have no such value, so the dropna()
# at the end of this cell removes them.
data_clean["Y"] = close.shift(-15)

# Lagged closes: Close_t1 ... Close_t5.
for lag in range(1, 6):
    data_clean[f"Close_t{lag}"] = close.shift(lag)

# RSI over three look-back windows.
for rsi_window in (10, 20, 30):
    data_clean[f"rsi_{rsi_window}"] = ta.momentum.RSIIndicator(close, window=rsi_window).rsi()

# MACD line for three (fast, slow, signal) window configurations.
for fast, slow, sign in ((10, 24, 7), (12, 26, 9), (5, 35, 5)):
    macd = ta.trend.MACD(close=close, window_slow=slow, window_fast=fast, window_sign=sign)
    data_clean[f"macd_{fast}_{slow}_{sign}"] = macd.macd()

### bollinger bands
# The indicator objects keep their original names; the column suffix encodes
# window and deviation (e.g. "10_1_5" = window 10, 1.5 deviations).
bollinger_20_2 = ta.volatility.BollingerBands(close=close, window=20, window_dev=2)
bollinger_10_1_5 = ta.volatility.BollingerBands(close=close, window=10, window_dev=1.5)
bollinger_50_2_5 = ta.volatility.BollingerBands(close=close, window=50, window_dev=2.5)

for label, bands in (
    ("20_2", bollinger_20_2),
    ("10_1_5", bollinger_10_1_5),
    ("50_2_5", bollinger_50_2_5),
):
    data_clean[f"bollinger_{label}_hband"] = bands.bollinger_hband()
    data_clean[f"bollinger_{label}_lband"] = bands.bollinger_lband()
    data_clean[f"bollinger_{label}_mavg"] = bands.bollinger_mavg()

# Drop every row where a shifted or windowed feature is still undefined.
data_clean = data_clean.dropna()

# Disabled ATR features (they need the High and Low columns of `data`).
# data_clean["atr_14"] = (ta.volatility.AverageTrueRange(high=data["High"], low=data["Low"], close=data["Close"], window=14)).average_true_range()
# data_clean["atr_10"] = (ta.volatility.AverageTrueRange(high=data["High"], low=data["Low"], close=data["Close"], window=10)).average_true_range()
# data_clean["atr_20"] = (ta.volatility.AverageTrueRange(high=data["High"], low=data["Low"], close=data["Close"], window=20)).average_true_range()


## Visualización en plotly

In [None]:
# import plotly.graph_objs as go
# import plotly.io as pio

# # Graficar Bandas de Bollinger
# fig = go.Figure()

# # Graficar el precio de cierre
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["Close"], mode='lines', name='Close', line=dict(color='blue')))

# # Graficar las Bandas de Bollinger para la configuración 20, 2
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_20_2_hband"], mode='lines', name='Bollinger 20,2 High Band', line=dict(color='red')))
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_20_2_lband"], mode='lines', name='Bollinger 20,2 Low Band', line=dict(color='green')))
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_20_2_mavg"], mode='lines', name='Bollinger 20,2 MAVG', line=dict(color='orange')))

# # Graficar las Bandas de Bollinger para la configuración 10, 1.5
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_10_1_5_hband"], mode='lines', name='Bollinger 10,1.5 High Band', line=dict(color='purple')))
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_10_1_5_lband"], mode='lines', name='Bollinger 10,1.5 Low Band', line=dict(color='brown')))
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_10_1_5_mavg"], mode='lines', name='Bollinger 10,1.5 MAVG', line=dict(color='pink')))

# # Graficar las Bandas de Bollinger para la configuración 50, 2.5
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_50_2_5_hband"], mode='lines', name='Bollinger 50,2.5 High Band', line=dict(color='cyan')))
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_50_2_5_lband"], mode='lines', name='Bollinger 50,2.5 Low Band', line=dict(color='magenta')))
# fig.add_trace(go.Scatter(x=data_clean.index, y=data_clean["bollinger_50_2_5_mavg"], mode='lines', name='Bollinger 50,2.5 MAVG', line=dict(color='yellow')))

# fig.update_layout(title='Bandas de Bollinger', xaxis_title='Fecha', yaxis_title='Precio', legend_title='Indicadores')
# pio.show(fig)

# # Graficar ATR
# fig_atr = go.Figure()

# fig_atr.add_trace(go.Scatter(x=data_clean.index, y=data_clean["atr_14"], mode='lines', name='ATR 14', line=dict(color='blue')))
# fig_atr.add_trace(go.Scatter(x=data_clean.index, y=data_clean["atr_10"], mode='lines', name='ATR 10', line=dict(color='red')))
# fig_atr.add_trace(go.Scatter(x=data_clean.index, y=data_clean["atr_20"], mode='lines', name='ATR 20', line=dict(color='green')))

# fig_atr.update_layout(title='Average True Range (ATR)', xaxis_title='Fecha', yaxis_title='ATR', legend_title='Indicadores')
# pio.show(fig_atr)


## Revisión de NaN y funciones auxiliares de métricas

In [8]:
# Classification frame: every feature column, without the look-ahead column "Y".
data_clas = data_clean.drop(columns="Y").copy()

# Names of the columns that still hold at least one NaN.
columns_with_nan = [col for col in data_clas.columns if data_clas[col].isna().any()]

# View restricted to those columns, displayed for inspection.
df_with_nan = data_clas[columns_with_nan]
df_with_nan

In [9]:
from sklearn.metrics import confusion_matrix
def calculate_confusion_matrix_metrics(model, X_train, y_train):
    """
    Predict with ``model`` on ``X_train`` and break the confusion matrix into
    its four binary cells.

    Parameters
    ----------
    model : object exposing ``predict(X)``
    X_train : features handed to ``model.predict``
    y_train : true labels compared against the predictions

    Returns
    -------
    dict
        Keys ``confusion_matrix`` (the full matrix), ``true_negatives``
        (cell [0, 0]), ``false_negatives`` (cell [1, 0]), ``true_positives``
        (cell [1, 1]) and ``false_positives`` (cell [0, 1]).
    """
    predictions = model.predict(X_train)
    mat = confusion_matrix(y_train, predictions)

    # Cells are read by position; a matrix smaller than 2x2 raises IndexError.
    return dict(
        confusion_matrix=mat,
        true_negatives=mat[0, 0],
        false_negatives=mat[1, 0],
        true_positives=mat[1, 1],
        false_positives=mat[0, 1],
    )
def fpr(false_positives, true_negatives):
    """Return the false positive rate, FP / (FP + TN)."""
    actual_negatives = false_positives + true_negatives
    return false_positives / actual_negatives


## Dividimos el dataset

In [10]:
# Binary target: True when the next row closes above the current row.
next_close = data_clas["Close"].shift(-1)
data_clas["Y"] = data_clas["Close"] < next_close

# The last row has no next close. A comparison against NaN evaluates to False,
# so that row would receive a negative label with no evidence behind it; keep
# it out of the modelling set. A separate frame is used so that re-running
# this cell does not keep shrinking data_clas.
data_model = data_clas.loc[next_close.notna()]

# Chronological split (no shuffling): the first 80% of rows train, the last 20% test.
X_train, X_test, y_train, y_test = train_test_split(
    data_model.drop(columns="Y"),
    data_model["Y"],
    shuffle=False,
    test_size=0.2,
)




In [11]:
# Baseline classifier: logistic regression fitted on the training split.
classification_model = LogisticRegression().fit(X_train, y_train)

logistic_pred = classification_model.predict(X_train)

# NOTE: every score in this cell is measured on the training split itself,
# so it describes fit quality, not out-of-sample performance.
logistic_score = classification_model.score(X_train, y_train)


### Classification V2
ran_forest = RandomForestClassifier().fit(X_train, y_train)
svc = SVC(C=500, max_iter=10_000).fit(X_train, y_train)
xgb = XGBClassifier().fit(X_train, y_train)

## F1 score of each model (training split)
f1_score_logistic = f1_score(y_train, logistic_pred)
f1_score_RanFore = f1_score(y_train, ran_forest.predict(X_train))
f1_score_svc = f1_score(y_train, svc.predict(X_train))
f1_score_xgb = f1_score(y_train, xgb.predict(X_train))

## Confusion-matrix cells and false positive rate for SVC and XGBoost
# metrics_svc is computed from the SVC model (it was previously fed the
# random forest, so fpr_svc reported the wrong model).
metrics_svc = calculate_confusion_matrix_metrics(svc, X_train, y_train)
metrics_xgb = calculate_confusion_matrix_metrics(xgb, X_train, y_train)

fpr_svc = fpr(metrics_svc["false_positives"], metrics_svc["true_negatives"])
fpr_xgb = fpr(metrics_xgb["false_positives"], metrics_xgb["true_negatives"])


## Optimizando XGBoost

In [19]:
# Objective function for the Optuna study.
def objective(trial):
    """
    Train one XGBoost classifier with hyperparameters proposed by ``trial``
    and return its false positive rate.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Source of the hyperparameter suggestions.

    Returns
    -------
    float
        FP / (FP + TN) of the predictions on ``X_test`` against ``y_test``;
        the study minimizes this value.

    Notes
    -----
    Reads the notebook globals ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. Because trials are scored on ``X_test``, that split stops
    being an untouched hold-out once the study has run.
    """
    # Search space. Every suggestion is stored under the XGBClassifier keyword
    # it controls, so the whole dict can be passed straight to the constructor.
    params = {
        "n_estimators": trial.suggest_int('n_estimators', 100, 1000),
        "max_depth": trial.suggest_categorical('max_depth', [3, 5, 7, 9, 12, 15]),
        "max_leaves": trial.suggest_int('max_leaves', 2, 100),
        "learning_rate": trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
        # Tree-based boosters only, so the tree parameters above stay meaningful.
        "booster": trial.suggest_categorical('booster', ["gbtree", "dart"]),
        "gamma": trial.suggest_float('gamma', 1e-10, 1e1, log=True),
        "reg_lambda": trial.suggest_float('reg_lambda', 1e-5, 1e2, log=True),
    }

    # Build the model with the suggested values and train it once.
    model = XGBClassifier(**params)
    model.fit(X_train, y_train)

    # Score the trial.
    y_pred = model.predict(X_test)
    tn, fp, _, _ = confusion_matrix(y_test, y_pred).ravel()

    # False positive rate. The local name differs from the module-level
    # fpr() helper so that helper is not shadowed here.
    false_positive_rate = fp / (fp + tn)

    return false_positive_rate

# Create the study. A lower false positive rate is better, hence "minimize".
# The sampler is seeded so the sequence of proposed trials is repeatable
# (as long as the objective itself is deterministic).
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)

# Run the optimization.
study.optimize(objective, n_trials=100)


# Show the best parameters.
# Disabled snippet for reloading a persisted study:
#saved_study = optuna.load_study(study_name=study, storage=storage_url)
#storage_url = "sqlite:///example.db"
print("Best trial:", study.best_trial.number)
print("Best value:", study.best_trial.value)
print("Best hyperparameters:", study.best_params)