# Introduction

This notebook builds a momentum trading strategy. We add momentum indicators to the strategy, check how it performs, and backtest it to see the results.

We use machine learning models to predict the signal of the strategy. The main idea is to feed the technical indicators and the momentum indicators to a model that predicts the signal, and use that prediction to decide whether it is better to buy or sell the stock.

The following models will be used:

* Logistic Regression
* Random Forest
* Neural Networks

**Steps to follow:**

1. Import the libraries

2. Import the data

3. Add technical indicators, signal and momentum indicators

4. Apply machine learning models to predict the signal

5. Vectorized Backtesting

6. Visualize the results

In [1]:
import pandas as pd
import numpy as np
import talib as ta
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import matthews_corrcoef, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import quantstats as qs

import process_data
import ml
import backtest

In [2]:
# Read the price table; the first CSV column becomes the (parsed) date index.
selected_stocks = pd.read_csv(
    './assets/selected_stocks.csv',
    index_col=0,
    parse_dates=True,
)

# One single-column frame per ticker (double brackets keep a DataFrame, not a Series).
df_tve, df_aapl = selected_stocks[['tve']], selected_stocks[['aapl']]

In [3]:
# Add the indicator/signal feature columns and then the `target` column to the AAPL frame,
# using the project's process_data helpers (both read prices from the 'aapl' column).
# NOTE(review): `set_targeet` looks like a misspelling of `set_target` — confirm the
# function name in process_data before renaming it here.
# presumably `horizon=3` is the look-ahead used to build `target` — TODO confirm in process_data.
df_aapl = process_data.set_technical_indicators_and_signals(df_aapl, column_prices='aapl')
df_aapl = process_data.set_targeet(df_aapl, horizon=3, column_prices='aapl')

In [4]:
# Preview the first rows of the AAPL frame after feature and target creation.
df_aapl.head()

Unnamed: 0_level_0,aapl,xs_rsi_14,x_rsi_pos_14,x_macd_pos_9_21_9,xs_macd_9_21_9,xs_macd_signal_9_21_9,target
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-03-03,0.982746,60.681822,1.0,1,0.037319,0.027247,-1.0
2000-03-06,0.964992,58.447633,1.0,1,0.03865,0.029528,-1.0
2000-03-07,0.943398,55.75865,1.0,1,0.036559,0.030934,1.0
2000-03-08,0.936681,54.91234,1.0,1,0.033641,0.031475,-1.0
2000-03-09,0.9386,55.121897,1.0,-1,0.031116,0.031403,-1.0


In [5]:
# df_train, df_validation = process_data.get_train_and_validation_df(df_aapl, train_size=0.8)
# X, y = process_data.get_X_y(df_train)

In [6]:
def print_up_down_counts(predictions):
    """Print how many predictions are "up" (> 0.5) and how many are "down".

    Args:
        predictions: Iterable of pandas Series of numeric predictions
            (one Series per walk-forward fold).

    Every value strictly greater than 0.5 counts as up; everything else,
    including exactly 0.5 and NaN, counts as down.
    """
    up_count = 0
    down_count = 0
    for p in predictions:
        # Vectorized comparison instead of one label lookup per element;
        # NaN > 0.5 is False, so NaN falls into the "down" bucket.
        ups = int((p > 0.5).sum())
        up_count += ups
        down_count += len(p) - ups

    print(f'Up count: {up_count}')
    print(f'Down count: {down_count}')

In [7]:
def set_prediction_column(df, predictions):
    """Write a +1 / -1 ``prediction`` column into ``df`` from fold predictions.

    Args:
        df: DataFrame modified in place; rows are addressed by index label.
        predictions: Iterable of pandas Series whose index labels select the
            rows of ``df`` to fill.

    A value strictly above 0.5 is stored as 1, anything else as -1.
    Returns None.
    """
    for fold in predictions:
        for label in fold.index:
            # Threshold the raw prediction into a long (1) / short (-1) signal.
            df.loc[label, 'prediction'] = 1 if fold[label] > 0.5 else -1



In [8]:
def handle_ml_model(df, model_name, model=None):
    """Run the walk-forward evaluation of one model on ``df`` and store its predictions.

    Args:
        df: Frame passed to ``process_data.get_X_y`` to obtain the features
            and the target. It is modified in place: ``set_prediction_column``
            writes a ``prediction`` column into it.
        model_name: Name handed to ``ml.create_model`` when no ``model`` is
            supplied.
        model: Optional model object to use instead of creating a new one.

    Returns:
        The 6-tuple ``(scores, predictions, X, y, df, model)``. Callers must
        unpack all six values.
    """
    # Identity test instead of `== None`: equality can be overridden by the
    # model's class, `is None` cannot.
    if model is None:
        model = ml.create_model(model_name)
    X, y = process_data.get_X_y(df)
    # NOTE(review): the window/embargo/lookback arguments are interpreted by
    # ml.walkforward_with_purging_and_embargos; presumably they are counted in
    # rows — confirm in the ml module.
    scores, predictions = ml.walkforward_with_purging_and_embargos(
        model=model,
        X=X,
        y=y,
        purged_window_size=0,
        embargo_period=2,
        lookback=100,
        test_size=10,
        scaler=StandardScaler(),
        scorer=matthews_corrcoef,
    )
    print_up_down_counts(predictions)
    set_prediction_column(df, predictions)
    return scores, predictions, X, y, df, model

In [9]:
# param_grid = {
#  'bootstrap': [True, False],
#  'bootstrap_features': [True, False],    
#  'n_estimators': [5, 10, 15],
#  'max_samples' : [0.6, 0.8, 1.0],
#  'base_estimator__bootstrap': [True, False],    
#  'base_estimator__n_estimators': [50, 75, 100, 200, 300],
#  'base_estimator__max_features' : [0.6, 0.8, 1.0]
# }

In [10]:
# Split the AAPL frame into train / validation parts (train_size=0.8).
df_train_rf, df_validation_rf = process_data.get_train_and_validation_df(df_aapl, train_size=0.8)

# handle_ml_model returns six values (scores, predictions, X, y, df, model), so six names are unpacked.
scores_train_rf, predicton_train_rf, X_train_rf, y_train_rf, df_train_2_rf, model = handle_ml_model(df_train_rf, 'random_forest')
# The validation run is given the model object from the training run; the sixth
# returned value is that same object, so it is bound back to `model`.
scores_validation_rf, predicton_validation_rf, X_validation_rf, y_validation_rf, df_validation_2_rf, model = handle_ml_model(df_validation_rf, 'random_forest', model=model)

In [None]:
# Persist the random-forest train and validation frames (including the `prediction` column).
df_train_2_rf.to_csv('./assets/df_train_rf.csv')
# The extension previously contained a stray space ('. csv'), producing a file
# that does not match the '.csv' naming used for the train frame.
df_validation_2_rf.to_csv('./assets/df_validation_rf.csv')

In [None]:
def remove_timezone(indf):
    """Return a copy of ``indf`` whose datetime index carries no timezone.

    The input object is left untouched; only the copy receives the
    timezone-naive index produced by ``tz_localize(None)``.
    """
    naive_index = indf.index.tz_localize(None)
    result = indf.copy()
    result.index = naive_index
    return result

In [None]:
def backtest_result(df, column_prices=None):
    """Vectorized backtest of the ``prediction`` column against one price column.

    Adds the columns ``returns``, ``strategy``, ``creturns`` and ``b&h`` to
    ``df`` in place, prints the annualized return and volatility of the
    strategy, plots the strategy equity curve and renders a quantstats report.

    Args:
        df: Frame containing a ``prediction`` column and the price column.
        column_prices: Name of the price column in ``df``. The default of
            ``None`` is only a placeholder; a real column name is required.
    """
    # Simple (arithmetic) period-over-period returns of the asset.
    df['returns'] = df[column_prices].pct_change()
    # The previous row's prediction is applied to the current row's return,
    # so the position is fixed before the return it earns is observed.
    df['strategy'] = df['prediction'].shift(1) * df['returns']
    df['creturns'] = (1 + df['strategy']).cumprod()
    # 252 is presumably the number of trading days per year (assumes daily rows) — TODO confirm.
    ann_ret = (df['creturns'].iloc[-1])**(252/len(df)) - 1
    ann_vol = np.std(df['strategy']) * np.sqrt(252)
    # `returns` are simple returns, so buy-and-hold must compound as
    # prod(1 + r). exp(cumsum(r)) is only valid for log returns and would
    # not be comparable with `creturns` above.
    df['b&h'] = (1 + df['returns']).cumprod()

    print("Annualized Return:", ann_ret)
    print("Annualized Volatility:", ann_vol)

    df[['creturns']].plot(figsize=(10, 6))
    # NOTE(review): both series passed here are cumulative growth curves, not
    # period returns — confirm this is the input quantstats is meant to receive.
    qs.reports.full(remove_timezone(df['creturns']), remove_timezone(df['b&h']))

In [None]:
# Backtest the random-forest predictions on the validation frame
# (backtest_result adds its result columns to the frame in place).
backtest_result(df_validation_2_rf, column_prices='aapl')

In [None]:
# Split the AAPL frame into train / validation parts (train_size=0.8) for the SVC run.
df_train_svc, df_validation_svc = process_data.get_train_and_validation_df(df_aapl, train_size=0.8)

# handle_ml_model returns six values (scores, predictions, X, y, df, model);
# unpacking only five raised "too many values to unpack".
scores_train_svc, predicton_train_svc, X_train_svc, y_train_svc, df_train_svc_2, model_svc = handle_ml_model(df_train_svc, 'svc')
scores_validation_svc, predicton_validation_svc, X_validation_svc, y_validation_svc, df_validation_svc_2, model_validation_svc = handle_ml_model(df_validation_svc, 'svc')

In [None]:
# Split the AAPL frame into train / validation parts (train_size=0.8) for the logistic-regression run.
df_train_lr, df_validation_lr = process_data.get_train_and_validation_df(df_aapl, train_size=0.8)
# handle_ml_model returns six values (scores, predictions, X, y, df, model);
# unpacking only five raised "too many values to unpack".
scores_train_lr, predicton_train_lr, X_train_lr, y_train_lr, df_train_lr_2, model_lr = handle_ml_model(df_train_lr, 'logistic_regression')
scores_validation_lr, predicton_validation_lr, X_validation_lr, y_validation_lr, df_validation_lr_2, model_validation_lr = handle_ml_model(df_validation_lr, 'logistic_regression')

In [None]:
# Backtest the SVC predictions on the validation frame
# (backtest_result adds its result columns to the frame in place).
backtest_result(df_validation_svc_2, column_prices='aapl')

In [None]:
# Backtest the logistic-regression predictions on the validation frame
# (backtest_result adds its result columns to the frame in place).
backtest_result(df_validation_lr_2, column_prices='aapl')

In [None]:
# Work on a copy so the clean-up in the following cells does not alter the original validation frame.
df_validation_2_rf_copy = df_validation_2_rf.copy()

In [None]:
# Inspect the last rows of the copied random-forest validation frame.
df_validation_2_rf_copy.tail()

In [None]:
# Remove every row that contains a NaN in any column (pandas dropna defaults).
df_validation_2_rf_copy = df_validation_2_rf_copy.dropna()

In [None]:
# Map any prediction equal to 0 to -1 so the column only holds +1 / -1.
# NOTE(review): set_prediction_column only ever writes 1 or -1, so this replace
# looks like a no-op unless `prediction` is produced elsewhere — confirm.
df_validation_2_rf_copy['prediction'] = df_validation_2_rf_copy['prediction'].replace(0, -1)

In [None]:
# Re-run the backtest on the cleaned copy (rows with NaN removed).
backtest_result(df_validation_2_rf_copy, column_prices='aapl')

In [None]:
# Display the prediction column of the cleaned random-forest validation frame.
df_validation_2_rf_copy['prediction']

In [None]:
# Display the target column, for comparison with the predictions shown above it.
df_validation_2_rf_copy['target']

In [None]:
def get_metrics(real, predicted):
    """Compute the confusion matrix and four classification scores.

    Args:
        real: True labels.
        predicted: Predicted labels.

    Returns:
        Tuple ``(matrix, accuracy, precision, recall, f1)`` as produced by the
        corresponding scikit-learn functions called with ``(real, predicted)``.
    """
    matrix = confusion_matrix(real, predicted)
    # Evaluate the scalar scores in a fixed order: accuracy, precision, recall, F1.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    accuracy, precision, recall, f1 = [scorer(real, predicted) for scorer in scorers]
    return matrix, accuracy, precision, recall, f1

In [None]:
def visualize_metrics(real, predicted, title):
    """Plot the 2x2 confusion matrix and print accuracy, precision, recall and F1.

    Args:
        real: True labels.
        predicted: Predicted labels.
        title: Title drawn on the figure.

    Rows of the matrix (y axis) are the true classes and columns (x axis) the
    predicted classes, as returned by ``get_metrics``.
    """
    matrix, accuracy, precision, recall, f1 = get_metrics(real, predicted)

    matrix = pd.DataFrame(matrix, index=['Down', 'Up'], columns=['Down', 'Up'])
    # plt.matshow opens its own figure when no `fignum` is given, so a
    # preceding plt.figure() would only leave an empty extra figure behind.
    plt.matshow(matrix, cmap=plt.cm.Blues, alpha=0.3)
    plt.title(title)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # The matrix cells are centred on positions 0 and 1; ticks at -1 would sit
    # outside the image and mislabel the first row/column.
    plt.xticks([0, 1], ['Down', 'Up'])
    plt.yticks([0, 1], ['Down', 'Up'])
    tags = ['True Neg', 'False Pos', 'False Neg', 'True Pos']

    # Annotate each cell: row i is drawn at y = i, column j at x = j.
    for i in range(2):
        for j in range(2):
            plt.text(j, i, f'{tags[i*2+j]}: {matrix.iloc[i, j]}', ha='center', va='center', color='black')

    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1: {f1}')

    plt.show()

In [None]:
def visualize_metrics_v2(real, predicted, title):
    """Plot the confusion matrix with per-cell counts and the metrics beside it.

    Args:
        real: True labels.
        predicted: Predicted labels.
        title: Model name drawn in red next to the matrix.

    In this layout the x axis is the true class and the y axis the predicted
    class: the count for true class ``i`` / predicted class ``j`` is written
    at ``x = i, y = j``.
    """
    # Compute the confusion matrix and the scores.
    matriz, accuracy, precision, recall, f1 = \
                    get_metrics(real, predicted)

    # Draw with matplotlib.
    matriz = pd.DataFrame(matriz,
                          columns=["-1 : Down", "1 : Up"])
    # get_metrics builds the matrix as confusion_matrix(real, predicted), i.e.
    # rows = true class. The annotations below put row i at x = i, so the
    # image is drawn transposed to keep the cell colours under the matching
    # numbers. plt.matshow opens its own figure, so no plt.figure() is needed.
    plt.matshow(matriz.to_numpy().T, cmap=plt.cm.Blues, alpha=0.3)
    plt.title("True")
    plt.ylabel("Predichas")
    plt.xticks(range(len(matriz.columns)), matriz.columns, rotation=45)
    plt.yticks(range(len(matriz.columns)), matriz.columns)
    # Cell captions indexed as [true class][predicted class].
    etiquetas = (("Verdaderos\nnegativos", "Falsos\npositivos"),
                 ("Falsos\nnegativos", "Verdaderos\npositivos"))
    for i in range(len(matriz.columns)):
        for j in range(len(matriz.columns)):
            # Count slightly below the cell centre, caption slightly above it.
            plt.text(i, j + 0.14, str(matriz.iloc[i, j]),
                     fontsize=30, ha="center", va="center")
            plt.text(i, j - 0.25, etiquetas[i][j],
                     fontsize=11.5, ha="center", va="center")
    # Model name and scores are placed to the right of the matrix, in data coordinates.
    plt.text(1.60, -0.30, title, fontsize=25, c="red")
    plt.text(2.1, 0.10, "Accuracy: %0.2f" % accuracy, fontsize=20)
    plt.text(2.1, 0.40, "Precision: %0.2f" % precision, fontsize=20)
    plt.text(2.1, 0.70, "Recall: %0.2f" % recall, fontsize=20)
    plt.text(2.1, 1.00, "F1: %0.2f" % f1, fontsize=20)
    plt.show()

In [None]:
# Confusion matrix and scores for the random-forest predictions on the cleaned validation frame.
visualize_metrics_v2(df_validation_2_rf_copy['target'], df_validation_2_rf_copy['prediction'], 'Random Forest')

In [None]:
# Build a cleaned copy of the logistic-regression validation frame:
# drop rows containing NaN, then map any prediction equal to 0 to -1.
df_validation_lr_2_copy = (
    df_validation_lr_2
    .copy()
    .dropna()
    .assign(prediction=lambda frame: frame['prediction'].replace(0, -1))
)

In [None]:
# Confusion matrix and scores for the logistic-regression predictions on the cleaned validation frame.
visualize_metrics_v2(df_validation_lr_2_copy['target'], df_validation_lr_2_copy['prediction'], 'Logistic Regression')

In [None]:
# Build a cleaned copy of the SVC validation frame:
# drop rows containing NaN, then map any prediction equal to 0 to -1.
df_validation_svc_2_copy = (
    df_validation_svc_2
    .copy()
    .dropna()
    .assign(prediction=lambda frame: frame['prediction'].replace(0, -1))
)

In [None]:
# Confusion matrix and scores for the SVC predictions on the cleaned validation frame.
visualize_metrics_v2(df_validation_svc_2_copy['target'], df_validation_svc_2_copy['prediction'], 'Support Vector Classifier')

In [12]:
import yfinance as yf
import pandas as pd
import talib as ta
import numpy as np
# Download Apple closing prices (start='2010-01-01', end='2020-01-01').
prices = yf.download('AAPL', start='2010-01-01', end='2020-01-01')[['Close']]
print(prices.head())

# 50- and 200-row rolling means of the close.
sma_50, sma_200 = (prices['Close'].rolling(window=span).mean() for span in (50, 200))

# Buy / sell signals; rows where neither comparison holds (including the
# warm-up rows where a rolling mean is still NaN) keep the default 0.
signals = pd.Series(0, index=prices.index)
signals.loc[sma_50.gt(sma_200)] = 1   # buy signal
signals.loc[sma_50.lt(sma_200)] = -1  # sell signal

[*********************100%***********************]  1 of 1 completed
                              Close
Date                               
2010-01-04 00:00:00-05:00  7.643214
2010-01-05 00:00:00-05:00  7.656429
2010-01-06 00:00:00-05:00  7.534643
2010-01-07 00:00:00-05:00  7.520714
2010-01-08 00:00:00-05:00  7.570714


In [13]:
# Count how many rows carry each signal value (1, -1 or 0) in `signals`.
signals.value_counts()

 1    1775
-1     542
 0     199
dtype: int64

In [14]:
# MACD line, its signal line and the histogram from TA-Lib (12 / 26 / 9 periods).
macd, macd_signal, macd_hist = ta.MACD(
    prices['Close'].values,
    fastperiod=12,
    slowperiod=26,
    signalperiod=9,
)

# Buy / sell signals from the relative position of the two lines; rows where
# neither comparison holds keep the default 0.
signals = pd.Series(0, index=prices.index)
signals.loc[macd > macd_signal] = 1   # buy signal
signals.loc[macd < macd_signal] = -1  # sell signal

# Wait / hold rule: when the two lines are closer than 0.05 the signal is reset to 0.
signals.loc[np.absolute(macd - macd_signal) < 0.05] = 0

In [16]:
# Count how many rows carry each signal value (1, -1 or 0) in `signals`.
signals.value_counts()

 0    879
 1    855
-1    782
dtype: int64

In [23]:
# 14-period RSI from TA-Lib, re-wrapped as a Series aligned with the price index.
rsi = pd.Series(
    ta.RSI(prices['Close'].values, timeperiod=14),
    index=prices.index,
)

# Buy / sell signals on RSI crossings of the 50 level.
signals = pd.Series(0, index=prices.index)
signals[rsi.gt(50) & rsi.shift(1).le(50)] = 1   # buy signal: RSI moves above 50
signals[rsi.lt(50) & rsi.shift(1).ge(50)] = -1  # sell signal: RSI moves below 50

# Wait / hold: no extra rule is applied here; rows without a crossing keep the default 0.

In [25]:
# Count how many rows carry each signal value (1, -1 or 0) in `signals`.
signals.value_counts()

 0    2267
 1     125
-1     124
dtype: int64