# ValueInvestor

**Goal(s):**

Predict stock price valuations on a daily, weekly and monthly basis. Recommend BUY, HOLD, SELL decisions. Maximize capital returns, minimize losses. Ideally a loss should never happen. Minimize HOLD period.

**Data Description:**

Trading data for a set of portfolio companies in emerging markets, covering stock prices for 2020 Q1–Q4 and 2021 Q1. Each company's stock is provided on a separate sheet. Each market's operating days vary with the company's country and the exchange on which the stock is traded.

**Success Metrics:**

Evaluate on the basis of capital returns. Use Bollinger Bands to measure your system's effectiveness.

In [None]:
#importing libraries
import os
import math
import imblearn
import numpy as np
import pandas as pd
import warnings
from hyperopt import tpe
import seaborn as sns
from tpot import TPOTClassifier
warnings.filterwarnings('ignore')
from datetime import date
from skopt import BayesSearchCV
from fbprophet import Prophet
from tensorflow import keras
import matplotlib.pyplot as plt
from hpsklearn import HyperoptEstimator
from hpsklearn import any_classifier
from hpsklearn import any_preprocessing
from numpy import unique
from numpy import argmax
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model
from collections import Counter
from sklearn.metrics import roc_curve
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from imblearn.over_sampling import SMOTE
from keras.models import Sequential
from keras.layers import  GRU
from pmdarima.arima import auto_arima
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.optimizers import SGD
from keras.layers.recurrent_v2 import GRU
from sklearn.preprocessing import StandardScaler
from keras.layers import Dense, Dropout, LSTM
from sklearn.metrics import mean_squared_error, mean_absolute_error
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import add_changepoints_to_plot, plot_cross_validation_metric
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
#open data
dfColombia = pd.read_excel('2020Q1Q2Q3Q4-2021Q1.xlsx', sheet_name = 'Colombia - Cementos Argos SA (C') 
dfColombia

In [None]:
# Split the sheet by row position: the first 303 rows go to df_1, the rest to df_2.
# .copy() gives each part its own data, so the column assignments made on df_1
# in later cells write to a real frame rather than to a slice of dfColombia
# (which is what triggers pandas' SettingWithCopyWarning).
df_1 = dfColombia.iloc[:303, :].copy()
df_2 = dfColombia.iloc[303:, :].copy()

In [None]:
#cast both columns to pandas' string dtype so the .str accessor can be used on them
df_1['Change %'] = df_1['Change %'].astype('string')
df_1['Vol.'] = df_1['Vol.'].astype('string')

In [None]:
#strip the percent sign so 'Change %' parses as a number
df_1['Change %'] = df_1['Change %'].str.replace("%", " ")
#scale volume by its unit suffix: merely deleting "K"/"M" would leave
#thousands and millions on the same numeric scale (e.g. 500K > 1.5M)
vol_text = df_1['Vol.'].str.strip()
vol_scale = vol_text.str[-1:].map({'K': 1e3, 'M': 1e6}).fillna(1.0).astype(float)
df_1['Vol.'] = vol_text.str.rstrip('KM').astype(float) * vol_scale
#remaining numeric columns
for col in ['Price', 'Open', 'High', 'Low', 'Change %']:
    df_1[col] = df_1[col].astype(float)
#datetime
df_1['Date'] = pd.to_datetime(df_1['Date'])

In [None]:
#keep only the rows whose Date is one of the calendar days 2020-02-01 .. 2020-12-31
wanted_days = pd.date_range('2020-2-1', '2020-12-31')
date_mask = df_1["Date"].isin(wanted_days)
filtered_df = df_1.loc[date_mask]
filtered_df

In [None]:
#set index
current_df = filtered_df.set_index('Date')

In [None]:
#sma
def get_sma(prices, rate):
    """Return the simple moving average of ``prices`` over a ``rate``-row window.

    ``prices`` must support ``.rolling`` (e.g. a pandas Series). With an
    integer ``rate`` the leading entries are NaN until the window is full.
    """
    window = prices.rolling(rate)
    return window.mean()

In [None]:
closing_prices = current_df['Price'] # Use only closing prices

In [None]:
sma = get_sma(closing_prices, 20) # Get 20 day SMA

In [None]:
symbol = ' SA'

In [None]:
#bollinger bands
def get_bollinger_bands(prices, sma=None, rate=20, num_std=2):
    """Return the ``(upper, lower)`` Bollinger Bands for ``prices``.

    Parameters:
        prices: pandas Series of prices.
        sma: pre-computed moving average to centre the bands on. When None
            it is computed here as the ``rate``-row rolling mean. (Earlier
            this argument was silently discarded and always recomputed.)
        rate: rolling window length used for the standard deviation and,
            when ``sma`` is None, for the moving average.
        num_std: band half-width in rolling standard deviations.

    Returns:
        Tuple ``(bollinger_up, bollinger_down)`` of Series.
    """
    window = prices.rolling(rate)
    if sma is None:
        sma = window.mean()
    band_width = window.std() * num_std
    bollinger_up = sma + band_width  # top band
    bollinger_down = sma - band_width  # bottom band
    return bollinger_up, bollinger_down

In [None]:
bollinger_up, bollinger_down = get_bollinger_bands(closing_prices, sma)

In [None]:
current_df['sma_20'] = get_sma(current_df['Price'], 20)
current_df.tail()

In [None]:
#bollinger bands
current_df['upper_bb'], current_df['lower_bb'] = get_bollinger_bands(current_df['Price'], current_df['sma_20'], 20)
current_df.tail()

In [None]:
#crosspoints
buy_mask = current_df['Price'] <= current_df['lower_bb']
sell_mask = current_df['Price'] >= current_df['upper_bb']
buyers = current_df[buy_mask]
sellers = current_df[sell_mask]
#later cells read current_df['actions'], which was never created; derive it here.
#NOTE(review): the integer encoding is an assumption - 0 = HOLD, 1 = BUY, 2 = SELL
current_df['actions'] = np.select([buy_mask, sell_mask], [1, 2], default=0)

In [None]:
pip install tpot

Tpot Looks for the best combination of models and tunings

In [None]:
modeldf = current_df.drop(['actions','lower_bb','upper_bb','sma_20'], axis =1)

In [None]:
#class distribution of the labels; reads the column directly because
#Yxis is not assigned until a later cell
counter = Counter(current_df['actions'])
print(counter)

In [None]:
# transform the dataset
# standardize the columns of modeldf
# NOTE(review): the scaler is fitted on every row, before any train/test split
scaler = StandardScaler()
current_df_norm = scaler.fit_transform(modeldf)
Xaxis = np.array(current_df_norm)
Yxis = np.array(current_df['actions'])
#balance the labels
# SMOTE is created with default settings (no random_state is passed)

oversample = SMOTE()
Xaxis, Yxis = oversample.fit_resample(Xaxis, Yxis)


In [None]:
# collect the score and the fitted pipeline of every TPOT run
scores = []
winning_pipes = []
tpot = TPOTClassifier(verbosity=3, 
                      scoring="balanced_accuracy", 
                      random_state=23, 
                      periodic_checkpoint_folder="tpot_mnst1.txt", 
                      n_jobs=-1, 
                      generations=10, 
                      population_size=100)
# run three fits of the same TPOT instance (no timing is recorded here)
# NOTE(review): each run scores on the same Xaxis/Yxis it was fitted on
for x in range(3):
    
    tpot.fit(Xaxis, Yxis)
    winning_pipes.append(tpot.fitted_pipeline_)
    scores.append(tpot.score(Xaxis, Yxis))
print('Scores:', scores)  
print('Winning pipelines:', winning_pipes)

Bayesian optimization searches for the best hyperparameters for a given model.

In [None]:
# Bayesian search over random-forest hyper-parameters: 32 sampled settings, 3-fold CV
opt = BayesSearchCV(
    RandomForestClassifier(),
    {
        'bootstrap': [True, False],
        'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
        # 'auto' means the same as 'sqrt' for classifiers and is rejected by
        # newer scikit-learn releases, so search two distinct settings instead
        'max_features': ['sqrt', 'log2'],
        'min_samples_leaf': [1, 2, 4],
        'min_samples_split': [2, 5, 10],
        'n_estimators': [130, 180, 230],
    },
    n_iter=32,
    cv=3
)

opt.fit(Xaxis,Yxis)

print("val. score: %s" % opt.best_score_)
# scored on the same data the search was fitted on, so this is a training score
print("train score: %s" % opt.score(Xaxis,Yxis))

In [None]:
pip install scikit-optimize

Hyperparameter optimization with Hyperopt is another good technique for finding the best parameters for a given model.

In [None]:
# Hyperparameter optimization with Hyperopt is another good technique for finding the best parameters for a given model.

In [None]:
# define search
modelopt = HyperoptEstimator()


# perform the search
modelopt.fit(Xaxis,Yxis)

# summarize performance
acc = modelopt.score(Xaxis,Yxis)
print("Accuracy: %.3f" % acc)
# summarize the best model
print(modelopt.best_model())

Regression and Classification Model combination.

In [None]:
#axis's
#keep the feature as a 2-D (samples, 1) array: later cells read
#train_X.shape[1], which does not exist on a 1-D array
xOpen = current_df[['Open']].values
yPrice = current_df.Price.values
y_class = current_df.actions

In [None]:
#splitting
#positional split: the first 72.5% of rows train, the remainder tests
cut_x = int(0.725 * len(xOpen))
cut_y = int(0.725 * len(yPrice))
cut_c = int(0.725 * len(y_class))
train_X, test_X = xOpen[:cut_x], xOpen[cut_x:]
train_Y, test_Y = yPrice[:cut_y], yPrice[cut_y:]
y_train_class, y_test_class = y_class[:cut_c], y_class[cut_c:]

In [None]:

#number of features and number of classes
# assumes train_X is 2-D (samples, features); a 1-D array has no shape[1] - TODO confirm
n_features = train_X.shape[1]
# number of distinct label values in y_class
n_class = len(unique(y_class))


In [None]:
# input
visible = Input(shape=(n_features,))
hidden1 = Dense(160, activation='relu', kernel_initializer='he_normal')(visible)
hidden2 = Dense(160, activation='relu', kernel_initializer='he_normal')(hidden1)
# regression output
out_reg = Dense(1, activation='linear')(hidden2)
# classification output
out_clas = Dense(n_class, activation='softmax')(hidden2)
# define model
modelmix = Model(inputs=visible, outputs=[out_reg, out_clas])
# compile the keras model
modelmix.compile(loss=['mse','sparse_categorical_crossentropy'], optimizer='adam')
# fit the keras model on the dataset
modelmix.fit(train_X, [train_Y,y_train_class], epochs=150, batch_size=1, verbose=2)
# make predictions on test set
yhat1, yhat2 = modelmix.predict(test_X)
# calculate error for regression model
error = mean_absolute_error(test_Y, yhat1)
print('MAE: %.3f' % error)
# evaluate accuracy for classification model
yhat2 = argmax(yhat2, axis=-1).astype('int')
acc = accuracy_score(y_test_class, yhat2)
print('Accuracy: %.3f' % acc)

In [None]:
#plot results
#compare the predictions with the held-out prices they were made for;
#yPrice[:61] was the start of the series, not the test rows
plt.plot(test_Y, color = 'black', label = ' Stock Price')
plt.plot(yhat1, color = 'green', label = 'Predicted  Stock Price')
plt.plot(yhat2, color = 'red', label = 'Labels')
plt.title(' Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel(' Stock Price')
plt.legend()
plt.show()

Time Series Classification.

In [None]:
#x shape
x_totrain = train_X.reshape((train_X.shape[0], train_X.shape[1], 1))

In [None]:
#model
def make_model(input_shape):
    """Build the 1-D convolutional classifier for inputs of ``input_shape``.

    Three Conv1D -> BatchNormalization -> ReLU stages (64, 64 and 3 filters,
    kernel size 3, "same" padding) feed a global average pool and a softmax
    layer with ``n_class`` units; ``n_class`` is read from module scope.
    """
    inputs = keras.layers.Input(input_shape)

    features = inputs
    for n_filters in (64, 64, 3):
        features = keras.layers.Conv1D(filters=n_filters, kernel_size=3, padding="same")(features)
        features = keras.layers.BatchNormalization()(features)
        features = keras.layers.ReLU()(features)

    pooled = keras.layers.GlobalAveragePooling1D()(features)
    probabilities = keras.layers.Dense(n_class, activation="softmax")(pooled)

    return keras.models.Model(inputs=inputs, outputs=probabilities)


modelkeras = make_model(input_shape=x_totrain.shape[1:])
keras.utils.plot_model(modelkeras, show_shapes=True)

In [None]:
#train
epochs = 50
batch_size = 4
optimizer = keras.optimizers.SGD(learning_rate=0.001)
# checkpoint the best weights, halve the learning rate on plateaus, and stop
# early; all three callbacks watch the validation loss
callbacks = [
    keras.callbacks.ModelCheckpoint(
        "best_model.h5", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=5, min_lr=0.0001
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=20, verbose=1),
]
modelkeras.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
# the fit(...) call was missing its closing parenthesis (SyntaxError)
history = modelkeras.fit(
    x_totrain,
    y_train_class,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
)

In [None]:
#x test reshape
#-1 lets numpy infer the row count instead of hard-coding 61 test rows
retestxx = test_X.reshape(-1, 1)

In [None]:
x_totest = retestxx.reshape((retestxx.shape[0], retestxx.shape[1], 1))

In [None]:
#x test predict
yhatkeras = modelkeras.predict(x_totest)

In [None]:
#plot results
metric = "sparse_categorical_accuracy"
plt.figure()
plt.plot(history.history[metric])
plt.plot(history.history["val_" + metric])
plt.title("model " + metric)
plt.ylabel(metric, fontsize="large")
plt.xlabel("epoch", fontsize="large")
plt.legend(["train", "val"], loc="best")
plt.show()
plt.close()

Daily returns: the percentage change in price from one row to the next.

In [None]:
current_df['daily_returns'] = current_df['Price'].pct_change()

In [None]:
#count missing values
current_df['daily_returns'].isna().sum()

In [None]:
#remove missing values
#int(mean) truncated the old fill value to 0 for any mean return below 100%,
#so fill with 0.0 explicitly; assign the result back instead of calling an
#in-place fillna on a column selection (chained assignment)
current_df['daily_returns'] = current_df['daily_returns'].fillna(0.0)

In [None]:
#plot results
fig = plt.figure()
ax1 = fig.add_axes([0.1,0.1,0.8,0.8])
ax1.plot(current_df['daily_returns'])
ax1.set_xlabel("Date")
ax1.set_ylabel("Percent")
ax1.set_title("daily returns data")
plt.show()

Testing combination models that predict the labels from the daily-return percentage change.

In [None]:
# axis's
xPrice = current_df.Price.values
yreturn = current_df.daily_returns.values
y_class = current_df.actions
#split: the first 72.5% of rows train, the remainder tests (all three come
#from current_df, so one cut index serves them all)
split_at = int(0.725 * len(xPrice))
trainxxprice, testxxprice = xPrice[:split_at], xPrice[split_at:]
trainyreturns, testyreturns = yreturn[:split_at], yreturn[split_at:]
y_train_class, y_test_class = y_class[:split_at], y_class[split_at:]
#2-D (samples, 1) inputs for both splits; -1 infers the row count instead of
#hard-coding 160 training rows
trainxxprice = trainxxprice.reshape(-1, 1)
testxxprice = testxxprice.reshape(-1, 1)
n_features = trainxxprice.shape[1]
n_class = len(unique(y_class))

In [None]:
# input
visible = Input(shape=(n_features,))
hidden1 = Dense(160, activation='relu', kernel_initializer='he_normal')(visible)
hidden2 = Dense(160, activation='relu', kernel_initializer='he_normal')(hidden1)
# regression output
out_reg = Dense(1, activation='linear')(hidden2)
# classification output
out_clas = Dense(n_class, activation='softmax')(hidden2)
# define model
modeltest = Model(inputs=visible, outputs=[out_reg, out_clas])
# compile the keras model
modeltest.compile(loss=['mse','sparse_categorical_crossentropy'], optimizer='adam')
# fit the keras model on the dataset
modeltest.fit(trainxxprice, [trainyreturns,y_train_class], epochs=150, batch_size=1, verbose=2)
# make predictions on test set
yhat1test, yhat2test = modeltest.predict(testxxprice)
# calculate error for regression model
error = mean_absolute_error(testyreturns, yhat1test)
print('MAE: %.3f' % error)
# evaluate accuracy for classification model
yhat2 = argmax(yhat2test, axis=-1).astype('int')
# compare predicted class ids with the true class labels; the earlier call
# passed the continuous returns and the raw probability matrix
acc = accuracy_score(y_test_class, yhat2)
print('Accuracy: %.3f' % acc)

In [None]:
#plot results
#plot the held-out returns the predictions were made for (yreturn[:61] was the
#start of the series) and the predicted class ids rather than the raw
#per-class probability matrix
plt.plot(testyreturns, color = 'black', label = ' Returns')
plt.plot(yhat1test, color = 'green', label = 'Predicted Returns')
plt.plot(argmax(yhat2test, axis=-1), color = 'red', label = 'Labels')
plt.title(' Returns Prediction')
plt.xlabel('Time')
plt.ylabel(' Returns')
plt.legend()
plt.show()

In [None]:
#train
epochs = 50
batch_size = 4
optimizer = keras.optimizers.SGD(learning_rate=0.0001)
# NOTE(review): min_lr equals the starting learning rate here, so
# ReduceLROnPlateau has no room to lower it
callbacks = [
    keras.callbacks.ModelCheckpoint(
        "best_model.h5", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=5, min_lr=0.0001
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=20, verbose=1),
]
modelkeras.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
# modelkeras was built from x_totrain.shape[1:], i.e. 3-D (samples, steps, 1)
# input, so give the price feature that same explicit layout
historyret = modelkeras.fit(
    trainxxprice.reshape((-1, 1, 1)),
    y_train_class,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
)

In [None]:
#predict
#the model was just re-fitted on Price, so predict on the Price test rows
#(x_totest holds the Open test rows) in the 3-D layout the model expects
yhatkeras = modelkeras.predict(testxxprice.reshape((-1, 1, 1)))

In [None]:
#plot results
metric = "sparse_categorical_accuracy"
plt.figure()
plt.plot(historyret.history[metric])
plt.plot(historyret.history["val_" + metric])
plt.title("model " + metric)
plt.ylabel(metric, fontsize="large")
plt.xlabel("epoch", fontsize="large")
plt.legend(["train", "val"], loc="best")
plt.show()
plt.close()