# Stock price prediction: A machine learning approach

Purpose: Predict stock price using Machine Learning methods, such as Support Vector Machines, Deep Neural Networks and Random Forest \
Authors: Caio Lopes De Souza, Silvio Sandoval Zocchi, Gabriel Rodrigues Palma

# Packages used in the project

In [2]:
# Data manipulation modules
import pandas as pd
import numpy as np

# Data visualisation modules
import matplotlib.pyplot as plt

# Machine Learning modules
from keras.models import Sequential
from keras.layers import Activation, Dense

# Cross-validation modules 
from sklearn.model_selection import TimeSeriesSplit

# Grid search module
from sklearn.model_selection import GridSearchCV

# Evaluation metrics modules
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score

In [3]:
def get_train_test_data(cripto_series, test_percentage):
    '''This functions creates the training and test data for the stock time series'''
    time_series_length = len(cripto_series)
    test_series = cripto_series.iloc[int(1 - time_series_length*test_percentage):]
    train_series = cripto_series.iloc[:int(1 - time_series_length*test_percentage)]
    
    return(test_series, train_series)

def get_rate_of_change(cripto_series, variable, n_days):
    '''This function computes several values for the rate of change and add them into a dataset'''
    for day in n_days: 
        variable_name = f"{day}_days_of_change"
        cripto_series[variable_name] = (cripto_series[variable] - cripto_series[variable].shift(day)) / cripto_series[variable].shift(day)
        
def get_moving_average(cripto_series, variable, n_days):
    '''This function computes several values for the moving average and add them into a dataset'''
    for day in n_days:
        variable_name = f"{day}_day_moving_average"
        cripto_series[variable_name] = cripto_series[variable].rolling(day).mean()
        
def get_ratio(cripto_series, variable_1, variable_2):
    '''This function computes the ratio between two variables and add it into a dataset'''
    variable_name = f"{variable_1}_{variable_2}_ratio"
    cripto_series[variable_name] = cripto_series[variable_1] / cripto_series[variable_2]
    
def train_and_predict(model, predictors, target, test_predictors):
    '''This function fits a model and then make predictions on it for a given amount of data'''
    model.fit(predictors, target)
    preds = model.predict(test_predictors)
    preds = pd.Series(preds, index=test_series.index)
    return preds

## Reading the Data

In [4]:
cripto_series = pd.read_csv('https://raw.githubusercontent.com/CaioLSouza/stock_price_prediction/main/Data/revolut_crypto_data.csv', index_col='time_open')
bitcoin_series = cripto_series.loc[cripto_series['name'] == 'Bitcoin']

## Cleaning variables

In [5]:
bitcoin_series.drop(['timestamp', 'id', 'Unnamed: 0', 'slug', 'time_close', 'time_high', 'time_low', 'name', 'symbol', 'ref_cur'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [6]:
bitcoin_series.head()

Unnamed: 0_level_0,open,high,low,close,volume,market_cap
time_open,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-04-29,134.444,147.488007,134.0,144.539993,0.0,1603769000.0
2013-04-30,144.0,146.929993,134.050003,139.0,0.0,1542813000.0
2013-05-01,139.0,139.889999,107.720001,116.989998,0.0,1298955000.0
2013-05-02,116.379997,125.599998,92.281898,105.209999,0.0,1168517000.0
2013-05-03,106.25,108.127998,79.099998,97.75,0.0,1085995000.0


In [7]:
bitcoin_series['target']  = np.array(bitcoin_series['close'].shift(-1) > bitcoin_series['close']).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


## Creating new variables

In [8]:
get_rate_of_change(bitcoin_series, 'close', [3, 5, 7, 15])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [9]:
get_moving_average(bitcoin_series, 'close', [3, 5, 7, 15])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [10]:
get_ratio(bitcoin_series, 'close', 'open')
get_ratio(bitcoin_series, 'close', 'low')
get_ratio(bitcoin_series, 'close', 'high')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


## Proposed approach

In [11]:
bitcoin_series.dropna(axis=0, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


In [12]:
test_series, train_series = get_train_test_data(bitcoin_series,
                                                test_percentage = 0.1)

In [13]:
predictors = ['close', 'open', 'high', 'low', 'volume', '3_days_of_change',
             '5_days_of_change', '7_days_of_change', '15_days_of_change',
             '3_day_moving_average', '5_day_moving_average', '7_day_moving_average', '15_day_moving_average',
             'close_open_ratio', 'close_low_ratio', 'close_high_ratio', 'market_cap']

In [14]:
tscv = TimeSeriesSplit()

param_grid = [{'epochs': [100, 200, 300]}
]

dnn = Sequential()

grid_search = GridSearchCV(dnn, param_grid, cv=tscv,
                           scoring='precision')

grid_search.fit(train_series[predictors], train_series['target'])

TypeError: Cannot clone object '<keras.engine.sequential.Sequential object at 0x000001B49FF33A58>' (type <class 'keras.engine.sequential.Sequential'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.

In [None]:
grid_search.best_params_

In [None]:
# criando as redes neurais 
model = Sequential()
model.add(Dense(units=32, activation='relu', input_shape=(train[predictors].shape[1:])))
model.add(Dense(units=10, activation='relu', input_shape=(train[predictors].shape[1:])))
model.add(Dense(units=6, activation='relu', input_shape=(train[predictors].shape[1:])))
model.add(Dense(1, activation = 'sigmoid'))

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', Precision()])