In [7]:
import pandas as pd
import numpy as np

from datetime import date
import matplotlib.pyplot as plt

import datetime
from dateutil.relativedelta import relativedelta
import calendar

from keras.models import Sequential
from keras.layers import LSTM,Dropout,Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import warnings
# NOTE(review): this installs a blanket "ignore" filter, so every warning raised
# after this cell (deprecations, convergence notices, dtype warnings, ...) is
# hidden. Consider narrowing it to specific categories/modules once the
# notebook is stable.
warnings.filterwarnings("ignore")

ModuleNotFoundError: No module named 'tensorflow'

In [8]:
# Load the raw market values and convert the date column to datetimes in one chain.
market_values = pd.read_csv(
    'hackathon-seed-data-main/market-values.csv'  # Edit file name
).assign(
    marketValueDate=lambda frame: pd.to_datetime(frame['marketValueDate'])
)
market_values

FileNotFoundError: [Errno 2] No such file or directory: 'hackathon-seed-data-main/market-values.csv'

In [9]:
class MarketValuePrediction():
    """Per-instrument market-value forecaster built on a small stacked LSTM.

    For every ``instrumentId`` in the supplied frame the ``marketValue`` series
    is min-max scaled, cut into sliding windows of ``time_step`` observations,
    used to fit a fresh two-layer LSTM, and then rolled forward recursively to
    produce one forecast per future month end.

    The input frame must provide the columns ``instrumentId``,
    ``marketValueDate`` and ``marketValue``.
    """

    # Column layout shared by every prediction frame this class produces.
    PREDICTION_COLUMNS = ['instrumentId', 'date', 'predictedMarketValue']

    def __init__(self, marketValueDF):
        '''
        Input:
            marketValueDF: dataframe with the columns instrumentId,
            marketValueDate and marketValue
        '''
        print('INFO: Successfully Initiated')
        # Work on a private copy so the caller's dataframe is never mutated.
        self.marketValueDF = marketValueDF.copy()
        self.marketValueDF['marketValueDate'] = pd.to_datetime(self.marketValueDF['marketValueDate'])

        # Results of the most recent run_all_predictions() call (all instruments).
        self.allpredictionsDF = pd.DataFrame(columns=self.PREDICTION_COLUMNS, dtype=object)
        # Model and scaler fitted for the instrument processed most recently.
        self.lstm_model = None
        self.scaler = None

        # Predictions of the instrument processed most recently.
        self.instrumentpredictionDF = pd.DataFrame(columns=self.PREDICTION_COLUMNS, dtype=object)
        # One prediction frame per instrument from the most recent run.
        self.lst_df = []
        # Number of past observations fed to the model for each prediction.
        self.time_step = 3

    def get_all_instrument_id(self):
        '''
        Output: Returns an array of all unique instrument ids in the market values data
        '''
        return self.marketValueDF['instrumentId'].unique()

    def get_market_values_for_instrument(self, instrument_id):
        '''
        Input:
            Instrument Id of a stock
        Output:
            Returns a dataframe consisting of the rows for the given instrument
            id, ordered chronologically with a fresh 0..n-1 index
        '''
        # Read from the instance's own data rather than a notebook-level global,
        # so the class works regardless of what the surrounding cells defined.
        instrument_rows = self.marketValueDF[self.marketValueDF['instrumentId'] == instrument_id]
        # The sliding windows assume time order; a stable sort keeps the
        # original order of rows that share the same date.
        instrument_rows = instrument_rows.sort_values('marketValueDate', kind='mergesort')
        return instrument_rows.reset_index(drop=True)

    def getNext12Months(self, months=12):
        '''
        Input:
            months: how many future month ends to return (default 12)
        Output:
            Returns a list of datetime.date objects holding the last day of each
            of the `months` calendar months that follow the latest
            marketValueDate in the data
        Raises:
            ValueError if the data contains no valid marketValueDate
        '''
        latest_current_date = self.marketValueDF['marketValueDate'].max()
        if pd.isna(latest_current_date):
            raise ValueError('marketValueDF contains no valid marketValueDate')

        year, month = latest_current_date.year, latest_current_date.month
        month_ends = []
        for _ in range(months):
            # Advance first: the month of the latest observation is already
            # known, so the forecast horizon starts with the month after it.
            if month == 12:
                year, month = year + 1, 1
            else:
                month += 1
            last_day = calendar.monthrange(year, month)[1]
            month_ends.append(datetime.date(year, month, last_day))
        return month_ends

    def normalize(self, market_values_instrument):
        '''
        Input:
            Dataframe of one instrument containing a marketValue column
        Output:
            Returns the marketValue column scaled to the range [0, 1] as an
            (n, 1) array. The fitted scaler is stored in self.scaler.
        '''
        historical_value_instrument = market_values_instrument[['marketValue']]
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(historical_value_instrument)
        self.scaler = scaler
        return scaled_data

    def _build_training_windows(self, scaled_data):
        '''Turn an (n, 1) scaled series into (windows, time_step, 1) inputs and (windows,) targets.'''
        steps = self.time_step
        x_train_data = [scaled_data[end - steps:end, 0] for end in range(steps, scaled_data.shape[0])]
        y_train_data = [scaled_data[end, 0] for end in range(steps, scaled_data.shape[0])]
        x_train_data = np.array(x_train_data).reshape(-1, steps, 1)
        return x_train_data, np.array(y_train_data)

    def run_lstm_model(self, x_train_data, y_train_data, epochs=30, batch_size=1):
        '''
        Input:
            x_train_data: array of shape (samples, time_step, 1)
            y_train_data: array of shape (samples,)
            epochs, batch_size: training settings passed to fit()
        Output:
            Returns the fitted model, which is also stored in self.lstm_model
        '''
        # Building the LSTM Model
        lstm_model = Sequential()
        lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train_data.shape[1], 1)))
        lstm_model.add(LSTM(units=50))
        lstm_model.add(Dense(1))

        lstm_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_error'])
        lstm_model.fit(x_train_data, y_train_data, epochs=epochs, batch_size=batch_size, verbose=0)
        self.lstm_model = lstm_model
        return lstm_model

    def get_prediction_for_instrument(self, historical_value_instrument, horizon=12):
        '''
        Input:
            historical_value_instrument: chronological marketValue history of
            one instrument (single-column dataframe or array-like)
            horizon: number of future steps to forecast (default 12)
        Output:
            Returns a (horizon, 1) array of forecasts in the original value scale
        Raises:
            RuntimeError if normalize()/run_lstm_model() have not been run yet
            ValueError if fewer than time_step observations are supplied
        '''
        if self.scaler is None or self.lstm_model is None:
            raise RuntimeError('normalize() and run_lstm_model() must be called before predicting')

        history = np.asarray(historical_value_instrument, dtype=float).reshape(-1, 1)
        if history.shape[0] < self.time_step:
            raise ValueError('at least {} observations are required to forecast'.format(self.time_step))

        # Seed the rolling window with the most recent observations only.
        window = self.scaler.transform(history[-self.time_step:])[:, 0].tolist()

        # Recursive multi-step forecast: every prediction is appended to the
        # window and becomes an input of the next step, so the result really
        # extends past the end of the history instead of re-predicting it.
        scaled_forecasts = []
        for _ in range(horizon):
            model_input = np.array(window[-self.time_step:], dtype=float).reshape(1, self.time_step, 1)
            prediction = np.asarray(self.lstm_model.predict(model_input, verbose=0))
            next_value = float(prediction.reshape(-1)[0])
            scaled_forecasts.append(next_value)
            window.append(next_value)

        scaled_forecasts = np.array(scaled_forecasts, dtype=float).reshape(-1, 1)
        return self.scaler.inverse_transform(scaled_forecasts)

    def run_all_predictions(self):
        '''
        Output:
            Returns a dataframe (instrumentId, date, predictedMarketValue) with
            one row per instrument and future month end. Instruments whose
            history is too short to build a training window are skipped with a
            warning message. Results of earlier runs are replaced, not appended.
        '''
        instrument_lst = self.get_all_instrument_id()
        next_12m_lst = self.getNext12Months()
        horizon = len(next_12m_lst)

        # Collected locally so that calling this method twice does not
        # duplicate the rows of the previous run.
        prediction_frames = []

        for position, instrument_id in enumerate(instrument_lst, start=1):
            print('Progress: {}/{}'.format(position, len(instrument_lst)))
            instrument_df = self.get_market_values_for_instrument(instrument_id)

            # At least time_step + 1 rows are needed for one (window, target) pair.
            if instrument_df.shape[0] <= self.time_step:
                print('WARNING: Skipping instrument {} (needs more than {} observations)'.format(
                    instrument_id, self.time_step))
                continue

            scaled_data = self.normalize(instrument_df)
            historical_value_instrument = instrument_df[['marketValue']]
            x_train_data, y_train_data = self._build_training_windows(scaled_data)

            print('INFO: Running LSTM model for instrument {}'.format(instrument_id))
            self.run_lstm_model(x_train_data, y_train_data)

            print('INFO: Predicting prices for instrument {}'.format(instrument_id))
            predicted_stock_price = self.get_prediction_for_instrument(historical_value_instrument, horizon)

            # A fresh frame per instrument; nothing is shared between iterations.
            self.instrumentpredictionDF = pd.DataFrame({
                'instrumentId': [instrument_id] * horizon,
                'date': next_12m_lst,
                'predictedMarketValue': np.ravel(predicted_stock_price),
            })
            prediction_frames.append(self.instrumentpredictionDF)

        self.lst_df = prediction_frames
        if prediction_frames:
            # ignore_index gives the combined table a unique 0..n-1 index.
            self.allpredictionsDF = pd.concat(prediction_frames, ignore_index=True)
        else:
            self.allpredictionsDF = pd.DataFrame(columns=self.PREDICTION_COLUMNS, dtype=object)

        print('SUCCESS: Prices of all instruments have been predicted for the next 12 months.')
        return self.allpredictionsDF


In [None]:
# Build the predictor from the loaded market values, then forecast every instrument.
# NOTE: the full run takes around 20++ min.
marketvaluepredictor = MarketValuePrediction(marketValueDF=market_values)
marketprediction_df = marketvaluepredictor.run_all_predictions()