In [68]:
import os
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PowerTransformer

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error

from prophet import Prophet

from statsmodels.tsa.arima.model import ARIMA

from keras.layers import Dense,Dropout,LSTM
from keras.models import Sequential
import tensorflow as tf

from scipy import stats

# All CSV Files in a List

In [69]:
# File names of the per-ticker training CSVs that the notebook forecasts.
csv_files = [
    '003.csv', '064.csv', '070.csv', '088.csv', '135.csv',
    '143.csv', '168.csv', '169.csv', '213.csv', '226.csv',
    '228.csv', '230.csv', '234.csv', '274.csv', '319.csv',
    '355.csv', '367.csv', '374.csv', '376.csv', '387.csv',
    '390.csv', '405.csv', '416.csv', '433.csv', '436.csv',
    '445.csv', '473.csv', '484.csv', '495.csv', '528.csv',
    '542.csv', '546.csv', '552.csv', '558.csv', '559.csv',
    '577.csv', '592.csv', '614.csv', '640.csv', '645.csv',
    '653.csv', '662.csv', '665.csv', '702.csv', '791.csv',
    '804.csv', '859.csv', '875.csv', '914.csv', '958.csv',
]

In [70]:
import datetime

def get_next_100_dates_excluding_weekends(start_date, exclude_weekends=True, count=100):
  """Return the next ``count`` dates that come after ``start_date``.

  Args:
    start_date: A ``datetime.date`` or ``datetime.datetime``. It is not
      included in the result; the first candidate is the following day.
    exclude_weekends: When True (default), Saturdays and Sundays are skipped.
    count: How many dates to return. Defaults to 100.

  Returns:
    A list of ``count`` dates in ascending order, of the same type as
    ``start_date``.

  Raises:
    ValueError: If ``count`` is negative.
  """
  if count < 0:
    raise ValueError("count must be non-negative")

  one_day = datetime.timedelta(days=1)
  dates = []
  current_date = start_date
  while len(dates) < count:
    current_date += one_day
    # date.weekday(): Monday is 0, so 5 and 6 are Saturday and Sunday.
    if exclude_weekends and current_date.weekday() >= 5:
      continue
    dates.append(current_date)
  return dates


# Fixing Outliers in Given Data

In [71]:
def fix_outliers_iqr(dataframe):
    """Clip outliers in every numeric column to the 1.5 * IQR fences.

    For each numeric column other than 'Date', values below
    ``Q1 - 1.5 * IQR`` are raised to that limit and values above
    ``Q3 + 1.5 * IQR`` are lowered to that limit. Missing values are left
    untouched. Non-numeric columns are skipped.

    Args:
        dataframe: pandas DataFrame to clean. It is modified in place.

    Returns:
        The same ``dataframe`` object, so the call can be used in an assignment.
    """
    # errors='ignore' lets the function run on frames without a 'Date' column.
    candidates = dataframe.drop(columns=['Date'], errors='ignore')
    for column in candidates.select_dtypes(include='number').columns:
        q1 = dataframe[column].quantile(0.25)
        q3 = dataframe[column].quantile(0.75)
        iqr = q3 - q1
        dataframe[column] = dataframe[column].clip(
            lower=q1 - 1.5 * iqr,
            upper=q3 + 1.5 * iqr,
        )
    return dataframe

# Plotting Moving Average

In [72]:
def Moving_Avg(df):
    """Plot the 'Close' column in red with its 100-row rolling mean in blue."""
    closing = df['Close']
    rolling_mean = closing.rolling(100).mean()
    for series, colour in ((closing, 'r'), (rolling_mean, 'b')):
        plt.plot(series, c=colour)
    plt.show()


# Predictions Using Prophet Model

In [73]:
def predictor_prophet(path):
  """Fit a Prophet model on one CSV's closing prices and forecast 100 periods.

  The CSV at ``path`` must contain 'Date' and 'Close' columns. Outliers are
  clipped with ``fix_outliers_iqr`` before fitting. The forecast is plotted
  and the resulting frame is printed.

  Args:
    path: Path to the CSV file. The file name without its extension is used
      as the prefix of the generated IDs.

  Returns:
    A DataFrame with 100 rows and the columns 'Date', 'Price' and 'ID', where
    the IDs are ``<file stem>_#1`` to ``<file stem>_#100``.
  """
  df = pd.read_csv(path)
  df = fix_outliers_iqr(df)
  df['Date'] = pd.to_datetime(df['Date'])

  # Prophet expects the time column to be named 'ds' and the target 'y'.
  history = df[['Date', 'Close']].rename(columns={'Date': 'ds', 'Close': 'y'})

  model = Prophet()
  model.fit(history)

  # Predicting closing price for next 100 days
  # NOTE(review): make_future_dataframe is called with its default frequency,
  # which presumably yields consecutive calendar days (weekends included) -
  # confirm whether 100 trading days (e.g. freq='B') are wanted instead.
  prediction_dates = model.make_future_dataframe(periods=100)
  predictions = model.predict(prediction_dates)
  model.plot(predictions).show()

  # rename() returns a new frame, so adding the ID column below never writes
  # into a slice of `predictions`.
  pred_df = predictions[['ds', 'yhat']].tail(100).rename(
      columns={'ds': 'Date', 'yhat': 'Price'})

  stem = os.path.splitext(os.path.basename(path))[0]
  pred_df['ID'] = [f"{stem}_#{i}" for i in range(1, 101)]
  print(pred_df)
  return pred_df

# Prediction using ARIMA
1. ARIMA gave the best results for predicting stock prices

### The function below searches for the best values of p, d and q
1. In practice, it did not help much in getting the best results

In [74]:

def grid_search_arima(data, p_range=None, d_range=None, q_range=None):
    """Grid-search the ARIMA (p, d, q) order with the lowest hold-out MSE.

    The last 200 observations are held out as the test set. Every candidate
    order is fitted on the remaining data and scored by the mean squared
    error of its 200-step prediction.

    Args:
        data: One-dimensional sequence of observations (anything accepted by
            ``pd.Series``).
        p_range: Iterable of AR orders to try. ``None`` uses [0, 1, 2, 3, 5, 7].
        d_range: Iterable of differencing orders to try. ``None`` uses range(0, 3).
        q_range: Iterable of MA orders to try. ``None`` uses range(0, 3).

    Returns:
        The best ``(p, d, q)`` tuple, or an empty tuple if no candidate could
        be fitted and scored.

    Raises:
        ValueError: If ``data`` is too short to leave any training data after
            the 200-observation hold-out.
    """
    holdout = 200
    time_series = pd.Series(data)
    if len(time_series) < holdout + 2:
        raise ValueError(
            f"need at least {holdout + 2} observations, got {len(time_series)}"
        )

    # NOTE(review): the training slice starts at position 1, so the very first
    # observation is left out - kept as is; confirm this is intentional.
    train_data = time_series.iloc[1:len(time_series) - holdout]
    test_data = time_series.iloc[len(time_series) - holdout:]

    # Honour the caller's ranges; fall back to the built-in grid when omitted.
    p_values = [0, 1, 2, 3, 5, 7] if p_range is None else list(p_range)
    d_values = list(range(0, 3)) if d_range is None else list(d_range)
    q_values = list(range(0, 3)) if q_range is None else list(q_range)

    # inf instead of a large constant, so series with huge errors still get a winner.
    min_error = float('inf')
    best = ()

    # Silence fit warnings for the duration of the search only.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for p in p_values:
            for d in d_values:
                for q in q_values:
                    order = (p, d, q)
                    try:
                        model = ARIMA(train_data, order=order).fit()
                        predictions = model.predict(
                            start=len(train_data),
                            end=len(train_data) + len(test_data) - 1,
                        )
                        error = mean_squared_error(test_data, predictions)
                    except (ValueError, np.linalg.LinAlgError):
                        # One order that cannot be fitted or scored must not
                        # abort the whole search.
                        continue

                    if error < min_error:
                        min_error = error
                        best = order

    return best

In [75]:
def predictor_arima(path):
    """Forecast 100 closing prices for one CSV with a Box-Cox + ARIMA(7, 1, 1) model.

    Rows containing missing values are dropped, the 'Close' column is Box-Cox
    transformed, the ARIMA model is fitted on the transformed series, and the
    100-step forecast is mapped back to the original scale.

    Args:
        path: Path to a CSV file with a 'Close' column. The file name without
            its extension is used as the prefix of the generated IDs.

    Returns:
        A DataFrame with 100 rows and the columns 'ID' and 'Price', where the
        IDs are ``<file stem>_#1`` to ``<file stem>_#100``.
    """
    frame = pd.read_csv(path).dropna()

    # fix_outliers_iqr is deliberately not applied here: the best results
    # were obtained without fixing the outliers.
    transformer = PowerTransformer(method="box-cox")
    close_column = np.array(frame['Close']).reshape(-1, 1)
    transformed = transformer.fit_transform(close_column)

    fitted = ARIMA(transformed, order=(7, 1, 1)).fit()
    forecast = fitted.forecast(100)

    ticker = os.path.splitext(os.path.basename(path))[0]
    ids = [f"{ticker}_#{step}" for step in range(1, 101)]

    # Undo the Box-Cox transform to get prices back on the original scale.
    restored = transformer.inverse_transform(np.array(forecast).reshape(-1, 1))
    prices = [row[0] for row in restored]

    return pd.DataFrame({'ID': ids, 'Price': prices})

# Prediction Using LSTM

In [76]:

def Keras(x, y, x1, x_input, epochs, n_future=100):
    """Train a two-layer LSTM and return test predictions plus a recursive forecast.

    Args:
        x: Training windows; ``x.shape[1]`` is taken as the number of time
            steps per window (one feature per step).
        y: Training targets, one per window in ``x``.
        x1: Windows to predict right after training (same layout as ``x``).
        x_input: Most recent observations used to seed the recursive
            forecast; reshaped internally to a single column. Must hold at
            least ``x.shape[1]`` values.
        epochs: Maximum number of training epochs. Early stopping on the
            training loss with zero patience may end training sooner.
        n_future: Number of future steps to forecast recursively. Defaults
            to 100.

    Returns:
        Array of shape ``(len(predictions for x1) + n_future, 1)``: the
        predictions for ``x1`` followed by the ``n_future`` forecast steps.

    Raises:
        ValueError: If ``x_input`` has fewer values than one training window.
    """
    callback = tf.keras.callbacks.EarlyStopping(
        monitor="loss",
        min_delta=0,
        patience=0,
        verbose=0,
        mode="auto",
        baseline=None,
        restore_best_weights=False,
        start_from_epoch=0,
    )

    time_steps = x.shape[1]

    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=(time_steps, 1)))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss="mean_squared_error")
    model.fit(x, y, epochs=epochs, verbose=1, batch_size=5, callbacks=[callback])

    y_pred = model.predict(x1)

    history = np.asarray(x_input).reshape(-1, 1)
    if len(history) < time_steps:
        raise ValueError(
            f"x_input must contain at least {time_steps} values, got {len(history)}"
        )

    future = []
    for _ in range(n_future):
        # Feed exactly one window of the same length the network was trained
        # on, shaped (batch=1, time_steps, features=1). Slicing the transposed
        # array instead would select the size-1 axis and pass the whole,
        # ever-growing history.
        window = history[-time_steps:].reshape(1, time_steps, 1)
        pred = model.predict(window, verbose=0)
        # Append the prediction so the next step is forecast from it.
        history = np.append(history, pred, axis=0)
        future.append(pred)

    if future:
        y_pred = np.concatenate([y_pred] + future, axis=0)

    return y_pred

### Function to split the dataset into sliding-window samples

In [77]:
def create_dataset(dataset, time_step=1):
    """Build sliding-window samples from the first column of ``dataset``.

    Each sample is ``time_step`` consecutive values and its target is the
    value that immediately follows the window.

    Args:
        dataset: pandas DataFrame; only its first column is used.
        time_step: Number of consecutive values per window. Defaults to 1.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays. ``X`` has one row of ``time_step``
        values per window and ``Y`` holds the matching targets. Both are
        empty when ``dataset`` has no more than ``time_step`` rows.
    """
    values = dataset.values
    dataX, dataY = [], []
    # The last window that still has a following value starts at
    # len(values) - time_step - 1, so the range must end at len(values) - time_step.
    for start in range(len(values) - time_step):
        stop = start + time_step
        dataX.append(values[start:stop, 0])
        dataY.append(values[stop, 0])
    return np.array(dataX), np.array(dataY)

In [78]:
def predictor_LSTM(path):
    """Forecast 100 closing prices for one CSV with the LSTM model.

    Rows containing missing values are dropped and the 'Close' column is
    standardised. The first 80% of the series supplies the training windows,
    the remainder supplies the test windows, and the last 100 test values
    seed the forecast. Windows are 50 steps long and training runs for at
    most 5 epochs.

    Args:
        path: Path to a CSV file with a 'Close' column. The file name without
            its extension is used as the prefix of the generated IDs.

    Returns:
        A DataFrame with 100 rows and the columns 'ID' and 'Price', where the
        IDs are ``<file stem>_#1`` to ``<file stem>_#100``.
    """
    frame = pd.read_csv(path).dropna()

    # fix_outliers_iqr is intentionally left disabled for this model.
    scaler = StandardScaler()
    scaled_close = scaler.fit_transform(np.array(frame['Close']).reshape(-1, 1))

    cut = int(0.8 * len(scaled_close))
    train_part = pd.DataFrame(scaled_close[:cut])
    test_part = pd.DataFrame(scaled_close[cut:])

    x, y = create_dataset(train_part, 50)
    x1, _ = create_dataset(test_part, 50)
    seed_values = test_part.values[-100:]

    forecast = Keras(x, y, x1, seed_values, epochs=5)
    # Back to the original price scale; only the last 100 rows are the forecast.
    forecast = scaler.inverse_transform(forecast)[-100:]
    forecast = forecast.reshape((1, forecast.shape[0])).ravel()

    ticker = os.path.splitext(os.path.basename(path))[0]
    ids = [f"{ticker}_#{step}" for step in range(1, 101)]

    return pd.DataFrame({'ID': ids, 'Price': forecast})



# Function To Write Into CSV

In [79]:
def write_to_csv(dfs, path='submit.csv'):
    """Concatenate the prediction frames and write them to a CSV file.

    Args:
        dfs: Collection of DataFrames accepted by ``pd.concat``.
        path: Destination file. Defaults to 'submit.csv'. An existing file is
            overwritten.

    Raises:
        ValueError: Raised by ``pd.concat`` when ``dfs`` is empty.
    """
    updated_df = pd.concat(dfs)
    # index=False keeps the row index out of the file.
    updated_df.to_csv(path, index=False)

### A CSV file with the column names ID and Price was created beforehand
1. The file, named submit.csv, already existed, and our predictions were written to it

In [2]:
# Forecast every training file with the selected model and collect the
# per-file ID/Price frames for the submission.
dfs = []
for csv in csv_files:
    # ARIMA is the active model; the Prophet and LSTM calls are kept as
    # drop-in alternatives.
    pred = predictor_arima(f'./mine-the-model-2023/Upload-Dataset/TRAIN/{csv}')
    # pred = predictor_prophet(f'./mine-the-model-2023/Upload-Dataset/TRAIN/{csv}')
    # pred = predictor_LSTM(f'./mine-the-model-2023/Upload-Dataset/TRAIN/{csv}')
    data = pd.DataFrame({'ID': pred['ID'], 'Price': pred['Price'].tolist()})
    dfs.append(data)

write_to_csv(dfs)