In [41]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mosaic24-ps1/sample_submission.csv
/kaggle/input/mosaic24-ps1/train.csv
/kaggle/input/mosaic24-ps1/test.csv


In [42]:
import math
import datetime as dt

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM, GRU

from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [43]:
train = pd.read_csv("/kaggle/input/mosaic24-ps1/train.csv")
test = pd.read_csv("/kaggle/input/mosaic24-ps1/test.csv")

In [44]:
# Pivot the DataFrame
df_closing = train.pivot(index='Date', columns='Company', values=['Close'])

# Flatten the MultiIndex columns
df_closing.columns = ['_'.join(col).strip() for col in df_closing.columns.values]

# Reset index for better structure
df_closing.reset_index(inplace=True)

# Display the result
df_closing.head()

Unnamed: 0,Date,Close_Company_0,Close_Company_1,Close_Company_2,Close_Company_3,Close_Company_4,Close_Company_5
0,2033-03-01,161.915062,113.066659,50.094499,98.811966,8873.173069,135.882546
1,2033-03-05,163.108507,110.561246,49.250071,96.594597,8794.758109,135.970813
2,2033-03-06,161.716298,109.651386,48.878616,93.602065,8616.178598,136.070561
3,2033-03-07,148.051907,109.723772,48.670564,90.71808,8679.16597,135.772818
4,2033-03-08,148.483368,109.118052,47.642654,90.771456,8729.613528,137.85436


In [45]:
df_closing_company4 = df_closing[['Date', 'Close_Company_4']]

In [46]:
df_closing_company4.loc[:, 'Date'] = pd.to_datetime(df_closing_company4['Date'], utc=True)

In [47]:
from datetime import datetime
import pytz

# Convert the string to a timezone-aware Timestamp object
date_threshold_str = '2033-06-01'
date_threshold = pd.Timestamp(datetime.strptime(date_threshold_str, '%Y-%m-%d'), tz=pytz.UTC)

# Filter the DataFrame based on the condition
stock_df_close_last_1_year = df_closing_company4[df_closing_company4['Date'] > date_threshold]

# Make a copy of the filtered DataFrame
stock_df_close_last_1_year_original_gt = stock_df_close_last_1_year.copy()

# Print the total data for prediction
print("Total data for prediction: ", stock_df_close_last_1_year.shape[0])

Total data for prediction:  1173


In [48]:
fig = px.line(stock_df_close_last_1_year, x=stock_df_close_last_1_year.Date, y=stock_df_close_last_1_year.Close_Company_4,labels={'Date':'Date','Close_Company_4':'Close_Company_4'})

fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')

fig.update_layout(title_text='Considered period to predict Stock close price', plot_bgcolor='white', font_size=15, font_color='black')

fig.update_xaxes(showgrid=False)

fig.update_yaxes(showgrid=False)

fig.show()

In [49]:
training_size=int(len(stock_df_close_last_1_year)*0.80)

test_size=len(stock_df_close_last_1_year)-training_size

train_data,test_data = stock_df_close_last_1_year[0:training_size], stock_df_close_last_1_year[training_size:len(stock_df_close_last_1_year)]

print("train_data: ", train_data.shape)  # (201, 2)

print("test_data: ", test_data.shape)

train_data:  (938, 2)
test_data:  (235, 2)


In [50]:
del train_data['Date']

del test_data['Date']

In [51]:
scaler=MinMaxScaler(feature_range=(0,1))

train_data=scaler.fit_transform(np.array(train_data).reshape(-1,1))

test_data = scaler.transform(np.array(test_data).reshape(-1,1))

print(train_data.shape)

(938, 1)


In [52]:
def create_sliding_window(dataset, time_step=200):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]   ###i=0, 0,1,2,3-----99   100 
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

In [53]:
time_step = 200

X_train, y_train = create_sliding_window(train_data, time_step)

X_test, y_test = create_sliding_window(test_data, time_step)

print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test", y_test.shape)

X_train:  (737, 200)
y_train:  (737,)
X_test:  (34, 200)
y_test (34,)


In [54]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


print("X_train: ", X_train.shape)
print("X_test: ", X_test.shape)

X_train:  (737, 200, 1)
X_test:  (34, 200, 1)


In [65]:
model=Sequential()

model.add(GRU(64,return_sequences=True,input_shape=(time_step,1)))

model.add(GRU(32,return_sequences=True))

model.add(GRU(32))

model.add(Dropout(0.20))

model.add(Dense(1))

model.compile(loss='mean_squared_error',optimizer='adam')


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [66]:
history = model.fit(X_train,y_train, validation_data=(X_test,y_test), epochs=400, batch_size=32, verbose=1)

Epoch 1/400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - loss: 0.1059 - val_loss: 0.0449
Epoch 2/400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0142 - val_loss: 0.0120
Epoch 3/400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0070 - val_loss: 0.0029
Epoch 4/400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0062 - val_loss: 0.0013
Epoch 5/400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0043 - val_loss: 0.0015
Epoch 6/400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0041 - val_loss: 2.9015e-04
Epoch 7/400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0044 - val_loss: 3.3230e-04
Epoch 8/400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0038 - val_loss: 0.0010
Epoch 9/400
[1m24/24[0m [32m━

In [67]:
train_predict=model.predict(X_train)

test_predict=model.predict(X_test)

train_predict.shape, test_predict.shape

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


((737, 1), (34, 1))

In [68]:
train_predict = scaler.inverse_transform(train_predict)

test_predict = scaler.inverse_transform(test_predict)

original_ytrain = scaler.inverse_transform(y_train.reshape(-1,1)) 

original_ytest = scaler.inverse_transform(y_test.reshape(-1,1)) 

In [69]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain, train_predict)))

print("Train data MSE: ", mean_squared_error(original_ytrain,train_predict))

print("Train data MAE: ", mean_absolute_error(original_ytrain,train_predict))

print("-------------------------------------------------------------------------------------")

print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest,test_predict)))

print("Test data MSE: ", mean_squared_error(original_ytest,test_predict))

print("Test data MAE: ", mean_absolute_error(original_ytest,test_predict))

Train data RMSE:  148.8657251558449
Train data MSE:  22161.00412617555
Train data MAE:  106.63315721808416
-------------------------------------------------------------------------------------
Test data RMSE:  84.73873108289156
Test data MSE:  7180.652545538614
Test data MAE:  68.40501966931949


In [72]:
# Reshape test data to use as input for prediction
x_input = test_data[len(test_data) - time_step:]
x_input = (np.array(x_input)).reshape(1,time_step,-1)
print("x_input.shape ", x_input.shape)
# print("x_input", x_input)
print('###########################################################################')

from numpy import array

# Initialize variables
temp_input = list(x_input)
lst_output = []
n_steps = time_step
pred_days = 96  # Predict for 96 days

for _ in range(pred_days):
    if len(temp_input[0]) > time_step:
        x_input = array(temp_input[0][1:])  # Remove the first element from temp_input
        x_input = x_input.reshape((1, n_steps, -1))
        y_pred = model.predict(x_input, verbose=0)
        
        temp_input[0] = np.append(temp_input[0][1:], y_pred)  # Append the predicted value to temp_input
        lst_output.append(y_pred[0].tolist())
    else:
        # Reshape input for prediction if temp_input length is less than time_step
        x_input = array(temp_input[0])
        x_input = x_input.reshape((1, n_steps, -1))
        y_pred = model.predict(x_input, verbose=0)
        
        temp_input[0] = np.append(temp_input[0], y_pred)  # Append the predicted value to temp_input
        lst_output.append(y_pred[0].tolist())

print("Output of predicted next days: ", len(lst_output))

x_input.shape  (1, 200, 1)
###########################################################################
Output of predicted next days:  96


In [73]:
last_days=np.arange(1,time_step+1)
day_pred=np.arange(time_step+1,time_step+pred_days+1)
temp_matrix = np.empty((len(last_days)+pred_days, 1))

temp_matrix[:] = np.nan

In [74]:
last_original_days_value = temp_matrix

next_predicted_days_value = temp_matrix

last_original_days_value[0:time_step] = (np.array((stock_df_close_last_1_year_original_gt.iloc[-time_step:]['Close_Company_4']))).reshape(-1,1).tolist()

# Assign values to next_predicted_days_value
next_predicted_days_value[time_step:] = (scaler.inverse_transform(lst_output)).tolist()[0]

In [75]:

# Convert next_predicted_days_value to a DataFrame
predictions_df = pd.DataFrame(next_predicted_days_value, columns=['Predictions4'])

# Save the DataFrame to a CSV file
predictions_df.to_csv('predictions4.csv', index=False)

In [76]:
# scaler is your MinMaxScaler object
# Inverse transform lst_output
new_predicted_values = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1))

# Convert predicted_values to a 1D array
new_predicted_values = new_predicted_values.flatten()


# Convert next_predicted_days_value to a DataFrame
new_predictions_df = pd.DataFrame(new_predicted_values, columns=['Predictions4'])

# Save the DataFrame to a CSV file
new_predictions_df.to_csv('newpredictions4.csv', index=False)