In [None]:
import os
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
from src import CompanyDetails
import datetime as dt

An LSTM layer expects a 3D input array of shape (batch_size, timesteps, features).

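The LSTM output may be 2D or 3D: 2D --> (batch_size, units) when only the last step is returned (`return_sequences=False`, the default)  |  3D --> (batch_size, timesteps, units) when the full sequence is returned (`return_sequences=True`).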

In [None]:
# Symbol of the company whose share prices are analysed in this notebook.
c_name = 'ITC'
details = CompanyDetails(c_name)

# Fetch the price history. The arguments look like "4 years of data at a
# 1-day interval" -- TODO confirm against CompanyDetails.sharePriceRange.
price_arr = details.sharePriceRange('4y', '1d')
# Quick inspection: print each item produced by iterating over the result.
for val in price_arr:
    print(val)

In [None]:
# Wrap the fetched price data in a DataFrame; later cells read its 'Close'
# column and slice it by date through its index.
df = pd.DataFrame(price_arr)
# Optional diagnostics, intentionally left disabled:
# df.reset_index()
# print(df.shape)
# print(df.info)
# print(df.describe())
# Show only the first rows instead of the whole frame.
print(df.head())

In [None]:
# Plot the 'Close' column against the frame's index on a wide canvas.
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Close'], linewidth = 2)

In [None]:
# Simple moving averages of 'Close' over windows of 100 and 200 rows.
# With pandas' default min_periods, the first (window - 1) entries are NaN.
ma100 = df.Close.rolling(100).mean()
ma200 = df.Close.rolling(200).mean()
# Store the averages as new columns on the shared frame (in-place mutation).
df['ma100'] = ma100
df['ma200'] = ma200
# NOTE(review): this prints the full frame (pandas truncates long output);
# df.tail() would be enough to verify the new columns.
print(df)

In [None]:
# Overlay the closing price with both simple moving averages.
plt.figure(figsize=(12, 6))
plt.plot(df.Close, label = 'share price', linewidth=2)
plt.plot(df.ma100, label='100 moving average', linewidth=1)
plt.plot(df.ma200, label='200 moving average', linewidth=1)
# The legend text comes from the label= arguments above.
plt.legend()
plt.show()

In [None]:
# Exponential moving averages of 'Close' with spans of 100 and 200 rows.
# adjust=False selects pandas' recursive EMA formulation.
ema100 = df.Close.ewm(span=100, adjust=False).mean()
ema200 = df.Close.ewm(span=200, adjust=False).mean()
# Store the averages as new columns on the shared frame (in-place mutation).
df['ema100'] = ema100
df['ema200'] = ema200
# Show the last rows to check the newly added columns.
print(df.tail())

In [None]:
# Overlay the closing price with both exponential moving averages.
plt.figure(figsize=(12,6))
plt.plot(df.Close, label='share price', linewidth=2)
plt.plot(df.ema100, label='ema100', linewidth=1)
plt.plot(df.ema200, label='ema200', linewidth=1)
# The legend text comes from the label= arguments above.
plt.legend()

In [None]:
import datetime

def str_to_datetime(date_str):
    """Convert a 'YYYY-MM-DD' string into a ``datetime.date``.

    The string is split on '-' and the first three fields are read as
    year, month and day. Despite the function's name, the result is a
    plain date (no time component).
    """
    fields = date_str.split('-')
    year = int(fields[0])
    month = int(fields[1])
    day = int(fields[2])
    return datetime.date(year, month, day)

# Sanity check of the parser on a sample date string.
print(str_to_datetime('2024-02-16'))


In [None]:
from datetime import date, timedelta

def df_to_windowed_df(dataframe, first_date_str, last_date_str, n=3):
  """Build a table of sliding 'Close' windows for supervised learning.

  One output row is produced for every row of ``dataframe`` whose index
  label lies between ``first_date_str`` and ``last_date_str`` (both
  inclusive). Each output row holds the ``n`` 'Close' values that precede
  the target row, followed by the target row's own 'Close' value.

  Only rows that actually exist in the index become targets. Boundary
  dates that are missing from the index (for example weekends) are
  allowed: the range simply starts at the first row on or after
  ``first_date_str`` and ends at the last row on or before
  ``last_date_str``. Earlier revisions stepped through the data with a
  7-day look-ahead, which skipped a row when the start date was missing
  and never terminated when the end date was missing.

  Args:
    dataframe: frame with a 'Close' column and a sorted, date-like index
      that supports label slicing with ``datetime.date`` objects.
    first_date_str: first target date, formatted 'YYYY-MM-DD'.
    last_date_str: last target date, formatted 'YYYY-MM-DD'.
    n: number of preceding closes used as features per row.

  Returns:
    A DataFrame with the columns 'Target Date', 'Target-n' ... 'Target-1'
    and 'Target'. It is empty when no row falls inside the date range.
    Returns None (after printing an error) when fewer than ``n`` rows
    precede the first target row.
  """
  first_date = str_to_datetime(first_date_str)
  last_date = str_to_datetime(last_date_str)

  # Translate the date range into positional bounds using label slicing:
  # `start` is the position of the first row on/after first_date and
  # `stop` is one past the last row on/before last_date.
  start = len(dataframe) - len(dataframe.loc[first_date:])
  stop = len(dataframe.loc[:last_date])
  positions = range(start, stop)

  # Every window reaches n rows back, so the first target needs n
  # predecessors; later targets then have enough history as well.
  if len(positions) > 0 and start < n:
    first_target = pd.Timestamp(dataframe.index[start]).date()
    print(f'Error: Window of size {n} is too large for date {first_target}')
    return None

  closes = dataframe['Close'].to_numpy()

  # Each row is [close(t-n), ..., close(t-1), close(t)]. The reshape keeps
  # the array two-dimensional even when there are no targets at all.
  windows = np.array([closes[pos - n:pos + 1] for pos in positions])
  windows = windows.reshape(-1, n + 1)

  ret_df = pd.DataFrame({})
  ret_df['Target Date'] = [pd.Timestamp(label).date()
                           for label in dataframe.index[start:stop]]

  for i in range(0, n):
    ret_df[f'Target-{n-i}'] = windows[:, i]

  ret_df['Target'] = windows[:, n]

  return ret_df

# Start day second time around: '2021-03-25'
# First windowing run over a one-year range. Its result is replaced by the
# second call further down in this cell.
windowed_df = df_to_windowed_df(df,
                                '2021-03-25',
                                '2022-03-23',
                                n=3)
# A bare expression in the middle of a cell is not displayed by Jupyter;
# only the final print below produces output.
windowed_df

# Date range actually used for the rest of the notebook.
start_date = '2022-03-20'
end_date = '2025-03-20'

# Windows of 3 previous closes per target date.
windowed_df = df_to_windowed_df(df, start_date, end_date, 3)
print(windowed_df)


In [None]:
# Today's date and the date 2*365 days before it.
today = date.today()
# NOTE(review): despite its name, `last_year` lies about two years back
# (2*365 days), not one.
last_year = today - timedelta(days=2*365)
print(today, last_year)

In [None]:
def windowed_df_to_date_X_y(windowed_dataframe):
  """Split a windowed frame into dates, model inputs and targets.

  The first column is returned unchanged as the dates, the last column
  becomes the float32 target vector, and every column in between becomes
  the float32 input tensor of shape (rows, window_length, 1).
  """
  table = windowed_dataframe.to_numpy()

  dates = table[:, 0]
  targets = table[:, -1].astype(np.float32)

  # The trailing axis of length 1 gives each time step a single feature.
  features = table[:, 1:-1, np.newaxis].astype(np.float32)

  return dates, features, targets

# Convert the windowed frame into the arrays used for training.
dates, X, y = windowed_df_to_date_X_y(windowed_df)

# Shapes: dates and y are 1-D, X is (rows, window_length, 1).
print(dates.shape, X.shape, y.shape)
print(dates)

In [None]:
# Chronological split: first 80% for training, next 10% for validation,
# final 10% for testing. No shuffling, so order in time is preserved.
n_samples = len(dates)
q_80 = int(n_samples * .8)
q_90 = int(n_samples * .9)

dates_train, dates_val, dates_test = dates[:q_80], dates[q_80:q_90], dates[q_90:]
X_train, X_val, X_test = X[:q_80], X[q_80:q_90], X[q_90:]
y_train, y_val, y_test = y[:q_80], y[q_80:q_90], y[q_90:]

# Draw the target series of each split in its own colour.
for split_dates, split_targets, split_name in (
    (dates_train, y_train, 'Train'),
    (dates_val, y_val, 'Validation'),
    (dates_test, y_test, 'Test'),
):
    plt.plot(split_dates, split_targets, label=split_name)

plt.legend()

In [None]:
# Network: one LSTM layer followed by two small dense layers and a single
# linear output unit (the predicted close).
# The input shape (time steps, features) is read from the training tensor,
# so the model follows whatever window length was used to build X_train
# instead of assuming 3. It is declared once, on the Input layer; the LSTM
# no longer receives a separate `input_shape` argument.
model = Sequential([layers.Input(X_train.shape[1:]),
                    layers.LSTM(64),
                    layers.Dense(32, activation='relu'),
                    layers.Dense(32, activation='relu'),
                    layers.Dense(1)])

# Mean squared error is optimised; mean absolute error is tracked because
# it is expressed in the same units as the target.
model.compile(loss='mse',
              optimizer=Adam(learning_rate=0.001),
              metrics=['mean_absolute_error'])

# Train on the training split and evaluate on the validation split after
# every epoch.
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100)
model.summary()

In [None]:
# Predict on the training split and flatten the output to one dimension
# so it lines up with the dates.
train_predictions = model.predict(X_train).flatten()

# Predicted versus observed values; legend text comes from the labels.
plt.plot(dates_train, train_predictions, linewidth=1, label='Training Predictions')
plt.plot(dates_train, y_train, linewidth=1, label='Training Observations')
plt.legend()

In [None]:
# Predict on the validation split and flatten the output to one dimension
# so it lines up with the dates.
val_predictions = model.predict(X_val).flatten()

# Predicted versus observed values; legend text comes from the labels.
plt.plot(dates_val, val_predictions, label='Validation Predictions')
plt.plot(dates_val, y_val, label='Validation Observations')
plt.legend()

In [None]:
# Predict on the held-out test split and flatten the output to one
# dimension so it lines up with the dates.
test_predictions = model.predict(X_test).flatten()

# Predicted versus observed values; legend text comes from the labels.
plt.plot(dates_test, test_predictions, label='Testing Predictions')
plt.plot(dates_test, y_test, label='Testing Observations')
plt.legend()

In [None]:
# Overlay predictions and observations of all three splits in one figure.
# The order of the entries fixes both drawing order and legend order.
overlay_series = [
    (dates_train, train_predictions, 'Training Predictions'),
    (dates_train, y_train, 'Training Observations'),
    (dates_val, val_predictions, 'Validation Predictions'),
    (dates_val, y_val, 'Validation Observations'),
    (dates_test, test_predictions, 'Testing Predictions'),
    (dates_test, y_test, 'Testing Observations'),
]

for series_dates, series_values, series_label in overlay_series:
    plt.plot(series_dates, series_values, label=series_label)

plt.legend()