# Forcasting  

In this notebook we will predict the closing prices for the next month

In [38]:
# Import Libraries
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler
import datetime


In [39]:

# Load the dataset
data = pd.read_csv('./data/cleaned_weekly_stock_market.csv')


In [40]:

# Extract rows for specified companies
companies = ['AMEN BANK', 'ARTES', 'ASSAD', 'BIAT', 'BANQUE DE TUNISIE', 'EURO-CYCLES',
             'SOTUMAG', 'ONE TECH', 'TUNINDEX', 'SAH', 'SFBT', 'SOMOCER', 'SOTETEL',
             'SOTUVER', 'TUNISAIR', 'BANQUE ATTIJARI DE TUNIS', 'TELNET HOLDING', 'TPR', 'UIB']

data_filtered = data[data['companyName'].isin(companies)]


In [41]:
def str_to_datetime(s):
    try:
        return datetime.datetime.strptime(s, '%Y-%m-%d')
    except ValueError:
        return datetime.datetime.strptime(s, '%d/%m/%Y')

In [42]:

def df_to_windowed_df(dataframe, first_date_str, last_date_str, n=3):
    first_date = str_to_datetime(first_date_str)
    last_date = str_to_datetime(last_date_str)
    target_date = first_date

    dates = []
    X, Y = [], []
    last_time = False

    while True:
        df_subset = dataframe.loc[:target_date].tail(n+1)
        if len(df_subset) != n+1:
            print(f'Error: Window of size {n} is too large for date {target_date}')
            return
        values = df_subset['closingPrice'].to_numpy()
        x, y = values[:-1], values[-1]

        dates.append(target_date)
        X.append(x)
        Y.append(y)

        next_week = dataframe.loc[target_date:target_date+datetime.timedelta(days=7)]
        next_datetime_str = str(next_week.head(2).tail(1).index.values[0])
        next_date_str = next_datetime_str.split('T')[0]
        year_month_day = next_date_str.split('-')
        year, month, day = year_month_day
        next_date = datetime.datetime(day=int(day), month=int(month), year=int(year))

        if last_time:
            break

        target_date = next_date

        if target_date == last_date:
            last_time = True

    ret_df = pd.DataFrame({})
    ret_df['Target Date'] = dates

    X = np.array(X)
    for i in range(0, n):
        X[:, i]
        ret_df[f'Target-{n-i}'] = X[:, i]

    ret_df['Target'] = Y

    return ret_df


In [43]:

def windowed_df_to_date_X_y(windowed_df):
    df_as_np = windowed_df.to_numpy()
    dates = df_as_np[:, 0]
    middle_matrix = df_as_np[:, 1:-1]
    X = middle_matrix.reshape(len(dates), middle_matrix.shape[1], 1)
    y = df_as_np[:, -1]
    return dates, X.astype(np.float32), y.astype(np.float32)


In [44]:

def train_and_plot_model(data, company_name):
    df = data[['date', 'closingPrice']]
    df['date'] = df['date'].apply(str_to_datetime)
    df.index = df.pop('date')
    windowed_df = df_to_windowed_df(df, '30/06/2014', '10/06/2024', n=3)
    
    if windowed_df is None:
        return

    dates, X, y = windowed_df_to_date_X_y(windowed_df)

    X_train_val, X_test, y_train_val, y_test, dates_train_val, dates_test = train_test_split(
        X, y, dates, test_size=0.2, random_state=42
    )

    X_train, X_val, y_train, y_val, dates_train, dates_val = train_test_split(
        X_train_val, y_train_val, dates_train_val, test_size=0.25, random_state=42
    )

    # Plot the data
    plt.figure(figsize=(12, 6))
    plt.plot(dates_train, y_train, label='Train')
    plt.plot(dates_val, y_val, label='Validation')
    plt.plot(dates_test, y_test, label='Test')
    plt.legend()
    plt.xlabel('Date')
    plt.ylabel('Value')
    plt.title(f'Train, Validation, and Test Data for {company_name}')
    plt.show()

    model = Sequential([layers.Input(shape=(3,1)),
                        layers.LSTM(64),
                        layers.Dense(32, activation='relu'),
                        layers.Dense(32, activation='relu'),
                        layers.Dense(1)])
    model.compile(loss='mse',
                  optimizer=Adam(learning_rate=0.001),
                  metrics=['mean_absolute_error'])
    model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val))

    train_pred = model.predict(X_train).flatten()
    val_pred = model.predict(X_val).flatten()
    test_pred = model.predict(X_test).flatten()

    plt.figure(figsize=(12, 6))
    plt.plot(dates_train, train_pred)
    plt.plot(dates_train, y_train)
    plt.plot(dates_val, val_pred)
    plt.plot(dates_val, y_val)
    plt.plot(dates_test, test_pred)
    plt.plot(dates_test, y_test)
    plt.legend(['Training Prediction', 'Training Observation',
                'Validation Prediction', 'Validation Observation',
                'Test Prediction', 'Test Observation'])
    plt.title(f'Predictions and Observations for {company_name}')
    plt.show()


In [45]:

# Apply the model for each company
for company in companies:
    company_data = data_filtered[data_filtered['companyName'] == company]
    if not company_data.empty:
        print(f'Training and plotting for {company}')
        train_and_plot_model(company_data, company)
    else:
        print(f'No data available for {company}')


Training and plotting for AMEN BANK
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for ARTES
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for ASSAD
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for BIAT
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for BANQUE DE TUNISIE
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for EURO-CYCLES


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['date'].apply(str_to_datetime)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['date'].apply(str_to_datetime)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['date'].apply(str_to_datetime)
A value is trying to be set on a copy of a slice from a DataFram

Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for SOTUMAG
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for ONE TECH
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for TUNINDEX
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for SAH
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for SFBT
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for SOMOCER
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for SOTETEL
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for SOTUVER
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for TUNISAIR
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for BANQUE ATTIJ

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['date'].apply(str_to_datetime)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['date'].apply(str_to_datetime)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['date'].apply(str_to_datetime)
A value is trying to be set on a copy of a slice from a DataFram

Training and plotting for TELNET HOLDING
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for TPR
Error: Window of size 3 is too large for date 2014-06-30 00:00:00
Training and plotting for UIB
Error: Window of size 3 is too large for date 2014-06-30 00:00:00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['date'].apply(str_to_datetime)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['date'].apply(str_to_datetime)
