In [9]:
import pandas as pd
import os
import glob
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import plotly.graph_objs as go

In [10]:
# Set the relative path to the data directory
data_dir = os.path.join('..', 'Stock')

# Function to get all CSV files
def get_csv_files(directory):
    return glob.glob(os.path.join(directory, '*.csv'))

# List all CSV files
csv_files = get_csv_files(data_dir)
print("CSV files:", csv_files)

# load a specific CSV file by name
def load_csv_file(filename, directory):
    file_path = os.path.join(directory, filename)
    return pd.read_csv(file_path)

CSV files: ['..\\Stock\\AAPL.csv', '..\\Stock\\AMZN.csv', '..\\Stock\\BA.csv', '..\\Stock\\GOOG.csv', '..\\Stock\\IBM.csv', '..\\Stock\\MGM.csv', '..\\Stock\\SP500.csv', '..\\Stock\\T.csv', '..\\Stock\\TSLA.csv']


In [25]:
# Load a specific CSV file
csv_file_to_load = 'AAPL.csv'
df = pd.read_csv(csv_files[0])
df

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,01-02-2019,38.72,39.71,38.56,39.48,148158952
1,01-03-2019,36.00,36.43,35.50,35.55,365248812
2,01-04-2019,36.13,37.14,35.95,37.07,234428280
3,01-07-2019,37.18,37.21,36.48,36.98,219111048
4,01-08-2019,37.39,37.96,37.13,37.69,164101248
...,...,...,...,...,...,...
1349,05/14/2024,187.51,188.30,186.29,187.43,52393621
1350,05/15/2024,187.91,190.65,187.37,189.72,70399992
1351,05/16/2024,190.47,191.10,189.66,189.84,52845230
1352,05/17/2024,189.51,190.81,189.18,189.87,41282930


In [26]:
def reformat_date(date):
    return date.replace('-', '/')

# Apply the function to the first column (dates)
df['Date'] = df['Date'].apply(reformat_date)
df

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,01/02/2019,38.72,39.71,38.56,39.48,148158952
1,01/03/2019,36.00,36.43,35.50,35.55,365248812
2,01/04/2019,36.13,37.14,35.95,37.07,234428280
3,01/07/2019,37.18,37.21,36.48,36.98,219111048
4,01/08/2019,37.39,37.96,37.13,37.69,164101248
...,...,...,...,...,...,...
1349,05/14/2024,187.51,188.30,186.29,187.43,52393621
1350,05/15/2024,187.91,190.65,187.37,189.72,70399992
1351,05/16/2024,190.47,191.10,189.66,189.84,52845230
1352,05/17/2024,189.51,190.81,189.18,189.87,41282930


In [27]:
df['Date'] = pd.to_datetime(df['Date'])
df

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2019-01-02,38.72,39.71,38.56,39.48,148158952
1,2019-01-03,36.00,36.43,35.50,35.55,365248812
2,2019-01-04,36.13,37.14,35.95,37.07,234428280
3,2019-01-07,37.18,37.21,36.48,36.98,219111048
4,2019-01-08,37.39,37.96,37.13,37.69,164101248
...,...,...,...,...,...,...
1349,2024-05-14,187.51,188.30,186.29,187.43,52393621
1350,2024-05-15,187.91,190.65,187.37,189.72,70399992
1351,2024-05-16,190.47,191.10,189.66,189.84,52845230
1352,2024-05-17,189.51,190.81,189.18,189.87,41282930


In [23]:
def change_format(date):
    # Split the date by '-' and rearrange it as needed
    parts = date.split('-')
    return f'{parts[2]}-{parts[1]}-{parts[0]}'

In [11]:
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size=1, hidden_size=50, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=50, hidden_size=50, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(50, 25)
        self.fc2 = nn.Linear(25, 1)
        
    def forward(self, x):
        out, _ = self.lstm1(x)
        out, _ = self.lstm2(out)
        out = out[:, -1, :]  # Get the last output of the sequence
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [13]:
csv_names = ["AAPL", "AMZN", "BA", "GOOG", "IBM", "MGM", "SP500", "T", "TSLA"]
print(csv_names)

['AAPL', 'AMZN', 'BA', 'GOOG', 'IBM', 'MGM', 'SP500', 'T', 'TSLA']


In [37]:
for file in range(6, 7):
    print(f'Training the model for stock {csv_names[file]}')
    print('----------------------------------------------------')
    
    df = pd.read_csv(csv_files[file])
    df['Date'] = df['Date'].apply(reformat_date)
    
    # Parse the Date column to datetime
    df['Date'] = pd.to_datetime(df['Date'])

    # Set the Date column as the index
    df.set_index('Date', inplace=True)

    # Sort the data by date
    df.sort_index(inplace=True)

       # Convert 'Close' column to numpy.float64 explicitly
    df['Close'] = df['Close'].str.replace(',', '').astype(np.float64)

    # Selecting the feature and target columns
    data = df[['Close']].values
    # Selecting the feature and target columns
    # data = df[['Close']].values

    # Normalize the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)

    train_size = int(len(scaled_data) * 0.8)
    train_data = scaled_data[:train_size]
    test_data = scaled_data[train_size:]

    def create_dataset(dataset, time_step=1):
        X, Y = [], []
        for i in range(len(dataset) - time_step - 1):
            a = dataset[i:(i + time_step), 0]
            X.append(a)
            Y.append(dataset[i + time_step, 0])
        return np.array(X), np.array(Y)

    time_step = 60
    X_train, y_train = create_dataset(train_data, time_step)
    X_test, y_test = create_dataset(test_data, time_step)

    # Convert to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(2)
    y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
    X_test = torch.tensor(X_test, dtype=torch.float32).unsqueeze(2)
    y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

    model = LSTMModel()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    num_epochs = 500
    batch_size = 64

    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True
    )

    for epoch in range(num_epochs):
        for inputs, targets in train_loader:
            model.train()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

    # Save the model
    torch.save(model.state_dict(), f"model_{csv_names[file]}.pth")

Training the model for stock SP500
----------------------------------------------------
Epoch 1/500, Loss: 0.08040400594472885
Epoch 2/500, Loss: 0.048219047486782074
Epoch 3/500, Loss: 0.022953998297452927
Epoch 4/500, Loss: 0.02666280046105385
Epoch 5/500, Loss: 0.019692422822117805
Epoch 6/500, Loss: 0.0048343404196202755
Epoch 7/500, Loss: 0.0025136517360806465
Epoch 8/500, Loss: 0.0032301549799740314
Epoch 9/500, Loss: 0.002867091214284301
Epoch 10/500, Loss: 0.00241358601488173
Epoch 11/500, Loss: 0.00242918380536139
Epoch 12/500, Loss: 0.0018574846908450127
Epoch 13/500, Loss: 0.0015120944008231163
Epoch 14/500, Loss: 0.0016609937883913517
Epoch 15/500, Loss: 0.0014514281647279859
Epoch 16/500, Loss: 0.001986461691558361
Epoch 17/500, Loss: 0.0013521094806492329
Epoch 18/500, Loss: 0.0012738608056679368
Epoch 19/500, Loss: 0.0015799712855368853
Epoch 20/500, Loss: 0.00151357043068856
Epoch 21/500, Loss: 0.0006979286554269493
Epoch 22/500, Loss: 0.000883164000697434
Epoch 23/500,

In [32]:
df = pd.read_csv(csv_files[0])
print(type(df['Close'][0]))


<class 'numpy.float64'>
