In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import matplotlib.pyplot as plt
import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)


In [None]:
# Load the Amazon stock price CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv('/kaggle/input/amazon-stock-price-all-time/Amazon.csv')

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
#plt.plot(data['Date'], data['Close'])
#plt.show()

In [None]:
# Show the column labels of the loaded frame.
data.columns

In [None]:
# Drop the 'Date' and 'Adj Close' columns. `data` is overwritten in place, so
# errors='ignore' keeps the cell idempotent: re-running it after the columns
# are already gone no longer raises KeyError.
data = data.drop(columns=['Date', 'Adj Close'], errors='ignore')

In [None]:
# Display the frame to inspect the remaining columns.
data

In [None]:
window = 3    # number of consecutive past rows packed into one sample
features = 5  # number of leading columns of `data` used as model inputs


def prepare_data(data):
    """Turn a row-per-day frame into a supervised sliding-window table.

    Each output row holds, for every one of the first ``features`` columns of
    ``data``, the ``window`` consecutive values starting at some row ``r``
    (oldest first, labelled ``<name>_t-<k>``), followed by the ``'Close'``
    value of row ``r + window`` as the prediction target.

    Args:
        data: DataFrame with at least ``features`` columns and a ``'Close'``
            column. Reads the module-level ``window`` and ``features``.

    Returns:
        (prepared, columns): ``prepared`` is the windowed DataFrame whose last
        column is the target ``'Close'``; ``columns`` is the list of input
        column names in exactly the order they appear in ``prepared``, so it
        can be used to re-label an array derived from
        ``prepared.drop('Close', axis=1)``.
    """
    # Names come from the frame itself so labels always match the values that
    # `data.iloc[..., f]` actually reads. Layout is feature-major: all lags of
    # the first feature, then all lags of the second, and so on.
    feature_names = list(data.columns[:features])
    columns = [
        f'{name}_t-{window - t}'
        for name in feature_names
        for t in range(window)
    ]

    close = data['Close']
    rows = []
    # One sample per start row; stop `window` rows before the end so that the
    # target at `row + window` always exists.
    for row in range(len(data) - window):
        new_row = [
            data.iloc[row + t, f]
            for f in range(features)
            for t in range(window)
        ]
        new_row.append(close.iloc[row + window])
        rows.append(new_row)

    prepared = pd.DataFrame(rows, columns=columns + ['Close'])
    return prepared, columns
# Build the windowed table; `columns` is the list of input column names
# returned alongside it.
data_train, columns = prepare_data(data)
# Disabled alternative: keep only the lagged Close columns plus the target.
#data_train = data_train[[f'Close_t-{window-t}' for t in range(window)] + ['Close']]
# Keep a handle on the target column so it can be re-attached after the
# input columns are transformed.
target = data_train['Close']
print(data_train)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every input column to the range [-1, 1]. The target column 'Close'
# is excluded, and the result is a NumPy array (column labels are lost).
# NOTE(review): the scaler is fitted on all rows, including those that are
# later assigned to the test split, so test-set statistics leak into the
# training inputs. Consider fitting on the training rows only.
scaler = MinMaxScaler(feature_range=(-1, 1))
feature_frame = data_train.drop(columns='Close')
data_train = scaler.fit_transform(feature_frame)

In [None]:
# Wrap the scaled array back into a DataFrame labelled with `columns`.
# NOTE(review): labels are assigned purely by position, so `columns` must list
# the names in the same order as the array's columns — verify against
# prepare_data's output order.
data_train = pd.DataFrame(data_train, columns=columns)

In [None]:
# Re-attach the target column; `target` was captured before scaling, so these
# values are the unscaled Close prices.
data_train['Close'] = target

In [None]:
# Show the column labels of the scaled training frame.
data_train.columns

In [None]:
# Columns handed to the model: the lagged Close values (oldest first) followed
# by the target 'Close'. The lagged Volume columns (f'Volume_t-{window-t}')
# are deliberately left out for now.
columns_to_pass = [f'Close_t-{window-t}' for t in range(window)] + ['Close']



In [None]:
# Restrict the frame to the selected input columns plus the target.
data_train = data_train[columns_to_pass]

In [None]:
# Chronological 80/20 split: the first 80% of rows train, the rest test.
split = (8 * len(data_train)) // 10
train_rows, test_rows = data_train[:split], data_train[split:]

# Inputs are every column except the target; the target is 'Close'.
X_train, y_train = train_rows.drop(columns='Close'), train_rows['Close']
X_test, y_test = test_rows.drop(columns='Close'), test_rows['Close']


In [None]:
def _as_float_tensor(values):
    """Convert array-like ``values`` to a float32 tensor with a trailing axis of size 1."""
    return torch.tensor(np.array(values)).float().unsqueeze(-1)


X_train = _as_float_tensor(X_train)
y_train = _as_float_tensor(y_train)

X_test = _as_float_tensor(X_test)
y_test = _as_float_tensor(y_test)

In [None]:
# Show the shapes of the training input and target tensors.
X_train.shape, y_train.shape

In [None]:
# Show the size of the training input tensor.
X_train.size()

In [None]:
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    """Pairs each input sample ``X[i]`` with its target ``y[i]``.

    Args:
        X: indexable, sized collection of input samples.
        y: indexable, sized collection of targets, same length as ``X``.

    Raises:
        ValueError: if ``X`` and ``y`` have different lengths. Without this
            check a mismatch would only surface as an IndexError part-way
            through an epoch, or as silently ignored targets.
    """

    def __init__(self, X, y):
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same length, got {len(X)} and {len(y)}'
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Number of (input, target) pairs."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the ``i``-th ``(input, target)`` pair."""
        return self.X[i], self.y[i]
    
# Wrap the train and test tensors as datasets of (input, target) pairs.
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

In [None]:
import torch.nn as nn

In [None]:
from torch.utils.data import DataLoader

In [None]:
# Mini-batch size shared by both loaders.
batchsize = 16
# Training batches are reshuffled every epoch; test batches keep their order.
train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

In [None]:
# Sanity check: pull a single training batch and show its tensor shapes.
for x_batch, y_batch in train_loader:
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)
    print(x_batch.shape, y_batch.shape)
    break

In [None]:
class LSTM(nn.Module):
    """Recurrent regressor: stacked recurrent layers followed by a linear head.

    NOTE(review): despite the class and attribute names, the recurrent layer
    is a plain ``nn.RNN``, not ``nn.LSTM``. It is left unchanged here because
    swapping it would change the model; confirm which one is intended.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        num_stacked_layers: number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        self.lstm = nn.RNN(input_size, hidden_size, num_stacked_layers,
                           batch_first=True)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: tensor indexed as (batch, time step, feature), as required by
                ``batch_first=True``.

        Returns:
            Tensor of shape (batch, 1).
        """
        batch_size = x.size(0)
        # Build the zero initial state on the input's own device and dtype so
        # the module does not depend on a global `device` variable.
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)

        out, _ = self.lstm(x, h0)
        # Only the last time step feeds the head, so apply the ReLU to that
        # slice alone (same result as activating every step first).
        last_step = self.relu(out[:, -1, :])
        return self.fc(last_step)

# One input feature per time step, 5 hidden units, a single recurrent layer;
# moved to the selected device straight away.
model = LSTM(input_size=1, hidden_size=5, num_stacked_layers=1).to(device)
model

In [None]:
# Mean-squared-error loss for the regression target.
loss_function = nn.MSELoss()
# Adam over all model parameters with learning rate 0.1.
# NOTE(review): 0.1 is well above Adam's default of 1e-3 — confirm it is intended.
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

In [None]:
def train_one_epoch():
    """Run one optimisation pass over ``train_loader`` and log the mean loss.

    Reads the module-level ``model``, ``train_loader``, ``loss_function``,
    ``optimizer`` and ``device``, plus the loop variable ``epoch`` for the
    header line. A progress line is printed after every ``log_every`` batches
    with the mean loss over exactly those batches; any batches left over at
    the end of the epoch are reported the same way.
    """
    log_every = 50  # number of batches averaged into each progress line

    model.train(True)
    print(f'Epoch: {epoch + 1}')
    running_loss = 0.0
    batches_in_window = 0
    batch_index = -1  # keeps the final report well-defined for an empty loader

    for batch_index, batch in enumerate(train_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)
        output = model(x_batch)
        loss = loss_function(output, y_batch)
        running_loss += loss.item()
        batches_in_window += 1

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report only once a full window has accumulated, so the divisor
        # always matches the number of losses that were summed.
        if batches_in_window == log_every:
            avg_loss_across_batches = running_loss / batches_in_window
            print('Batch {0}, Loss: {1:.3f}'.format(batch_index+1,
                                                    avg_loss_across_batches))
            running_loss = 0.0
            batches_in_window = 0

    # Report the trailing partial window, if any.
    if batches_in_window:
        avg_loss_across_batches = running_loss / batches_in_window
        print('Batch {0}, Loss: {1:.3f}'.format(batch_index+1,
                                                avg_loss_across_batches))
    print()

In [None]:
def validate_one_epoch():
    """Evaluate the model on ``test_loader`` and print the mean batch loss.

    Reads the module-level ``model``, ``test_loader``, ``loss_function`` and
    ``device``. The model is switched to evaluation mode and no gradients are
    tracked. The reported value is the sum of per-batch losses divided by the
    number of batches.
    """
    model.eval()
    total_loss = 0.0

    with torch.inference_mode():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            prediction = model(inputs)
            total_loss += loss_function(prediction, targets).item()

    mean_loss = total_loss / len(test_loader)

    print('Val Loss: {0:.3f}'.format(mean_loss))
    print('***************************************************')
    print()

In [None]:
# Total number of passes over the training data.
NUM_EPOCHS = 100
# The loop variable must stay named `epoch`: train_one_epoch() reads it as a
# global when printing its header line.
for epoch in range(NUM_EPOCHS):
    train_one_epoch()
    validate_one_epoch()

In [None]:
# Predict on the training inputs without tracking gradients, then bring the
# result back to the CPU as a NumPy array for plotting.
with torch.no_grad():
    predicted = model(X_train.to(device)).to('cpu').numpy()

# Actual vs. predicted target on the training split.
fig, ax = plt.subplots()
ax.plot(y_train, label='Actual Close')
ax.plot(predicted, label='Predicted Close')
ax.set_xlabel('Day')
ax.set_ylabel('Close')
ax.legend()
plt.show()


In [None]:
# Predict on the held-out inputs without tracking gradients, then bring the
# result back to the CPU as a NumPy array for plotting.
with torch.no_grad():
    predicted = model(X_test.to(device)).to('cpu').numpy()

# Actual vs. predicted target on the test split.
fig, ax = plt.subplots()
ax.plot(y_test, label='Actual Close')
ax.plot(predicted, label='Predicted Close')
ax.set_xlabel('Day')
ax.set_ylabel('Close')
ax.legend()
plt.show()