In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

In [2]:
# Access Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load the datasets
usd_data = pd.read_csv('/content/drive/MyDrive/dataset/us_dollar.csv', sep=',')
bitcoin_data = pd.read_csv('/content/drive/MyDrive/dataset/bitcoin.csv', sep=',')
nasdaq_data = pd.read_csv('/content/drive/MyDrive/dataset/nasdaq.csv', sep=',')

In [4]:
# Remove commas from 'Price' column and convert to float
nasdaq_data['Price'] = nasdaq_data['Price'].str.replace(',', '').astype(float)

In [5]:
# Define preprocess_data function
def preprocess_data(data, seq_length):
    # Scaling data
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data[['Price']])

    X = []
    y = []
    for i in range(len(data_scaled) - seq_length):
        X.append(data_scaled[i:i+seq_length])
        y.append(data_scaled[i+seq_length])

    X = np.array(X)
    y = np.array(y)

    return X, y, scaler

In [6]:
# Split Data Function
def split_data(X, y, train_ratio=0.7, val_ratio=0.2):
    total_samples = len(X)
    train_end = int(total_samples * train_ratio)
    val_end = int(total_samples * (train_ratio + val_ratio))

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]

    return X_train, y_train, X_val, y_val, X_test, y_test

In [7]:
# Prepare the data
seq_length = 30

In [8]:
# USD Data
X_usd, y_usd, usd_scaler = preprocess_data(usd_data, seq_length)
X_train_usd, y_train_usd, X_val_usd, y_val_usd, X_test_usd, y_test_usd = split_data(X_usd, y_usd)

# Bitcoin Data
X_bitcoin, y_bitcoin, bitcoin_scaler = preprocess_data(bitcoin_data, seq_length)
X_train_bitcoin, y_train_bitcoin, X_val_bitcoin, y_val_bitcoin, X_test_bitcoin, y_test_bitcoin = split_data(X_bitcoin, y_bitcoin)

# Nasdaq Data
X_nasdaq, y_nasdaq, nasdaq_scaler = preprocess_data(nasdaq_data, seq_length)
X_train_nasdaq, y_train_nasdaq, X_val_nasdaq, y_val_nasdaq, X_test_nasdaq, y_test_nasdaq = split_data(X_nasdaq, y_nasdaq)

In [9]:
# Convert to PyTorch Tensors
def to_tensors(X, y):
    X_tensor = torch.from_numpy(X).float()
    y_tensor = torch.from_numpy(y).float()
    return X_tensor, y_tensor

X_train_usd, y_train_usd = to_tensors(X_train_usd, y_train_usd)
X_val_usd, y_val_usd = to_tensors(X_val_usd, y_val_usd)
X_test_usd, y_test_usd = to_tensors(X_test_usd, y_test_usd)

X_train_bitcoin, y_train_bitcoin = to_tensors(X_train_bitcoin, y_train_bitcoin)
X_val_bitcoin, y_val_bitcoin = to_tensors(X_val_bitcoin, y_val_bitcoin)
X_test_bitcoin, y_test_bitcoin = to_tensors(X_test_bitcoin, y_test_bitcoin)

X_train_nasdaq, y_train_nasdaq = to_tensors(X_train_nasdaq, y_train_nasdaq)
X_val_nasdaq, y_val_nasdaq = to_tensors(X_val_nasdaq, y_val_nasdaq)
X_test_nasdaq, y_test_nasdaq = to_tensors(X_test_nasdaq, y_test_nasdaq)

In [10]:
# Create DataLoader
def create_dataloader(X_train, y_train, X_val, y_val, X_test, y_test, batch_size=32):
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    test_dataset = TensorDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

train_loader_usd, val_loader_usd, test_loader_usd = create_dataloader(X_train_usd, y_train_usd, X_val_usd, y_val_usd, X_test_usd, y_test_usd)
train_loader_bitcoin, val_loader_bitcoin, test_loader_bitcoin = create_dataloader(X_train_bitcoin, y_train_bitcoin, X_val_bitcoin, y_val_bitcoin, X_test_bitcoin, y_test_bitcoin)
train_loader_nasdaq, val_loader_nasdaq, test_loader_nasdaq = create_dataloader(X_train_nasdaq, y_train_nasdaq, X_val_nasdaq, y_val_nasdaq, X_test_nasdaq, y_test_nasdaq)

In [11]:
# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Use only the last time step's output
        return out

In [12]:
# Update the training function for LSTM
def train_lstm_model(model, train_loader, val_loader, num_epochs=10, lr=0.001):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                y_pred = model(X_batch)
                val_loss += criterion(y_pred, y_batch).item()

        print(f'Epoch {epoch+1}, Train Loss: {loss.item()}, Val Loss: {val_loss/len(val_loader)}')

In [13]:
# Train the LSTM model on USD data
input_size = X_train_usd.shape[2]
output_size = 1
hidden_size = 64
num_layers = 2

print("Training LSTM model on USD data")
lstm_model_usd = LSTMModel(input_size, hidden_size, num_layers, output_size)
train_lstm_model(lstm_model_usd, train_loader_usd, val_loader_usd)

Training LSTM model on USD data
Epoch 1, Train Loss: 0.019207824021577835, Val Loss: 0.047924858631126374
Epoch 2, Train Loss: 0.001091514015570283, Val Loss: 0.009874849012703635
Epoch 3, Train Loss: 0.0006551790283992887, Val Loss: 0.007317534482960279
Epoch 4, Train Loss: 0.00047764883493073285, Val Loss: 0.006242897280672979
Epoch 5, Train Loss: 0.0009234330500476062, Val Loss: 0.006103296938817948
Epoch 6, Train Loss: 0.0011697725858539343, Val Loss: 0.00546141392907076
Epoch 7, Train Loss: 0.0005005383864045143, Val Loss: 0.004697149815102521
Epoch 8, Train Loss: 0.0008984467713162303, Val Loss: 0.0043678883392001605
Epoch 9, Train Loss: 0.001421266933903098, Val Loss: 0.0038574353772370764
Epoch 10, Train Loss: 0.0007410934194922447, Val Loss: 0.00348355468789426


In [17]:
# Predict future values using the trained LSTM model
def predict_future_lstm(model, X_val, scaler, predict_days=90):
    model.eval()
    all_predictions = []
    with torch.no_grad():
        for input_seq in X_val:
            input_seq = input_seq.unsqueeze(0)  # Add batch dimension
            predictions = []
            for _ in range(predict_days):
                next_pred = model(input_seq)
                predictions.append(next_pred.item())
                input_seq = torch.cat((input_seq[:, :, 1:], next_pred.unsqueeze(2)), dim=2)  # Slide window
            all_predictions.extend(predictions)

    all_predictions = scaler.inverse_transform(np.array(all_predictions).reshape(-1, 1))
    return all_predictions

predictions_usd_lstm = predict_future_lstm(lstm_model_usd, X_val_usd, usd_scaler)
plot_results(usd_data, predictions_usd_lstm, 'Actual vs. Predicted USD Index (LSTM)')

RuntimeError: Sizes of tensors must match except in dimension 2. Expected size 30 but got size 1 for tensor number 1 in the list.