In [5]:
## Implementing an RNN to predict time series ##

In [66]:
import numpy as np
import torch
from pandas import read_csv
from sklearn.preprocessing import MinMaxScaler

# Read one numeric column from each file and rescale it with a scaler fitted on train only.
def _read_column(path):
    """Return column index 1 of the CSV at `path` as a float32 array of shape (n, 1).

    The file is read without a header row (header=None) and its first line is
    skipped (skiprows=1).
    """
    raw = read_csv(path, header=None, usecols=[1], skiprows=1, dtype=np.float32).values
    # The scaler below expects a 2-D (n_samples, n_features) array.
    return raw.flatten().reshape(-1, 1)


train_data = _read_column("train.txt")
test_data = _read_column("test.txt")

# The scaler is fitted on the training series only; the test series reuses the
# training min/max, so it is transformed with exactly the same mapping.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data).flatten()
test_scaled = scaler.transform(test_data).flatten()

# Create data sets
def create_dataset(data, time_window):
    """Slice a 1-D series into sliding windows and their next-step targets.

    Args:
        data: indexable 1-D sequence of numbers.
        time_window: number of consecutive values in each input window.

    Returns:
        A pair of float32 arrays (X, y). X[k] is data[k:k + time_window] and
        y[k] is the value that follows it, data[k + time_window]. There are
        len(data) - time_window pairs; if that count is not positive, both
        arrays are empty.
    """
    n_samples = len(data) - time_window
    windows = [data[start:start + time_window] for start in range(n_samples)]
    targets = [data[start + time_window] for start in range(n_samples)]
    return np.array(windows, dtype=np.float32), np.array(targets, dtype=np.float32)

# Number of past observations fed to the model for each prediction.
time_window = 12

trainX, trainY = create_dataset(train_scaled, time_window)
testX, testY = create_dataset(test_scaled, time_window)

# Preview the first few (window, target) training pairs.
# The count is capped at the number of available windows so that a short
# series does not raise IndexError.
for i in range(min(3, len(trainX))):
    print(f"trainx[{i}] = {trainX[i]}")
    print(f"trainy[{i}] = {trainY[i]}")

# Convert to tensors and append a trailing axis of size 1:
#   X: (samples, time_window) -> (samples, time_window, 1), one feature per step
#   Y: (samples,)             -> (samples, 1), same layout as the model output
#      so that the MSE loss compares tensors of identical shape.
trainX = torch.tensor(trainX).unsqueeze(-1)
trainY = torch.tensor(trainY).unsqueeze(-1)

testX = torch.tensor(testX).unsqueeze(-1)
testY = torch.tensor(testY).unsqueeze(-1)


trainx[0] = [0.02203858 0.03856748 0.077135   0.06887051 0.04683197 0.08539945
 0.12121212 0.12121212 0.08815429 0.04132232 0.         0.03856748]
trainy[0] = 0.03030303120613098
trainx[1] = [0.03856748 0.077135   0.06887051 0.04683197 0.08539945 0.12121212
 0.12121212 0.08815429 0.04132232 0.         0.03856748 0.03030303]
trainy[1] = 0.06060606241226196
trainx[2] = [0.077135   0.06887051 0.04683197 0.08539945 0.12121212 0.12121212
 0.08815429 0.04132232 0.         0.03856748 0.03030303 0.06060606]
trainy[2] = 0.10192838311195374


In [64]:
import torch
import torch.nn as nn
import torch.optim as optim

# Create the RNN structure
class RNN(nn.Module):
    """Recurrent layer followed by a linear read-out of the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        output_size: number of values predicted per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, seq, input_size) tensor to (batch, output_size)."""
        hidden_states, _ = self.rnn(x)
        # Only the hidden state at the final time step feeds the prediction.
        last_step = hidden_states[:, -1, :]
        return self.fc(last_step)
        
         

# Instantiate the model, optimizer and criterion
input_size = 1    # one scalar observation per time step
hidden_size = 4
output_size = 1   # one-step-ahead prediction
model = RNN(input_size, hidden_size, output_size)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)

# Train the RNN model for 1000 epochs and print the training loss every 100 epochs
epochs = 1000
batch_size = 10

for epoch in range(epochs):
    model.train()
    # Draw a fresh random ordering of the training samples each epoch.
    permutation = torch.randperm(trainX.size(0))
    epoch_loss = 0.0
    num_batches = 0

    for i in range(0, trainX.size(0), batch_size):
        # The final slice may be shorter than batch_size.
        indices = permutation[i:i + batch_size]
        batch_X, batch_Y = trainX[indices], trainY[indices]

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_Y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    if (epoch + 1) % 100 == 0:
        # Average over the batches that actually ran. Using
        # trainX.size(0) // batch_size would leave out a trailing partial batch
        # (inflating the average) and would be zero when there are fewer than
        # batch_size samples.
        avg_loss = epoch_loss / max(num_batches, 1)
        print(f"Epoch [{epoch + 1}/{epochs}], Average Loss: {avg_loss:.4f}")

#Print out the TEST data and label
#for i in range(3):
    #print(f"testX[{i}] = {testX[i].squeeze().tolist()}")
    #print(f"testY[{i}] = {testY[i].item()}")

Epoch [100/1000], Average Loss: 0.0061
Epoch [200/1000], Average Loss: 0.0048
Epoch [300/1000], Average Loss: 0.0048
Epoch [400/1000], Average Loss: 0.0054
Epoch [500/1000], Average Loss: 0.0022
Epoch [600/1000], Average Loss: 0.0014
Epoch [700/1000], Average Loss: 0.0019
Epoch [800/1000], Average Loss: 0.0025
Epoch [900/1000], Average Loss: 0.0015
Epoch [1000/1000], Average Loss: 0.0017


In [46]:
import torch
import random
import numpy as np

from keras.preprocessing import sequence
from keras.datasets import imdb

# Load the IMDB dataset shipped with keras, with the vocabulary limited via num_words=1000.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000)

for label, split in (("Shape of x_train:", x_train), ("Shape of x_test:", x_test)):
    print(label, split.shape)

# Pad the reviews to a common length (maxlen=100) so that each split becomes
# a single 2-D array.
x_train_padded, x_test_padded = (
    sequence.pad_sequences(split, maxlen=100) for split in (x_train, x_test)
)

for label, split in (
    ("Shape of x_train_padded:", x_train_padded),
    ("Shape of x_test_padded:", x_test_padded),
):
    print(label, split.shape)

Shape of x_train: (25000,)
Shape of x_test: (25000,)
Shape of x_train_padded: (25000, 100)
Shape of x_test_padded: (25000, 100)


In [58]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Mini-batch size shared by the training and test loaders.
batch_size = 64

# Training split: integer token ids (for the embedding lookup) and float labels
# (for the BCE loss), reshuffled on every pass over the loader.
x_train_tensor = torch.tensor(x_train_padded, dtype=torch.long)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
train_data = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Test split: same dtypes, iterated in a fixed order.
x_test_tensor = torch.tensor(x_test_padded, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
test_data = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Define the LSTM architecture
# Define the LSTM architecture
class LSTMClassifier(nn.Module):
    """Embedding -> LSTM -> linear -> sigmoid binary classifier.

    Args:
        max_features: number of rows in the embedding table (vocabulary size).
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, max_features, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embeddings = nn.Embedding(max_features, embedding_dim)
        # batch_first=True: the LSTM consumes (batch, seq, embedding_dim).
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.classifier = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, sentence):
        """Return one sigmoid output per sequence, shape (batch, 1)."""
        embedded = self.embeddings(sentence)
        _, (final_hidden, _) = self.lstm(embedded)
        # final_hidden[-1] is the last layer's final hidden state.
        logits = self.classifier(final_hidden[-1])
        return self.sigmoid(logits)

# Instantiate the LSTM model, an Adam optimizer and BCE loss
embedding_dim = 8
hidden_dim = 8
num_layers = 1
max_features = 1000  # embedding table size; matches num_words=1000 used when loading IMDB

model = LSTMClassifier(max_features, embedding_dim, hidden_dim, num_layers)
optimizer = optim.Adam(model.parameters(), lr=0.005)
criterion = nn.BCELoss()

best_val_accuracy = 0.0
epochs = 10

# Train the model, print out the loss
for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    correct_train = 0
    total_train = 0

    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_x)
        # squeeze(-1) removes only the trailing size-1 axis: (batch, 1) -> (batch,).
        # A bare squeeze() would also collapse a batch of one sample into a 0-d
        # tensor, which no longer matches batch_y's shape and makes BCELoss fail.
        probs = outputs.squeeze(-1)

        # Get loss
        loss = criterion(probs, batch_y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        # Check accuracy: threshold the sigmoid output at 0.5
        predicted = (probs > 0.5).float()
        correct_train += (predicted == batch_y).sum().item()
        total_train += batch_y.size(0)

    avg_train_loss = train_loss / len(train_loader)
    train_accuracy = correct_train / total_train

    # Validate
    # NOTE: test_loader is used as the validation set here, so the "best"
    # accuracy below is selected on the test data.
    model.eval()
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            outputs = model(batch_x)
            predicted = (outputs.squeeze(-1) > 0.5).float()
            correct_val += (predicted == batch_y).sum().item()
            total_val += batch_y.size(0)

    val_accuracy = correct_val / total_val
    # Track the highest validation accuracy seen across epochs.
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy

    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}")


# Print out the best validation accuracy. (5 pts)
print(f"Best Validation Accuracy: {best_val_accuracy:.4f}")


Epoch [1/10], Train Loss: 0.6362, Train Accuracy: 0.6268, Validation Accuracy: 0.6157
Epoch [2/10], Train Loss: 0.5247, Train Accuracy: 0.7443, Validation Accuracy: 0.7923
Epoch [3/10], Train Loss: 0.4352, Train Accuracy: 0.8021, Validation Accuracy: 0.8038
Epoch [4/10], Train Loss: 0.3868, Train Accuracy: 0.8292, Validation Accuracy: 0.8242
Epoch [5/10], Train Loss: 0.3681, Train Accuracy: 0.8395, Validation Accuracy: 0.8246
Epoch [6/10], Train Loss: 0.3510, Train Accuracy: 0.8475, Validation Accuracy: 0.8317
Epoch [7/10], Train Loss: 0.3399, Train Accuracy: 0.8522, Validation Accuracy: 0.8340
Epoch [8/10], Train Loss: 0.3286, Train Accuracy: 0.8591, Validation Accuracy: 0.8360
Epoch [9/10], Train Loss: 0.3213, Train Accuracy: 0.8608, Validation Accuracy: 0.8365
Epoch [10/10], Train Loss: 0.3118, Train Accuracy: 0.8654, Validation Accuracy: 0.8401
Best Validation Accuracy: 0.8401
