<a href="https://colab.research.google.com/github/SelenaNahra/DL/blob/main/4106FinalProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Loading Dataset & Preprocessing**

In [9]:
import torch
import pandas as pd
import category_encoders as ce
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("heart_disease_uci.csv")
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 920 entries, 0 to 919
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   id        920 non-null    int64  
 1   age       920 non-null    int64  
 2   sex       920 non-null    object 
 3   dataset   920 non-null    object 
 4   cp        920 non-null    object 
 5   trestbps  861 non-null    float64
 6   chol      890 non-null    float64
 7   fbs       830 non-null    object 
 8   restecg   918 non-null    object 
 9   thalch    865 non-null    float64
 10  exang     865 non-null    object 
 11  oldpeak   858 non-null    float64
 12  slope     611 non-null    object 
 13  ca        309 non-null    float64
 14  thal      434 non-null    object 
 15  num       920 non-null    int64  
dtypes: float64(5), int64(3), object(8)
memory usage: 115.1+ KB


Unnamed: 0,id,age,sex,dataset,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,num
0,1,63,Male,Cleveland,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,2.3,downsloping,0.0,fixed defect,0
1,2,67,Male,Cleveland,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,1.5,flat,3.0,normal,2
2,3,67,Male,Cleveland,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,2.6,flat,2.0,reversable defect,1
3,4,37,Male,Cleveland,non-anginal,130.0,250.0,False,normal,187.0,False,3.5,downsloping,0.0,normal,0
4,5,41,Female,Cleveland,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,1.4,upsloping,0.0,normal,0


In [10]:
#remove irrelevant columns
df = df.drop(columns=['id','dataset'])

#remove columns with null values
df.isnull().values.any()
df = df.dropna()

df.info()
print(df.columns)

<class 'pandas.core.frame.DataFrame'>
Index: 299 entries, 0 to 748
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       299 non-null    int64  
 1   sex       299 non-null    object 
 2   cp        299 non-null    object 
 3   trestbps  299 non-null    float64
 4   chol      299 non-null    float64
 5   fbs       299 non-null    object 
 6   restecg   299 non-null    object 
 7   thalch    299 non-null    float64
 8   exang     299 non-null    object 
 9   oldpeak   299 non-null    float64
 10  slope     299 non-null    object 
 11  ca        299 non-null    float64
 12  thal      299 non-null    object 
 13  num       299 non-null    int64  
dtypes: float64(5), int64(2), object(7)
memory usage: 35.0+ KB
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalch',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num'],
      dtype='object')


In [11]:
#encode Male = 0 & Female = 1
df['sex'] = df['sex'].map({'Female': 1, 'Male': 0})

#encode cp, restecg, slope, thal
onehot_encoder = ce.OneHotEncoder(cols=['cp', 'restecg', 'slope', 'thal'])
df = onehot_encoder.fit_transform(df)

#True = 1, False = 0
df[['fbs', 'exang']] = df[['fbs', 'exang']].astype(int)

#scale age, trestbps, chol, thalach, oldpeak, ca
scaler = StandardScaler()
columns_to_scale = ['age', 'trestbps', 'chol', 'thalch', 'oldpeak','ca']
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

df.head()

Unnamed: 0,age,sex,cp_1,cp_2,cp_3,cp_4,trestbps,chol,fbs,restecg_1,...,exang,oldpeak,slope_1,slope_2,slope_3,ca,thal_1,thal_2,thal_3,num
0,0.940446,0,1,0,0,0,0.74976,-0.262867,1,1,...,0,1.069475,1,0,0,-0.718306,1,0,0,0
1,1.384143,0,0,1,0,0,1.596354,0.747722,0,1,...,1,0.380309,0,1,0,2.487269,0,1,0,2
2,1.384143,0,0,1,0,0,-0.661231,-0.339138,0,1,...,1,1.327912,0,1,0,1.418744,0,0,1,1
3,-1.943588,0,0,0,1,0,-0.096835,0.061285,0,0,...,0,2.103224,1,0,0,-0.718306,0,1,0,0
4,-1.499891,1,0,0,0,1,-0.096835,-0.81583,0,1,...,0,0.294163,0,0,1,-0.718306,0,1,0,0


In [12]:
class_distribution = df['num'].value_counts()
sorted_distribution = class_distribution.sort_index()
print(sorted_distribution)

num
0    160
1     56
2     35
3     35
4     13
Name: count, dtype: int64


In [16]:
from sklearn.model_selection import train_test_split
import time
from imblearn.over_sampling import SMOTE
import torch.nn as nn
import torch.optim as optim
import numpy as np

X = df.drop(columns=["num"])
y = df["num"]

print("Shape of X", X.shape)
print("Shape of y", y.shape)

smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

print(y_resampled.value_counts())

X = X_resampled.values
y = y_resampled.values.reshape(-1, 1)

print("Shape of X", X.shape)
print("Shape of y", y.shape)


# 80-20 Split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print('Shape of X_train:', X_train.shape)
print('Shape of y_train:', y_train.shape)
print('Shape of X_val:', X_val.shape)
print('Shape of y_val:', y_val.shape)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long).squeeze()
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long).squeeze()

# Reshape X_val to include a batch dimension
X_val = X_val.unsqueeze(1)
X_train = X_train.unsqueeze(1)

print('Shape of X_train:', X_train.shape)
print('Shape of y_train:', y_train.shape)
print('Shape of X_val:', X_val.shape)
print('Shape of y_val:', y_val.shape)

Shape of X (299, 22)
Shape of y (299,)
num
0    160
2    160
1    160
3    160
4    160
Name: count, dtype: int64
Shape of X (800, 22)
Shape of y (800, 1)
Shape of X_train: (640, 22)
Shape of y_train: (640, 1)
Shape of X_val: (160, 22)
Shape of y_val: (160, 1)
Shape of X_train: torch.Size([640, 1, 22])
Shape of y_train: torch.Size([640])
Shape of X_val: torch.Size([160, 1, 22])
Shape of y_val: torch.Size([160])


**Hidden Size 128**

In [17]:
# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 128
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 64

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_time = time.time()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    total_time = end_time - start_time
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch [1/100], Loss: 1.5954
Epoch [2/100], Loss: 1.5645
Epoch [3/100], Loss: 1.4913
Epoch [4/100], Loss: 1.3662
Epoch [5/100], Loss: 1.2529
Epoch [6/100], Loss: 1.1998
Epoch [7/100], Loss: 1.1525
Epoch [8/100], Loss: 1.1093
Epoch [9/100], Loss: 1.0724
Epoch [10/100], Loss: 1.0426
Epoch [11/100], Loss: 1.0183
Epoch [12/100], Loss: 0.9956
Epoch [13/100], Loss: 0.9730
Epoch [14/100], Loss: 0.9505
Epoch [15/100], Loss: 0.9271
Epoch [16/100], Loss: 0.9031
Epoch [17/100], Loss: 0.8794
Epoch [18/100], Loss: 0.8565
Epoch [19/100], Loss: 0.8349
Epoch [20/100], Loss: 0.8147
Epoch [21/100], Loss: 0.7959
Epoch [22/100], Loss: 0.7781
Epoch [23/100], Loss: 0.7609
Epoch [24/100], Loss: 0.7441
Epoch [25/100], Loss: 0.7276
Epoch [26/100], Loss: 0.7112
Epoch [27/100], Loss: 0.6949
Epoch [28/100], Loss: 0.6788
Epoch [29/100], Loss: 0.6629
Epoch [30/100], Loss: 0.6473
Epoch [31/100], Loss: 0.6320
Epoch [32/100], Loss: 0.6172
Epoch [33/100], Loss: 0.6027
Epoch [34/100], Loss: 0.5887
Epoch [35/100], Loss: 0

**Hidden Size 512**

In [None]:
# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 64

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_time = time.time()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    total_time = end_time - start_time
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch [1/100], Loss: 1.5502
Epoch [2/100], Loss: 1.3356
Epoch [3/100], Loss: 1.1999
Epoch [4/100], Loss: 1.1076
Epoch [5/100], Loss: 1.0624
Epoch [6/100], Loss: 1.0249
Epoch [7/100], Loss: 0.9856
Epoch [8/100], Loss: 0.9468
Epoch [9/100], Loss: 0.9106
Epoch [10/100], Loss: 0.8789
Epoch [11/100], Loss: 0.8511
Epoch [12/100], Loss: 0.8295
Epoch [13/100], Loss: 0.8096
Epoch [14/100], Loss: 0.7893
Epoch [15/100], Loss: 0.7684
Epoch [16/100], Loss: 0.7471
Epoch [17/100], Loss: 0.7251
Epoch [18/100], Loss: 0.7021
Epoch [19/100], Loss: 0.6779
Epoch [20/100], Loss: 0.6522
Epoch [21/100], Loss: 0.6259
Epoch [22/100], Loss: 0.5998
Epoch [23/100], Loss: 0.5750
Epoch [24/100], Loss: 0.5513
Epoch [25/100], Loss: 0.5287
Epoch [26/100], Loss: 0.5070
Epoch [27/100], Loss: 0.4861
Epoch [28/100], Loss: 0.4658
Epoch [29/100], Loss: 0.4461
Epoch [30/100], Loss: 0.4269
Epoch [31/100], Loss: 0.4080
Epoch [32/100], Loss: 0.3894
Epoch [33/100], Loss: 0.3712
Epoch [34/100], Loss: 0.3534
Epoch [35/100], Loss: 0

**Hidden Size 1024**

In [None]:
# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 1024
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 64

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_time = time.time()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    total_time = end_time - start_time
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch [1/100], Loss: 1.4493
Epoch [2/100], Loss: 1.2153
Epoch [3/100], Loss: 1.0955
Epoch [4/100], Loss: 1.0497
Epoch [5/100], Loss: 1.0055
Epoch [6/100], Loss: 0.9504
Epoch [7/100], Loss: 0.9044
Epoch [8/100], Loss: 0.8711
Epoch [9/100], Loss: 0.8441
Epoch [10/100], Loss: 0.8219
Epoch [11/100], Loss: 0.8002
Epoch [12/100], Loss: 0.7751
Epoch [13/100], Loss: 0.7484
Epoch [14/100], Loss: 0.7204
Epoch [15/100], Loss: 0.6895
Epoch [16/100], Loss: 0.6540
Epoch [17/100], Loss: 0.6153
Epoch [18/100], Loss: 0.5791
Epoch [19/100], Loss: 0.5488
Epoch [20/100], Loss: 0.5221
Epoch [21/100], Loss: 0.4960
Epoch [22/100], Loss: 0.4698
Epoch [23/100], Loss: 0.4447
Epoch [24/100], Loss: 0.4209
Epoch [25/100], Loss: 0.3975
Epoch [26/100], Loss: 0.3740
Epoch [27/100], Loss: 0.3507
Epoch [28/100], Loss: 0.3276
Epoch [29/100], Loss: 0.3051
Epoch [30/100], Loss: 0.2830
Epoch [31/100], Loss: 0.2616
Epoch [32/100], Loss: 0.2413
Epoch [33/100], Loss: 0.2220
Epoch [34/100], Loss: 0.2036
Epoch [35/100], Loss: 0

**Smaller Learning Rate**

In [18]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 50
batch_size = 64

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_time = time.time()
    if epoch % 10 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    total_time = end_time - start_time
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch [1/50], Loss: 1.6078
Epoch [11/50], Loss: 1.4337
Epoch [21/50], Loss: 1.1817
Epoch [31/50], Loss: 1.0834
Epoch [41/50], Loss: 1.0268
Total training time: 46.97 seconds
Accuracy on test set: 0.62


**50 Epochs**

In [19]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
num_epochs = 50
batch_size = 64

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_time = time.time()
    if epoch % 10 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    total_time = end_time - start_time
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch [1/50], Loss: 1.1850
Epoch [11/50], Loss: 0.1011
Epoch [21/50], Loss: 0.0046
Epoch [31/50], Loss: 0.0006
Epoch [41/50], Loss: 0.0004
Total training time: 47.33 seconds
Accuracy on test set: 0.84


**Increased Batch Size**

In [20]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 50
batch_size = 128

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_time = time.time()
    if epoch % 10 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    total_time = end_time - start_time
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch [1/50], Loss: 1.6105
Epoch [11/50], Loss: 1.5688
Epoch [21/50], Loss: 1.4136
Epoch [31/50], Loss: 1.2359
Epoch [41/50], Loss: 1.1630
Total training time: 26.35 seconds
Accuracy on test set: 0.54


**Decreased Batch Size**

In [21]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 50
batch_size = 32

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_time = time.time()
    if epoch % 10 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    total_time = end_time - start_time
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch [1/50], Loss: 1.6032
Epoch [11/50], Loss: 1.1297
Epoch [21/50], Loss: 1.0182
Epoch [31/50], Loss: 0.9694
Epoch [41/50], Loss: 0.9038
Total training time: 68.76 seconds
Accuracy on test set: 0.60


**Increased Layers**

In [24]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 3
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 100
batch_size = 64

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_time = time.time()
    if epoch % 10 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    total_time = end_time - start_time
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch [1/100], Loss: 1.6093
Epoch [11/100], Loss: 1.2859
Epoch [21/100], Loss: 1.1264
Epoch [31/100], Loss: 1.0457
Epoch [41/100], Loss: 1.0080
Epoch [51/100], Loss: 1.0027
Epoch [61/100], Loss: 0.9865
Epoch [71/100], Loss: 0.9575
Epoch [81/100], Loss: 0.9190
Epoch [91/100], Loss: 0.8723
Total training time: 127.52 seconds
Accuracy on test set: 0.61
