## Imports

In [31]:
import torch

In [32]:
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim

## set device

In [33]:
device = torch.device("mps")

## Create Fully Connected Network

In [34]:
## LSTM Network

class lstm_net(nn.Module):
    def __init__(self,input_size, hidden_size, num_layers, num_classes):
        super(lstm_net,self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        
        # self.net = nn.Sequential(
        #     nn.LSTM(input_size,hidden_size,num_layers,batch_first=True),
        # )
        
        # self.classifier = nn.Sequential(
        #     # nn.Linear(input_size * hidden_size, num_classes)
        #     nn.Linear(hidden_size, num_classes)
        # )
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        self.classifier = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device=device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device=device)
        
        out, _ = self.lstm(x,(h0, c0))
        # out = out.reshape(out.shape[0], -1)
        out = self.classifier(out[:,-1,:])
        return out
        

## Hyperparameters

In [35]:
num_layers = 2
input_size = 28
hidden_size = 256
sequence_length = 28
num_classes = 10
lr = 0.001
epochs = 5
batch_size = 64

In [36]:
train_dataset = datasets.MNIST(root = 'datasets/', train = True, transform = transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset = train_dataset, batch_size=batch_size, shuffle=True)

In [37]:
test_dataset = datasets.MNIST(root = 'datasets/', train = False, transform = transforms.ToTensor(), download=True)
test_loader = DataLoader(dataset = test_dataset, batch_size=batch_size, shuffle=True)

## Initialize Network

In [38]:
model = lstm_net(num_layers,num_classes,hidden_size,input_size).to(device)

In [39]:
model.parameters

<bound method Module.parameters of lstm_net(
  (lstm): LSTM(2, 10, num_layers=256, batch_first=True)
  (classifier): Linear(in_features=10, out_features=28, bias=True)
)>

In [40]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = lr)

## Train Network

In [41]:
for epoch in range(epochs):
    for idx ,(data,target) in enumerate(train_loader):
        data = data.to(device=device)
        target = target.to(device=device)
        
    print(data.shape)

torch.Size([32, 1, 28, 28])
torch.Size([32, 1, 28, 28])
torch.Size([32, 1, 28, 28])
torch.Size([32, 1, 28, 28])
torch.Size([32, 1, 28, 28])


In [42]:
print(data.reshape(data.shape[0],-1))
print(data.reshape(data.shape[0],-1).shape)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='mps:0')
torch.Size([32, 784])


In [43]:
for  epoch in range(epochs):
    num_correct = 0
    num_samples = 0
    running_loss = 0.0
    for idx,(data,target) in enumerate(train_loader):
        data = data.to(device = device)
        target = target.to(device=device)
        
        scores =model(data)
        loss= criterion(scores,target)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        _,predictions = scores.max(1) 
        num_correct = num_correct +  (predictions == target).sum()
        num_samples = num_samples + predictions.size(0)
            
    print(f"Got {num_correct}/{num_samples} with accuracy {float(num_correct)/float (num_samples) * 100:.2f}")
        
    running_loss += loss.item()
    epoch_loss = running_loss / len(data)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

AttributeError: 'lstm_net' object has no attribute 'net'

In [145]:
len(data)

32

In [146]:
print((model.classifier[0].weight).shape)

torch.Size([10, 784])


## Check Accuracy

In [147]:
def check_accuracy(loader,model):
    num_correct = 0
    num_samples = 0
    
    model.eval()
    
    with torch.no_grad():
        for x,y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            # x = x.reshape(x.shape[0],-1)
            scores = model(x)
            _,predictions = scores.max(1) 
            num_correct = num_correct +  (predictions == y).sum()
            num_samples = num_samples + predictions.size(0)
            
        print(f"Got {num_correct}/{num_samples} with accuracy {float(num_correct)/float (num_samples) * 100:.2f}")
    

In [152]:
predictions.size()

torch.Size([32])

In [148]:
check_accuracy(train_loader, model)

Got 58969/60000 with accuracy 98.28


In [150]:
check_accuracy(test_loader, model)

Got 9821/10000 with accuracy 98.21


In [149]:
scores.shape

torch.Size([32, 10])