In [1]:
import numpy as np
import tqdm
import torch
import torch.nn as nn
from torch.optim import SGD
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [4]:
train = MNIST(root='', download=False, train=True, transform=transform)
train.extra_repr

<bound method MNIST.extra_repr of Dataset MNIST
    Number of datapoints: 60000
    Root location: 
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )>

In [5]:
test = MNIST(root='', download=False, train=False, transform=transform)
test.extra_repr

<bound method MNIST.extra_repr of Dataset MNIST
    Number of datapoints: 10000
    Root location: 
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )>

In [6]:
batch_size = 100

train_data_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
test_data_loader = DataLoader(test, batch_size=batch_size, shuffle=False)

In [7]:
class LSTMModel(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        
        self.fc = nn.Linear(self.hidden_dim, output_dim)
    
    def forward(self, x):
        # h0; LSTM'in hidden state'ini temsil eder.
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_().to(device)
        
        # c0; LSTM'in *long term memory* olan cell state'ini temsil eder.
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_().to(device)
        
        # requires_grad_(); durum tensorleri(h0, c0) ile gradient'leri takip etmeye ayarlanir.
        # Bu, backward propagation sirasinda bu tensorler uzerinde gradyan hesaplanmasini saglar.
        
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        # h0.detach(), c0.detach() -> Ilk hidden state ve cell state
        # detach(); fonksiyonu, tensorleri koparir ve gradient'leri bu tensorleri takip etmemesini saglar.
        
        out = self.fc(out[:, -1, :])
        return out

#### Pytorch'ta, herhangi bir tensorun gradyanini izlemek istiyorsak;
#### "requires_grad_" ozelligini True olarak ayarlamamiz gerekir. Bu, tensor uzerinde geriye dogru gecis sirasinda gradyanin hesaplanmasini saglar.
#### Modelin egitimi sirasinda, gradyanlar, loss func.'un turevi kullanilarak tum agirlik ve biaslara iletilir. Bu, agin ogrenmesini saglar.
####
#### Ancak, bazen bir durum tensorunu guncellemek istemeyebiliriz, cunku; bu tensor modelin gecmis durumunu temsil eder ve bu durumu sabit tutmak isteyebiliriz.
#### "detach()" fonksiyonu, gradyanlari gecici olarak kapatmanin bir yoludur. Yani "h0.detach()" ile "h0"'in gradyani takip edilmez.

RNN ve LSTM gibi tekrarlayan nn'ler, onceki durumlarini hatirlayarak calisirlar. Bu, ozellikle zaman serileri gibi ardisik verilerle calisirken cok onemlidir.

Egitim sirasinda, baslangic durumlari genellikle 0 veya kucuk Random Degerlerle baslatilirlar. Ancak, bazen baslangic durumlarini belirli bir duruma ayarlamak isteyebiliriz. Bu durumlar, modelin belirli bir baglami(ornegin, bir cumle veya bir zaman serisinin baslangici) hatirlamasina yardimci olabilir.

    Bu nedenlerle, "requires_grad_()" ve "detach()" fonksiyonlari, modelin egitim surecini daha iyi kontrol etmemize ve modelin gecmis durumunu daha etkili bir sekilde yonetmemize olanak tanir.

In [15]:
input_dim = 28
hidden_dim = 100
n_layers = 1
output_dim = 10

In [16]:
model = LSTMModel(input_dim, hidden_dim, n_layers, output_dim).to(device)

In [17]:
lr = 0.1
optimizer = SGD(model.parameters(), lr=lr)

criterion = nn.CrossEntropyLoss()

In [18]:
n_epochs = 30

for epoch in range(n_epochs):
    for image, label in train_data_loader:
        image, label = image.view(-1, 28, 28).to(device), label.to(device)
        
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch {epoch+1}/{n_epochs}     Loss: {loss.item()}')

Epoch 1/30     Loss: 0.5768505334854126
Epoch 2/30     Loss: 0.3834614157676697
Epoch 3/30     Loss: 0.20362384617328644
Epoch 4/30     Loss: 0.086998850107193
Epoch 5/30     Loss: 0.2771184742450714
Epoch 6/30     Loss: 0.029947252944111824
Epoch 7/30     Loss: 0.08183572441339493
Epoch 8/30     Loss: 0.016183361411094666
Epoch 9/30     Loss: 0.07552126795053482
Epoch 10/30     Loss: 0.01837044395506382
Epoch 11/30     Loss: 0.022792281582951546
Epoch 12/30     Loss: 0.04881245642900467
Epoch 13/30     Loss: 0.06550953537225723
Epoch 14/30     Loss: 0.013679934665560722
Epoch 15/30     Loss: 0.02766251377761364
Epoch 16/30     Loss: 0.11577854305505753
Epoch 17/30     Loss: 0.02463528700172901
Epoch 18/30     Loss: 0.01981210708618164
Epoch 19/30     Loss: 0.01268976554274559
Epoch 20/30     Loss: 0.05732066184282303
Epoch 21/30     Loss: 0.028308937326073647
Epoch 22/30     Loss: 0.020217137411236763
Epoch 23/30     Loss: 0.010319492779672146
Epoch 24/30     Loss: 0.00099614541977643

In [19]:
correct = 0
total = 0

In [20]:
with torch.no_grad():
    for image, label in test_data_loader:
        image, label = image.view(-1, 28, 28).to(device), label.to(device)
        
        output = model(image)
        _, predicted = torch.max(output.data, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()

In [21]:
accuracy = (correct / total) * 100
print(f'Accuracy: {accuracy:.5f}')

Accuracy: 98.53000


In [22]:
def predictSingleImage(model, image):
    
    model.eval()
    image = image.view(-1, 28, 28).to(device)
    
    with torch.no_grad():
        output = model(image)
        _, predict = torch.max(output.data, 1)
    
    return predict.item()

In [23]:
single_image, single_label = next(iter(test_data_loader))

predict_single_image = predictSingleImage(model, single_image[99])

print(f'Predict Value: {predict_single_image}  Real Value: {single_label[99].item()}')

Predict Value: 9  Real Value: 9


In [24]:
def predictMultiImage(model, images):
    
    model.eval()
    predictions = []
    
    with torch.no_grad():
        for image in images:
            image = image.view(-1, 28, 28).to(device)
            output = model(image)
            _, predict = torch.max(output.data, 1)
            predictions.append(predict.item())
    
    return predictions

In [25]:
multi_images, multi_labels = next(iter(test_data_loader))

predict_multi_images = predictMultiImage(model, multi_images[10:20])

for i, (predict_multi_image, true_label) in enumerate(zip(predict_multi_images, multi_labels[10:20])):
    print(f'{i+1}. Value -> Predict Value: {predict_multi_image}  Real Value: {true_label.item()}')

1. Value -> Predict Value: 0  Real Value: 0
2. Value -> Predict Value: 6  Real Value: 6
3. Value -> Predict Value: 9  Real Value: 9
4. Value -> Predict Value: 0  Real Value: 0
5. Value -> Predict Value: 1  Real Value: 1
6. Value -> Predict Value: 5  Real Value: 5
7. Value -> Predict Value: 9  Real Value: 9
8. Value -> Predict Value: 7  Real Value: 7
9. Value -> Predict Value: 8  Real Value: 3
10. Value -> Predict Value: 4  Real Value: 4
