# Компʼютерний практикум №16
Виконав студент групи ЗК-41мп Гломозда Костянтин

РЕКУРЕНТНІ МЕРЕЖІ (RNN, GRU, LSTM)

# Import necessary libraries

In [17]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration and hyperparameters

In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 64
num_epochs = 10
learning_rate = 0.01

# MNIST dataset loading and preprocessing

In [19]:
train_dataset = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=True,
    transform=transforms.ToTensor(),
    download=True
)

test_dataset = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=False,
    transform=transforms.ToTensor()
)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define the RNN model class

In [20]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return out

# Model initialization

In [21]:
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training the model

In [22]:
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
  
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 2.3566
Epoch [2/10], Loss: 2.2502
Epoch [3/10], Loss: 2.3491
Epoch [4/10], Loss: 2.4235
Epoch [5/10], Loss: 2.3818
Epoch [6/10], Loss: 2.3251
Epoch [7/10], Loss: 2.3482
Epoch [8/10], Loss: 2.3759
Epoch [9/10], Loss: 2.6778
Epoch [10/10], Loss: 2.3740


# Evaluating the model on the test set

In [23]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy of the model on the 10000 test images: {100 * correct / total} %')

Test Accuracy of the model on the 10000 test images: 9.82 %


# Saving and testing the model

In [24]:
torch.save(model.state_dict(), 'models/rnn_model_1.ckpt')

# Print 10 predictions from test data
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        print("Predicted:", predicted[:10].cpu().numpy())
        print("Labels:", labels[:10].cpu().numpy())
        break

Predicted: [4 4 4 4 4 4 4 4 4 4]
Labels: [7 2 1 0 4 1 4 9 5 9]


Improve accuracy

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

sequence_length = 28
input_size = 28
hidden_size = 256  # Increased hidden size for better feature extraction
num_layers = 3  # Added more layers for depth
num_classes = 10
batch_size = 128  # Increased batch size for faster convergence
num_epochs = 10
learning_rate = 0.001  # Lowered learning rate for stable training
dropout_rate = 0.2  # Added dropout to prevent overfitting

# MNIST dataset loading and preprocessing
train_dataset = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))  # Normalize input for better training
    ]),
    download=True
)

test_dataset = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))  # Normalize input for better training
    ])
)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

class OptimizedRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate):
        super(OptimizedRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.rnn(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Model initialization
model = OptimizedRNN(input_size, hidden_size, num_layers, num_classes, dropout_rate).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 0.0750
Epoch [2/10], Loss: 0.1020
Epoch [3/10], Loss: 0.0304
Epoch [4/10], Loss: 0.0302
Epoch [5/10], Loss: 0.0153
Epoch [6/10], Loss: 0.0187
Epoch [7/10], Loss: 0.0235
Epoch [8/10], Loss: 0.0178
Epoch [9/10], Loss: 0.0194
Epoch [10/10], Loss: 0.0139


Test model

In [2]:
# Evaluating the model on the test set
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy of the model on the 10000 test images: {100 * correct / total} %')

Test Accuracy of the model on the 10000 test images: 99.04 %


In [3]:
# Saving and testing the model
torch.save(model.state_dict(), 'models/optimized_rnn_model.ckpt')

# Print 10 predictions from test data
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        print("Predicted:", predicted[:10].cpu().numpy())
        print("Labels:", labels[:10].cpu().numpy())
        break

Predicted: [7 2 1 0 4 1 4 9 5 9]
Labels: [7 2 1 0 4 1 4 9 5 9]


# Using GRU architecture

In [4]:
# Import necessary libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration and hyperparameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

sequence_length = 28
input_size = 28
hidden_size = 256
num_layers = 3
num_classes = 10
batch_size = 128
num_epochs = 10
learning_rate = 0.001
dropout_rate = 0.2

# MNIST dataset loading and preprocessing
train_dataset = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ]),
    download=True
)

test_dataset = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define the GRU-based model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

# Initialize the model
model = GRUModel(input_size, hidden_size, num_layers, num_classes, dropout_rate).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 0.0494
Epoch [2/10], Loss: 0.1136
Epoch [3/10], Loss: 0.0163
Epoch [4/10], Loss: 0.0627
Epoch [5/10], Loss: 0.0093
Epoch [6/10], Loss: 0.0036
Epoch [7/10], Loss: 0.0046
Epoch [8/10], Loss: 0.1276
Epoch [9/10], Loss: 0.0033
Epoch [10/10], Loss: 0.0105


In [5]:
# Evaluating the model on the test set
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy of the GRU model on the 10000 test images: {100 * correct / total} %')

Test Accuracy of the GRU model on the 10000 test images: 99.11 %


In [None]:
# Saving and testing the model
torch.save(model.state_dict(), 'models/gru_rnn.ckpt')

with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        print("Predicted:", predicted[:10].cpu().numpy())
        print("Labels:", labels[:10].cpu().numpy())
        break

Predicted: [7 2 1 0 4 1 4 9 5 9]
Labels: [7 2 1 0 4 1 4 9 5 9]


# Using LTSM Architecture

In [7]:
# Import necessary libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration and hyperparameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

sequence_length = 28
input_size = 28
hidden_size = 256
num_layers = 3
num_classes = 10
batch_size = 128
num_epochs = 10
learning_rate = 0.001
dropout_rate = 0.2

# MNIST dataset loading and preprocessing
train_dataset = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ]),
    download=True
)

test_dataset = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define the LSTM-based model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Initialize the model
model = LSTMModel(input_size, hidden_size, num_layers, num_classes, dropout_rate).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 0.1176
Epoch [2/10], Loss: 0.0557
Epoch [3/10], Loss: 0.0419
Epoch [4/10], Loss: 0.0537
Epoch [5/10], Loss: 0.0753
Epoch [6/10], Loss: 0.0613
Epoch [7/10], Loss: 0.0103
Epoch [8/10], Loss: 0.0247
Epoch [9/10], Loss: 0.0049
Epoch [10/10], Loss: 0.0736


In [8]:
# Evaluating the model on the test set
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy of the LSTM model on the 10000 test images: {100 * correct / total} %')

Test Accuracy of the LSTM model on the 10000 test images: 98.77 %


In [9]:
# Saving and testing the model
torch.save(model.state_dict(), 'models/lstm_model_rnn.ckpt')

with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        print("Predicted:", predicted[:10].cpu().numpy())
        print("Labels:", labels[:10].cpu().numpy())
        break

Predicted: [7 2 1 0 4 1 4 9 5 9]
Labels: [7 2 1 0 4 1 4 9 5 9]


Summary: 
1. With improved custom RNN there was achieved 99% accuracy.
2. GRU architecture has also achieved 99% accuracy but way faster
3. LSTM achieved slightly less accuracy with the same time as GRU

We will stick to GRU architecture and adapt it for CIFAR10 dataset

In [11]:
# Import necessary libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration and hyperparameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

sequence_length = 32  # Treat each row of the image as a time step
input_size = 32       # Each row has 32 pixel values (1D input per step)
hidden_size = 512     # Increased hidden size for CIFAR10 complexity
num_layers = 3        # Maintain depth for better feature extraction
num_classes = 10      # CIFAR10 has 10 output classes
batch_size = 128
num_epochs = 20
learning_rate = 0.001
dropout_rate = 0.3    # Slightly higher dropout for regularization

# CIFAR10 dataset loading and preprocessing
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale for simplicity
    transforms.Resize((32, 32)),  # Ensure consistent size
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize input
])

train_dataset = torchvision.datasets.CIFAR10(
    root='./data/cifar10',
    train=True,
    transform=transform,
    download=True
)

test_dataset = torchvision.datasets.CIFAR10(
    root='./data/cifar10',
    train=False,
    transform=transform
)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define the GRU-based model for CIFAR10
class GRUCIFAR(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate):
        super(GRUCIFAR, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

# Initialize the model
model = GRUCIFAR(input_size, hidden_size, num_layers, num_classes, dropout_rate).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        images = images.view(-1, sequence_length, input_size).to(device)  # Reshape for GRU
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar10/cifar-10-python.tar.gz


100%|██████████| 170M/170M [02:02<00:00, 1.39MB/s] 


Extracting ./data/cifar10/cifar-10-python.tar.gz to ./data/cifar10
Epoch [1/20], Loss: 1.7251
Epoch [2/20], Loss: 1.1145
Epoch [3/20], Loss: 1.2840
Epoch [4/20], Loss: 1.1281
Epoch [5/20], Loss: 0.8262
Epoch [6/20], Loss: 0.8169
Epoch [7/20], Loss: 0.8288
Epoch [8/20], Loss: 0.4626
Epoch [9/20], Loss: 0.3677
Epoch [10/20], Loss: 0.3729
Epoch [11/20], Loss: 0.5157
Epoch [12/20], Loss: 0.3552
Epoch [13/20], Loss: 0.3423
Epoch [14/20], Loss: 0.1857
Epoch [15/20], Loss: 0.2823
Epoch [16/20], Loss: 0.2608
Epoch [17/20], Loss: 0.3177
Epoch [18/20], Loss: 0.3515
Epoch [19/20], Loss: 0.4774
Epoch [20/20], Loss: 0.2757


In [12]:
# Evaluating the model on the test set
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.view(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy of the GRU model on CIFAR10: {100 * correct / total:.2f} %')

Test Accuracy of the GRU model on CIFAR10: 59.98 %


In [None]:
# Saving the model
torch.save(model.state_dict(), 'models/gru_cifar10_model_rnn.ckpt')

# Displaying predictions
with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, sequence_length, input_size).to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        print("Predicted:", predicted[:10].cpu().numpy())
        print("Labels:", labels[:10].cpu().numpy())
        break

Predicted: [4 1 8 8 6 6 8 3 3 1]
Labels: [3 8 8 0 6 6 1 6 3 1]


The accuracy is worse than in previous labs (>90%). Let's try few optimization for CIFAR100 dataset

In [15]:
# Import necessary libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration and hyperparameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

sequence_length = 32
input_size = 32
hidden_size = 512
num_layers = 4  # Increased depth for CIFAR100 complexity
num_classes = 100  # CIFAR100 has 100 output classes
batch_size = 128
num_epochs = 30  # Increased epochs for better convergence
learning_rate = 0.0005  # Lowered learning rate for stability
dropout_rate = 0.4  # Higher dropout rate to prevent overfitting

# CIFAR100 dataset loading and preprocessing
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale for simplicity
    transforms.Resize((32, 32)),  # Ensure consistent size
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize input
])

train_dataset = torchvision.datasets.CIFAR100(
    root='./data/cifar100/',
    train=True,
    transform=transform,
    download=True
)

test_dataset = torchvision.datasets.CIFAR100(
    root='./data/cifar100/',
    train=False,
    transform=transform
)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define the GRU-based model for CIFAR100
class GRUCIFAR100(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate):
        super(GRUCIFAR100, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

# Initialize the model
model = GRUCIFAR100(input_size, hidden_size, num_layers, num_classes, dropout_rate).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)  # AdamW for better regularization

# Training the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        images = images.view(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar100/cifar-100-python.tar.gz


100%|██████████| 169M/169M [02:09<00:00, 1.30MB/s] 


Extracting ./data/cifar100/cifar-100-python.tar.gz to ./data/cifar100/
Epoch [1/30], Loss: 3.6883
Epoch [2/30], Loss: 3.4932
Epoch [3/30], Loss: 3.3434
Epoch [4/30], Loss: 2.8912
Epoch [5/30], Loss: 2.6317
Epoch [6/30], Loss: 2.9532
Epoch [7/30], Loss: 2.5863
Epoch [8/30], Loss: 2.4893
Epoch [9/30], Loss: 2.3768
Epoch [10/30], Loss: 2.2451
Epoch [11/30], Loss: 2.0422
Epoch [12/30], Loss: 1.6040
Epoch [13/30], Loss: 1.7357
Epoch [14/30], Loss: 1.2323
Epoch [15/30], Loss: 0.9799
Epoch [16/30], Loss: 1.0345
Epoch [17/30], Loss: 0.8323
Epoch [18/30], Loss: 0.6156
Epoch [19/30], Loss: 0.6901
Epoch [20/30], Loss: 0.6305
Epoch [21/30], Loss: 0.4033
Epoch [22/30], Loss: 0.4221
Epoch [23/30], Loss: 0.4783
Epoch [24/30], Loss: 0.3901
Epoch [25/30], Loss: 0.4003
Epoch [26/30], Loss: 0.4562
Epoch [27/30], Loss: 0.3241
Epoch [28/30], Loss: 0.4736
Epoch [29/30], Loss: 0.3211
Epoch [30/30], Loss: 0.3614


In [16]:
# Evaluating the model on the test set
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.view(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy of the GRU model on CIFAR100: {100 * correct / total:.2f} %')

Test Accuracy of the GRU model on CIFAR100: 28.09 %


In [None]:
# Saving the model
torch.save(model.state_dict(), 'models/gru_cifar100_model_rnn.ckpt')

# Displaying predictions
with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, sequence_length, input_size).to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        print("Predicted:", predicted[:10].cpu().numpy())
        print("Labels:", labels[:10].cpu().numpy())
        break

Predicted: [79 70 51 55 71 50 88 66 60 34]
Labels: [49 33 72 51 71 92 15 14 23  0]


Well, after several tests there wasn't achieve such a good accuracy as in previous labs. Maybe it can be done using LTSM with other hyperparameters for better optimization

Conclusion:
Оптимізація моделі для CIFAR100 передбачала збільшення глибини (4 шари), розмірів прихованого стану (512 нейронів), а також додавання регуляризації через Dropout (0.4) та оптимізатор AdamW. Такі параметри були обрані для врахування складності задачі з 100 класами. Однак, перехід до одноканальних (Grayscale) даних, збереження тільки часових залежностей та відсутність попередньої обробки, зокрема згорткових шарів (CNN), обмежили точність. Результат у 28.09% свідчить, що GRU без додаткового врахування просторових ознак не може повністю розкрити інформацію у зображеннях CIFAR100, і майбутні покращення варто шукати в поєднанні RNN та CNN.

1. Що являє собою рекурентна нейронна мережа?
2. Які різновиди рекурентних нейронних мереж існують?

1. Рекурентна нейронна мережа (RNN) — це модель штучного інтелекту, яка обробляє послідовні дані, використовуючи прихований стан для збереження інформації з попередніх часових кроків. Це дозволяє їй враховувати контекст у задачах, таких як обробка мови, часові ряди або послідовні дані.
2. Існують базові RNN, а також покращені версії для вирішення проблеми зникаючих/вибухаючих градієнтів: LSTM (Long Short-Term Memory) та GRU (Gated Recurrent Unit). Додатково існують моделі з двонаправленими зв'язками (Bidirectional RNN) та розширення, як Attention та Transformer, які поєднують RNN із сучасними підходами.