In [1]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import os

model = models.resnet50(pretrained=True)
model = torch.nn.Sequential(*(list(model.children())[:-1])) 

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def extract_features(image_path, model, transform):
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)
    with torch.no_grad():
        features = model(image)
    return features.squeeze()

all_features = []
all_labels = []
image_paths = os.listdir('./graphs')

class_to_idx = {label: idx for idx, label in enumerate(set(image_paths))}
labels = [class_to_idx[label] for label in image_paths]

for image_path in image_paths:
    features = extract_features('./graphs/' + image_path, model, transform)
    all_features.append(features)

all_features = np.array(all_features)


  from .autonotebook import tqdm as notebook_tqdm
  all_features = np.array(all_features)
  all_features = np.array(all_features)


In [2]:
all_features = np.array([tensor.numpy() for tensor in all_features])

Cross Attention

In [14]:
import torch.nn as nn
import torch

class CrossAttention(nn.Module):
    def __init__(self, feature_dim, num_heads):
        super(CrossAttention, self).__init__()
        self.query = nn.Linear(feature_dim, feature_dim)
        self.key = nn.Linear(feature_dim, feature_dim)
        self.value = nn.Linear(feature_dim, feature_dim)
        self.num_heads = num_heads
        self.attention = nn.MultiheadAttention(embed_dim=feature_dim, num_heads=num_heads)

    def forward(self, features):
        query = self.query(features)
        key = self.key(features)
        value = self.value(features)
        
        query = query.unsqueeze(1).transpose(0, 1)
        key = key.unsqueeze(1).transpose(0, 1)
        value = value.unsqueeze(1).transpose(0, 1)
        
        attended_features, _ = self.attention(query, key, value)
        return attended_features.squeeze(0)

In [19]:
class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, num_heads):
        super(ImageClassifier, self).__init__()
        self.cross_attention = CrossAttention(feature_dim, num_heads)
        self.fc = nn.Linear(feature_dim, num_classes)

    def forward(self, features):
        attended_features = self.cross_attention(features)
        logits = self.fc(attended_features)
        return logits

feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
num_heads = 8
classifier = ImageClassifier(feature_dim, num_classes, num_heads)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

In [20]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier.train()
    optimizer.zero_grad()
    outputs = classifier(features_tensor)
    loss = criterion(outputs, labels_tensor)
    loss.backward()
    optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/500], Loss: 2.7742
Epoch [2/500], Loss: 2.7724
Epoch [3/500], Loss: 2.7705
Epoch [4/500], Loss: 2.7682
Epoch [5/500], Loss: 2.7649
Epoch [6/500], Loss: 2.7604
Epoch [7/500], Loss: 2.7544
Epoch [8/500], Loss: 2.7465
Epoch [9/500], Loss: 2.7359
Epoch [10/500], Loss: 2.7222
Epoch [11/500], Loss: 2.7052
Epoch [12/500], Loss: 2.6907
Epoch [13/500], Loss: 2.6742
Epoch [14/500], Loss: 2.6291
Epoch [15/500], Loss: 2.5968
Epoch [16/500], Loss: 2.6084
Epoch [17/500], Loss: 2.5488
Epoch [18/500], Loss: 2.4495
Epoch [19/500], Loss: 2.4897
Epoch [20/500], Loss: 2.4741
Epoch [21/500], Loss: 2.2649
Epoch [22/500], Loss: 2.4566
Epoch [23/500], Loss: 2.2549
Epoch [24/500], Loss: 2.4845
Epoch [25/500], Loss: 2.1807
Epoch [26/500], Loss: 2.2967
Epoch [27/500], Loss: 1.9231
Epoch [28/500], Loss: 2.1427
Epoch [29/500], Loss: 2.1426
Epoch [30/500], Loss: 2.2471
Epoch [31/500], Loss: 1.7155
Epoch [32/500], Loss: 2.3188
Epoch [33/500], Loss: 1.9106
Epoch [34/500], Loss: 2.4980
Epoch [35/500], Loss: 1

In [21]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%


Cross Attention With LSTM

In [4]:
import torch.nn as nn
import torch

class CrossAttention(nn.Module):
    def __init__(self, feature_dim, num_heads):
        super(CrossAttention, self).__init__()
        self.query = nn.Linear(feature_dim, feature_dim)
        self.key = nn.Linear(feature_dim, feature_dim)
        self.value = nn.Linear(feature_dim, feature_dim)
        self.num_heads = num_heads
        self.attention = nn.MultiheadAttention(embed_dim=feature_dim, num_heads=num_heads)

    def forward(self, features):
        query = self.query(features)
        key = self.key(features)
        value = self.value(features)
        
        query = query.unsqueeze(1).transpose(0, 1)
        key = key.unsqueeze(1).transpose(0, 1)
        value = value.unsqueeze(1).transpose(0, 1)
        
        attended_features, _ = self.attention(query, key, value)
        return attended_features.squeeze(0)

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, num_heads, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.cross_attention = CrossAttention(hidden_dim, num_heads)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, features):
        lstm_out, _ = self.lstm(features.unsqueeze(1)) 
        lstm_out = lstm_out.squeeze(1) 
        attended_features = self.cross_attention(lstm_out)
        logits = self.fc(attended_features)
        return logits

feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
num_heads = 8
hidden_dim = 128 
num_layers = 2   

classifier_lstm_cross = ImageClassifier(feature_dim, num_classes, num_heads, hidden_dim, num_layers)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

import torch.optim as optim

criterion_lstm_cross = nn.CrossEntropyLoss()
optimizer_lstm_cross = optim.Adam(classifier_lstm_cross.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier_lstm_cross.train()
    optimizer_lstm_cross.zero_grad()
    outputs = classifier_lstm_cross(features_tensor)
    loss = criterion_lstm_cross(outputs, labels_tensor)
    loss.backward()
    optimizer_lstm_cross.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')



Epoch [1/500], Loss: 2.7733
Epoch [2/500], Loss: 2.7718
Epoch [3/500], Loss: 2.7702
Epoch [4/500], Loss: 2.7679
Epoch [5/500], Loss: 2.7646
Epoch [6/500], Loss: 2.7600
Epoch [7/500], Loss: 2.7541
Epoch [8/500], Loss: 2.7463
Epoch [9/500], Loss: 2.7362
Epoch [10/500], Loss: 2.7233
Epoch [11/500], Loss: 2.7073
Epoch [12/500], Loss: 2.6868
Epoch [13/500], Loss: 2.6645
Epoch [14/500], Loss: 2.6386
Epoch [15/500], Loss: 2.6001
Epoch [16/500], Loss: 2.5615
Epoch [17/500], Loss: 2.5113
Epoch [18/500], Loss: 2.4348
Epoch [19/500], Loss: 2.3767
Epoch [20/500], Loss: 2.5965
Epoch [21/500], Loss: 2.3416
Epoch [22/500], Loss: 2.4483
Epoch [23/500], Loss: 2.1121
Epoch [24/500], Loss: 2.2572
Epoch [25/500], Loss: 2.1283
Epoch [26/500], Loss: 2.0409
Epoch [27/500], Loss: 2.4353
Epoch [28/500], Loss: 1.8644
Epoch [29/500], Loss: 1.8733
Epoch [30/500], Loss: 2.4491
Epoch [31/500], Loss: 1.7438
Epoch [32/500], Loss: 2.4641
Epoch [33/500], Loss: 1.5223
Epoch [34/500], Loss: 2.0836
Epoch [35/500], Loss: 2

In [5]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier_lstm_cross, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%


Cross Attention With Bi-LSTM

In [6]:
import torch.nn as nn
import torch

class CrossAttention(nn.Module):
    def __init__(self, feature_dim, num_heads):
        super(CrossAttention, self).__init__()
        self.query = nn.Linear(feature_dim, feature_dim)
        self.key = nn.Linear(feature_dim, feature_dim)
        self.value = nn.Linear(feature_dim, feature_dim)
        self.num_heads = num_heads
        self.attention = nn.MultiheadAttention(embed_dim=feature_dim, num_heads=num_heads)

    def forward(self, features):
        query = self.query(features)
        key = self.key(features)
        value = self.value(features)
        
        query = query.unsqueeze(1).transpose(0, 1)
        key = key.unsqueeze(1).transpose(0, 1)
        value = value.unsqueeze(1).transpose(0, 1)
        
        attended_features, _ = self.attention(query, key, value)
        return attended_features.squeeze(0)

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, num_heads, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.bi_lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.cross_attention = CrossAttention(hidden_dim * 2, num_heads) 
        self.fc = nn.Linear(hidden_dim * 2, num_classes)

    def forward(self, features):
        lstm_out, _ = self.bi_lstm(features.unsqueeze(1))
        lstm_out = lstm_out.squeeze(1)  
        attended_features = self.cross_attention(lstm_out)
        logits = self.fc(attended_features)
        return logits

feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
num_heads = 8
hidden_dim = 128
num_layers = 2  

classifier_bi = ImageClassifier(feature_dim, num_classes, num_heads, hidden_dim, num_layers)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

import torch.optim as optim

criterion_bi = nn.CrossEntropyLoss()
optimizer_bi = optim.Adam(classifier_bi.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier_bi.train()
    optimizer_bi.zero_grad()
    outputs = classifier_bi(features_tensor)
    loss = criterion_bi(outputs, labels_tensor)
    loss.backward()
    optimizer_bi.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/500], Loss: 2.7732
Epoch [2/500], Loss: 2.7700
Epoch [3/500], Loss: 2.7647
Epoch [4/500], Loss: 2.7556
Epoch [5/500], Loss: 2.7403
Epoch [6/500], Loss: 2.7183
Epoch [7/500], Loss: 2.6898
Epoch [8/500], Loss: 2.6694
Epoch [9/500], Loss: 2.6269
Epoch [10/500], Loss: 2.5509
Epoch [11/500], Loss: 2.5579
Epoch [12/500], Loss: 2.4496
Epoch [13/500], Loss: 2.5148
Epoch [14/500], Loss: 2.2695
Epoch [15/500], Loss: 2.3073
Epoch [16/500], Loss: 2.0874
Epoch [17/500], Loss: 2.5537
Epoch [18/500], Loss: 2.5903
Epoch [19/500], Loss: 2.8851
Epoch [20/500], Loss: 2.8385
Epoch [21/500], Loss: 2.2347
Epoch [22/500], Loss: 2.4949
Epoch [23/500], Loss: 2.1859
Epoch [24/500], Loss: 2.1668
Epoch [25/500], Loss: 2.2511
Epoch [26/500], Loss: 2.1268
Epoch [27/500], Loss: 2.0088
Epoch [28/500], Loss: 1.8185
Epoch [29/500], Loss: 1.7495
Epoch [30/500], Loss: 1.6038
Epoch [31/500], Loss: 1.4308
Epoch [32/500], Loss: 2.4454
Epoch [33/500], Loss: 4.9636
Epoch [34/500], Loss: 1.9054
Epoch [35/500], Loss: 2

In [7]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier_bi, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%


Only LSTM

In [10]:
import torch.nn as nn
import torch

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, features):
        lstm_out, _ = self.lstm(features.unsqueeze(1))  
        lstm_out = lstm_out.squeeze(1)
        logits = self.fc(lstm_out) 
        return logits


feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
hidden_dim = 128 
num_layers = 2   

classifier_only_lstm = ImageClassifier(feature_dim, num_classes, hidden_dim, num_layers)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

import torch.optim as optim

criterion_only_lstm = nn.CrossEntropyLoss()
optimizer_only_lstm = optim.Adam(classifier_only_lstm.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier_only_lstm.train()
    optimizer_only_lstm.zero_grad()
    outputs = classifier_only_lstm(features_tensor)
    loss = criterion_only_lstm(outputs, labels_tensor)
    loss.backward()
    optimizer_only_lstm.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/500], Loss: 2.7735
Epoch [2/500], Loss: 2.7691
Epoch [3/500], Loss: 2.7641
Epoch [4/500], Loss: 2.7576
Epoch [5/500], Loss: 2.7501
Epoch [6/500], Loss: 2.7415
Epoch [7/500], Loss: 2.7322
Epoch [8/500], Loss: 2.7224
Epoch [9/500], Loss: 2.7113
Epoch [10/500], Loss: 2.7002
Epoch [11/500], Loss: 2.6872
Epoch [12/500], Loss: 2.6738
Epoch [13/500], Loss: 2.6584
Epoch [14/500], Loss: 2.6428
Epoch [15/500], Loss: 2.6258
Epoch [16/500], Loss: 2.6049
Epoch [17/500], Loss: 2.5805
Epoch [18/500], Loss: 2.5581
Epoch [19/500], Loss: 2.5321
Epoch [20/500], Loss: 2.5087
Epoch [21/500], Loss: 2.4780
Epoch [22/500], Loss: 2.4429
Epoch [23/500], Loss: 2.4059
Epoch [24/500], Loss: 2.3686
Epoch [25/500], Loss: 2.3291
Epoch [26/500], Loss: 2.2911
Epoch [27/500], Loss: 2.2699
Epoch [28/500], Loss: 2.2462
Epoch [29/500], Loss: 2.1547
Epoch [30/500], Loss: 2.1316
Epoch [31/500], Loss: 2.0925
Epoch [32/500], Loss: 2.0026
Epoch [33/500], Loss: 1.9775
Epoch [34/500], Loss: 1.9298
Epoch [35/500], Loss: 1

In [12]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier_only_lstm, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%


Bi-LSTM

In [15]:
import torch.nn as nn
import torch

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.bi_lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, num_classes) 

    def forward(self, features):
        lstm_out, _ = self.bi_lstm(features.unsqueeze(1))  
        lstm_out = lstm_out.squeeze(1)  
        logits = self.fc(lstm_out)  
        return logits

feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
hidden_dim = 128  
num_layers = 2    

classifier_bi_lstm_only = ImageClassifier(feature_dim, num_classes, hidden_dim, num_layers)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

import torch.optim as optim

criterion_bi_lstm_only = nn.CrossEntropyLoss()
optimizer_bi_lstm_only = optim.Adam(classifier_bi_lstm_only.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier_bi_lstm_only.train()
    optimizer_bi_lstm_only.zero_grad()
    outputs = classifier_bi_lstm_only(features_tensor)
    loss = criterion_bi_lstm_only(outputs, labels_tensor)
    loss.backward()
    optimizer_bi_lstm_only.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/500], Loss: 2.7734
Epoch [2/500], Loss: 2.7648
Epoch [3/500], Loss: 2.7543
Epoch [4/500], Loss: 2.7409
Epoch [5/500], Loss: 2.7235
Epoch [6/500], Loss: 2.7033
Epoch [7/500], Loss: 2.6795
Epoch [8/500], Loss: 2.6548
Epoch [9/500], Loss: 2.6253
Epoch [10/500], Loss: 2.5956
Epoch [11/500], Loss: 2.5600
Epoch [12/500], Loss: 2.5225
Epoch [13/500], Loss: 2.4768
Epoch [14/500], Loss: 2.4249
Epoch [15/500], Loss: 2.3770
Epoch [16/500], Loss: 2.3301
Epoch [17/500], Loss: 2.2673
Epoch [18/500], Loss: 2.2064
Epoch [19/500], Loss: 2.1402
Epoch [20/500], Loss: 2.0486
Epoch [21/500], Loss: 1.9813
Epoch [22/500], Loss: 1.9027
Epoch [23/500], Loss: 1.8727
Epoch [24/500], Loss: 1.7211
Epoch [25/500], Loss: 1.6571
Epoch [26/500], Loss: 1.5482
Epoch [27/500], Loss: 1.4620
Epoch [28/500], Loss: 1.4158
Epoch [29/500], Loss: 1.2859
Epoch [30/500], Loss: 1.3231
Epoch [31/500], Loss: 1.1634
Epoch [32/500], Loss: 1.0598
Epoch [33/500], Loss: 0.9549
Epoch [34/500], Loss: 0.9056
Epoch [35/500], Loss: 0

In [27]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier_bi_lstm_only, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%


LSTM with Single Attention Layer

In [18]:
import torch
import torch.nn as nn

class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attention_weights = nn.Linear(hidden_dim, 1, bias=False)
    
    def forward(self, lstm_out):
        attention_scores = self.attention_weights(lstm_out)  
        attention_scores = torch.squeeze(attention_scores, -1)  
        attention_weights = torch.softmax(attention_scores, dim=1) 
        
        context_vector = torch.sum(attention_weights.unsqueeze(-1) * lstm_out, dim=1)  
        return context_vector

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.attention = Attention(hidden_dim)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, features):
        lstm_out, _ = self.lstm(features.unsqueeze(1)) 
        context_vector = self.attention(lstm_out) 
        logits = self.fc(context_vector) 
        return logits

feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
hidden_dim = 128  
num_layers = 2    

classifier_bi_att = ImageClassifier(feature_dim, num_classes, hidden_dim, num_layers)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

import torch.optim as optim

criterion_bi_att = nn.CrossEntropyLoss()
optimizer_bi_att = optim.Adam(classifier_bi_att.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier_bi_att.train()
    optimizer_bi_att.zero_grad()
    outputs = classifier_bi_att(features_tensor)
    loss = criterion_bi_att(outputs, labels_tensor)
    loss.backward()
    optimizer_bi_att.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/500], Loss: 2.7744
Epoch [2/500], Loss: 2.7699
Epoch [3/500], Loss: 2.7658
Epoch [4/500], Loss: 2.7600
Epoch [5/500], Loss: 2.7533
Epoch [6/500], Loss: 2.7451
Epoch [7/500], Loss: 2.7364
Epoch [8/500], Loss: 2.7267
Epoch [9/500], Loss: 2.7161
Epoch [10/500], Loss: 2.7044
Epoch [11/500], Loss: 2.6913
Epoch [12/500], Loss: 2.6763
Epoch [13/500], Loss: 2.6609
Epoch [14/500], Loss: 2.6427
Epoch [15/500], Loss: 2.6243
Epoch [16/500], Loss: 2.6039
Epoch [17/500], Loss: 2.5824
Epoch [18/500], Loss: 2.5592
Epoch [19/500], Loss: 2.5338
Epoch [20/500], Loss: 2.5032
Epoch [21/500], Loss: 2.4713
Epoch [22/500], Loss: 2.4376
Epoch [23/500], Loss: 2.4050
Epoch [24/500], Loss: 2.3707
Epoch [25/500], Loss: 2.3310
Epoch [26/500], Loss: 2.2825
Epoch [27/500], Loss: 2.2367
Epoch [28/500], Loss: 2.2085
Epoch [29/500], Loss: 2.1567
Epoch [30/500], Loss: 2.0891
Epoch [31/500], Loss: 2.0431
Epoch [32/500], Loss: 1.9858
Epoch [33/500], Loss: 1.9319
Epoch [34/500], Loss: 1.8862
Epoch [35/500], Loss: 1

In [26]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier_bi_att, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%


LSTM With Double Attention Layer

In [22]:
import torch
import torch.nn as nn

class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attention_weights = nn.Linear(hidden_dim, 1, bias=False)
    
    def forward(self, lstm_out):
        attention_scores = self.attention_weights(lstm_out)  
        attention_scores = torch.squeeze(attention_scores, -1)  
        attention_weights = torch.softmax(attention_scores, dim=1)  
        
        context_vector = torch.sum(attention_weights.unsqueeze(-1) * lstm_out, dim=1)  
        return context_vector

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.attention1 = Attention(hidden_dim)
        self.attention2 = Attention(hidden_dim)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  

    def forward(self, features):
        lstm_out, _ = self.lstm(features.unsqueeze(1)) 
        context_vector1 = self.attention1(lstm_out)  
        context_vector2 = self.attention2(lstm_out)  
        
        combined_context_vector = torch.cat((context_vector1, context_vector2), dim=1)  
        logits = self.fc(combined_context_vector)  
        return logits

feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
hidden_dim = 128  # You can choose the hidden dimension for the LSTM
num_layers = 2    # You can choose the number of LSTM layers

classifier_lstm_double_att = ImageClassifier(feature_dim, num_classes, hidden_dim, num_layers)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

import torch.optim as optim

criterion_lstm_double_att = nn.CrossEntropyLoss()
optimizer_lstm_double_att = optim.Adam(classifier_lstm_double_att.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier_lstm_double_att.train()
    optimizer_lstm_double_att.zero_grad()
    outputs = classifier_lstm_double_att(features_tensor)
    loss = criterion_lstm_double_att(outputs, labels_tensor)
    loss.backward()
    optimizer_lstm_double_att.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/500], Loss: 2.7737
Epoch [2/500], Loss: 2.7697
Epoch [3/500], Loss: 2.7653
Epoch [4/500], Loss: 2.7592
Epoch [5/500], Loss: 2.7511
Epoch [6/500], Loss: 2.7419
Epoch [7/500], Loss: 2.7309
Epoch [8/500], Loss: 2.7193
Epoch [9/500], Loss: 2.7062
Epoch [10/500], Loss: 2.6926
Epoch [11/500], Loss: 2.6769
Epoch [12/500], Loss: 2.6601
Epoch [13/500], Loss: 2.6414
Epoch [14/500], Loss: 2.6200
Epoch [15/500], Loss: 2.5996
Epoch [16/500], Loss: 2.5743
Epoch [17/500], Loss: 2.5467
Epoch [18/500], Loss: 2.5146
Epoch [19/500], Loss: 2.4801
Epoch [20/500], Loss: 2.4443
Epoch [21/500], Loss: 2.4080
Epoch [22/500], Loss: 2.3736
Epoch [23/500], Loss: 2.3398
Epoch [24/500], Loss: 2.2951
Epoch [25/500], Loss: 2.2233
Epoch [26/500], Loss: 2.1859
Epoch [27/500], Loss: 2.1590
Epoch [28/500], Loss: 2.0471
Epoch [29/500], Loss: 2.0297
Epoch [30/500], Loss: 1.9943
Epoch [31/500], Loss: 1.8715
Epoch [32/500], Loss: 1.8523
Epoch [33/500], Loss: 1.7763
Epoch [34/500], Loss: 1.6780
Epoch [35/500], Loss: 1

In [25]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier_lstm_double_att, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%


Bi-LSTM with Attention Layer

In [24]:
import torch
import torch.nn as nn

class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attention_weights = nn.Linear(hidden_dim, 1, bias=False)
    
    def forward(self, lstm_out):
        attention_scores = self.attention_weights(lstm_out) 
        attention_scores = torch.squeeze(attention_scores, -1)  
        attention_weights = torch.softmax(attention_scores, dim=1)  
        
        context_vector = torch.sum(attention_weights.unsqueeze(-1) * lstm_out, dim=1)  
        return context_vector

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.attention1 = Attention(hidden_dim)
        self.attention2 = Attention(hidden_dim)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  

    def forward(self, features):
        lstm_out, _ = self.lstm(features.unsqueeze(1))  
        context_vector1 = self.attention1(lstm_out)  
        context_vector2 = self.attention2(lstm_out)  
        
        combined_context_vector = torch.cat((context_vector1, context_vector2), dim=1)  
        logits = self.fc(combined_context_vector)  
        return logits

feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
hidden_dim = 128 
num_layers = 2   

classifier_bi_att_ = ImageClassifier(feature_dim, num_classes, hidden_dim, num_layers)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

import torch.optim as optim

criterion_bi_att_ = nn.CrossEntropyLoss()
optimizer_bi_att_ = optim.Adam(classifier_bi_att_.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier_bi_att_.train()
    optimizer_bi_att_.zero_grad()
    outputs = classifier_bi_att_(features_tensor)
    loss = criterion_bi_att_(outputs, labels_tensor)
    loss.backward()
    optimizer_bi_att_.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/500], Loss: 2.7735
Epoch [2/500], Loss: 2.7695
Epoch [3/500], Loss: 2.7653
Epoch [4/500], Loss: 2.7598
Epoch [5/500], Loss: 2.7530
Epoch [6/500], Loss: 2.7448
Epoch [7/500], Loss: 2.7354
Epoch [8/500], Loss: 2.7246
Epoch [9/500], Loss: 2.7123
Epoch [10/500], Loss: 2.6987
Epoch [11/500], Loss: 2.6827
Epoch [12/500], Loss: 2.6654
Epoch [13/500], Loss: 2.6463
Epoch [14/500], Loss: 2.6262
Epoch [15/500], Loss: 2.6044
Epoch [16/500], Loss: 2.5807
Epoch [17/500], Loss: 2.5509
Epoch [18/500], Loss: 2.5207
Epoch [19/500], Loss: 2.4902
Epoch [20/500], Loss: 2.4565
Epoch [21/500], Loss: 2.4179
Epoch [22/500], Loss: 2.3769
Epoch [23/500], Loss: 2.3401
Epoch [24/500], Loss: 2.3054
Epoch [25/500], Loss: 2.2532
Epoch [26/500], Loss: 2.1798
Epoch [27/500], Loss: 2.1367
Epoch [28/500], Loss: 2.1071
Epoch [29/500], Loss: 2.0287
Epoch [30/500], Loss: 1.9584
Epoch [31/500], Loss: 1.9255
Epoch [32/500], Loss: 1.8291
Epoch [33/500], Loss: 1.7794
Epoch [34/500], Loss: 1.6985
Epoch [35/500], Loss: 1

In [29]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier_bi_att_, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%


Bi-LSTM With Double Attention Layer

In [30]:
import torch
import torch.nn as nn

class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attention_weights = nn.Linear(hidden_dim, 1, bias=False)
    
    def forward(self, lstm_out):
        attention_scores = self.attention_weights(lstm_out)  
        attention_scores = torch.squeeze(attention_scores, -1)  
        attention_weights = torch.softmax(attention_scores, dim=1) 
        
        context_vector = torch.sum(attention_weights.unsqueeze(-1) * lstm_out, dim=1)  
        return context_vector

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.bi_lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.attention1 = Attention(hidden_dim * 2)  
        self.attention2 = Attention(hidden_dim * 2)  
        self.fc = nn.Linear(hidden_dim * 4, num_classes)  

    def forward(self, features):
        lstm_out, _ = self.bi_lstm(features.unsqueeze(1))  
        context_vector1 = self.attention1(lstm_out)  
        context_vector2 = self.attention2(lstm_out)  
        
        combined_context_vector = torch.cat((context_vector1, context_vector2), dim=1)  
        logits = self.fc(combined_context_vector) 
        return logits

feature_dim = all_features.shape[1]
num_classes = len(class_to_idx)
hidden_dim = 128  
num_layers = 2    

classifier_bi_double_att = ImageClassifier(feature_dim, num_classes, hidden_dim, num_layers)

features_tensor = torch.tensor(all_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

import torch.optim as optim

criterion_bi_double_att = nn.CrossEntropyLoss()
optimizer_bi_double_att = optim.Adam(classifier_bi_double_att.parameters(), lr=0.001)

num_epochs = 500
for epoch in range(num_epochs):
    classifier_bi_double_att.train()
    optimizer_bi_double_att.zero_grad()
    outputs = classifier_bi_double_att(features_tensor)
    loss = criterion_bi_double_att(outputs, labels_tensor)
    loss.backward()
    optimizer_bi_double_att.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/500], Loss: 2.7729
Epoch [2/500], Loss: 2.7652
Epoch [3/500], Loss: 2.7549
Epoch [4/500], Loss: 2.7411
Epoch [5/500], Loss: 2.7216
Epoch [6/500], Loss: 2.6981
Epoch [7/500], Loss: 2.6695
Epoch [8/500], Loss: 2.6377
Epoch [9/500], Loss: 2.6028
Epoch [10/500], Loss: 2.5654
Epoch [11/500], Loss: 2.5280
Epoch [12/500], Loss: 2.4699
Epoch [13/500], Loss: 2.4117
Epoch [14/500], Loss: 2.3690
Epoch [15/500], Loss: 2.3012
Epoch [16/500], Loss: 2.2124
Epoch [17/500], Loss: 2.1407
Epoch [18/500], Loss: 2.1072
Epoch [19/500], Loss: 2.0191
Epoch [20/500], Loss: 1.9718
Epoch [21/500], Loss: 1.8118
Epoch [22/500], Loss: 1.7843
Epoch [23/500], Loss: 1.7107
Epoch [24/500], Loss: 1.5484
Epoch [25/500], Loss: 1.5729
Epoch [26/500], Loss: 1.4277
Epoch [27/500], Loss: 1.3032
Epoch [28/500], Loss: 1.1636
Epoch [29/500], Loss: 1.1213
Epoch [30/500], Loss: 0.9998
Epoch [31/500], Loss: 0.9617
Epoch [32/500], Loss: 0.8314
Epoch [33/500], Loss: 0.7135
Epoch [34/500], Loss: 0.7163
Epoch [35/500], Loss: 0

In [31]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

accuracy = evaluate(classifier_bi_double_att, features_tensor, labels_tensor)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.75%
