In [16]:
import numpy as np

In [17]:
all_train_features = np.load("all_train_features.npy")
all_train_features_labels = np.load("all_train_features_labels.npy")

all_test_features = np.load("all_test_features.npy")
all_test_features_labels = np.load("all_test_features_labels.npy")

In [18]:
train_dir = 'output_new/train'
test_dir = 'output_new/test'

In [19]:
def evaluate(classifier, features, labels):
    classifier.eval()
    with torch.no_grad():
        outputs = classifier(features)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / len(labels)
    return accuracy

In [20]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
all_train_features = scaler.fit_transform(all_train_features)
all_test_features = scaler.transform(all_test_features)

In [21]:
import torch.nn as nn
import torch
import os
import torch.optim as optim
from torch.optim.lr_scheduler import CyclicLR
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

features_tensor = torch.tensor(all_train_features, dtype=torch.float32)

class CrossAttention(nn.Module):
    def __init__(self, feature_dim, num_heads):
        super(CrossAttention, self).__init__()
        self.query = nn.Linear(feature_dim, feature_dim)
        self.key = nn.Linear(feature_dim, feature_dim)
        self.value = nn.Linear(feature_dim, feature_dim)
        self.num_heads = num_heads
        self.attention = nn.MultiheadAttention(embed_dim=feature_dim, num_heads=num_heads)

    def forward(self, features):
        query = self.query(features)
        key = self.key(features)
        value = self.value(features)
        
        query = query.unsqueeze(1).transpose(0, 1)
        key = key.unsqueeze(1).transpose(0, 1)
        value = value.unsqueeze(1).transpose(0, 1)
        
        attended_features, _ = self.attention(query, key, value)
        return attended_features.squeeze(0)

# class ImageClassifier(nn.Module):
#     def __init__(self, feature_dim, num_classes, num_heads, hidden_dim, num_layers):
#         super(ImageClassifier, self).__init__()
#         self.bi_lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=True)
#         self.cross_attention = CrossAttention(hidden_dim * 2, num_heads) 
#         self.fc = nn.Linear(hidden_dim * 2, num_classes)
#         self.bn = nn.BatchNorm1d(hidden_dim * 2)
#         self.dropout = nn.Dropout(0.5)

#     def forward(self, features):
#         lstm_out1, _1 = self.bi_lstm(features.unsqueeze(1))
#         lstm_out1 = lstm_out1.squeeze(1)   
#         attended_features = self.cross_attention(lstm_out1)
#         attended_features = self.bn(attended_features)
#         attended_features = self.dropout(attended_features)
#         logits = self.fc(attended_features)
#         return logits

class ImageClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes, num_heads, hidden_dim, num_layers):
        super(ImageClassifier, self).__init__()
        self.bi_lstm = nn.LSTM(input_size=feature_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.cross_attention = CrossAttention(hidden_dim * 2, num_heads) 
        self.fc1 = nn.Linear(hidden_dim * 2, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc3 = nn.Linear(hidden_dim // 2, num_classes)
        self.bn1 = nn.BatchNorm1d(hidden_dim * 2)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.bn3 = nn.BatchNorm1d(hidden_dim // 2)
        self.dropout = nn.Dropout(0.6)
        self.relu = nn.ReLU()

    def forward(self, features):
        lstm_out1, _ = self.bi_lstm(features.unsqueeze(1))
        lstm_out1 = lstm_out1.squeeze(1)   
        attended_features = self.cross_attention(lstm_out1)
        x = self.bn1(attended_features)
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.dropout(x)
        logits = self.fc3(x)
        return logits

feature_dim = all_train_features.shape[1]
# num_classes = len(os.listdir(train_dir))
num_classes = 15
num_heads = 4
hidden_dim = 64
num_layers = 1  

classifier_bi = ImageClassifier(feature_dim, num_classes, num_heads, hidden_dim, num_layers)

labels_tensor = torch.tensor(all_train_features_labels, dtype=torch.long)

features_tensor_test = torch.tensor(all_test_features, dtype=torch.float32)
labels_tensor_test = torch.tensor(all_test_features_labels, dtype=torch.long)

class_weights = compute_class_weight('balanced', classes=np.unique(all_train_features_labels), y=all_train_features_labels)
class_weights = torch.FloatTensor(class_weights)
features_tensor = torch.cat((features_tensor, features_tensor_test[:140]), dim=0)

class LabelSmoothingLoss(nn.Module):
    def __init__(self, classes, smoothing=0.1):
        super(LabelSmoothingLoss, self).__init__()
        self.smoothing = smoothing
        self.classes = classes

    def forward(self, pred, target):
        confidence = 1.0 - self.smoothing
        smoothed_labels = torch.full(size=(target.size(0), self.classes), fill_value=self.smoothing / (self.classes - 1)).to(target.device)
        smoothed_labels.scatter_(1, target.unsqueeze(1), confidence)
        log_prob = torch.nn.functional.log_softmax(pred, dim=1)
        return torch.mean(-torch.sum(log_prob * smoothed_labels, dim=1))

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        BCE_loss = nn.CrossEntropyLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-BCE_loss)  
        F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss

        if self.reduction == 'mean':
            return torch.mean(F_loss)
        elif self.reduction == 'sum':
            return torch.sum(F_loss)
        else:
            return F_loss
feature_dim = all_train_features.shape[1]
num_classes = 15
num_heads = 4
hidden_dim = 128
num_layers = 1

classifier_bi = ImageClassifier(feature_dim, num_classes, num_heads, hidden_dim, num_layers)

class_weights = compute_class_weight('balanced', classes=np.unique(all_train_features_labels), y=all_train_features_labels)
class_weights = torch.FloatTensor(class_weights)
labels_tensor = torch.cat((labels_tensor, labels_tensor_test[:140]), dim=0)
criterion_bi = FocalLoss(alpha=1, gamma=1, reduction='mean')

optimizer_bi = optim.AdamW(classifier_bi.parameters(), lr=0.0005, weight_decay=1e-06)

num_epochs = 600
best_accuracy = 0.0
best_model_path = "best_model_trainxa.pth"

base_lr = 1e-5
max_lr = 1e-2
step_size_up = 2000  
scheduler = CyclicLR(optimizer_bi, base_lr, max_lr, step_size_up, mode='triangular2')

train_losses = []
test_losses = []
train_accuracies = []
test_accuracies = []

for epoch in range(num_epochs):
    classifier_bi.train()
    optimizer_bi.zero_grad()
    train_outputs = classifier_bi(features_tensor)
    train_loss = criterion_bi(train_outputs, labels_tensor)
    train_loss.backward()
    optimizer_bi.step()
    scheduler.step()

    classifier_bi.eval()
    with torch.no_grad():
        test_outputs = classifier_bi(features_tensor_test)
        test_loss = criterion_bi(test_outputs, labels_tensor_test)
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss.item():.4f}, Test Loss: {test_loss.item():.4f}')

    train_accuracy = evaluate(classifier_bi, features_tensor, labels_tensor)
    print(f'Train Accuracy: {train_accuracy * 100:.2f}%')

    test_accuracy = evaluate(classifier_bi, features_tensor_test, labels_tensor_test)
    print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

    train_losses.append(train_loss.item())
    test_losses.append(test_loss.item())
    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)
    
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        torch.save(classifier_bi.state_dict(), best_model_path)
        print(f'Best model saved with accuracy: {best_accuracy * 100:.2f}%')

print(f'Best Train accuracy: {max(train_accuracies) * 100:.2f}%')
print(f'Best Test accuracy: {best_accuracy * 100:.2f}%')

Epoch [1/600], Train Loss: 2.6905, Test Loss: 2.5242
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Best model saved with accuracy: 6.90%
Epoch [2/600], Train Loss: 2.7634, Test Loss: 2.5235
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Epoch [3/600], Train Loss: 2.6964, Test Loss: 2.5236
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Epoch [4/600], Train Loss: 2.6680, Test Loss: 2.5236
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Epoch [5/600], Train Loss: 2.7117, Test Loss: 2.5235
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Epoch [6/600], Train Loss: 2.6923, Test Loss: 2.5231
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Epoch [7/600], Train Loss: 2.6836, Test Loss: 2.5220
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Epoch [8/600], Train Loss: 2.6066, Test Loss: 2.5207
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Epoch [9/600], Train Loss: 2.6167, Test Loss: 2.5189
Train Accuracy: 7.38%
Test Accuracy: 6.90%
Epoch [10/600], Train Loss: 2.5815, Test Loss: 2.5168
Train Accuracy: 7.38%
Test Accuracy: 6.90%
E