# Model 2

In [1]:
import os
import torch
import random
import numpy as np
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import defaultdict
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import DataLoader, Subset, random_split, Dataset

## initiation:

In [2]:
class NumpyFolderDataset(Dataset):
    def __init__(self, root_dir, transform=None):

        self.root_dir = root_dir
        self.transform = transform
        self.samples = []  # List to store file paths and their labels

        # Traverse through the directory structure
        for class_idx, class_name in enumerate(sorted(os.listdir(root_dir))):
            class_dir = os.path.join(root_dir, class_name)
            if os.path.isdir(class_dir):
                for file_name in os.listdir(class_dir):
                    if file_name.endswith('.npy'):
                        file_path = os.path.join(class_dir, file_name)
                        self.samples.append((file_path, class_idx))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        file_path, label = self.samples[idx]
        np_array = np.load(file_path).astype(np.float32)  # Load the .npy file

        if self.transform:
            np_array = self.transform(np_array)
        return np_array, label

# Define transformations
transform = transforms.Compose([
    transforms.Lambda(lambda x: torch.tensor(x)),  # Convert NumPy array to PyTorch tensor
    transforms.Lambda(lambda x: x.unsqueeze(0)),   # Add channel dimension for CNN
])

# Path to your dataset
dataset_path = r'C:\Users\Admin\PycharmProjects\Ramin_Thesis\DataSet\320by320\12_class'

# Create the dataset
dataset = NumpyFolderDataset(root_dir=dataset_path, transform=transform)

# Split into training and test datasets
train_size = int(0.9 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Verify DataLoader
for batch_data, batch_labels in train_loader:
    print(f"Batch data shape: {batch_data.shape}, Batch labels: {batch_labels.shape}")
    break

Batch data shape: torch.Size([32, 1, 320, 320]), Batch labels: torch.Size([32])


### 1

In [4]:
class ImprovedCNNWithTransformer(nn.Module):
    def __init__(self, num_classes=8, num_transformer_layers=4, num_heads=16):
        super().__init__()

        # Convolutional Layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)

        self.shortcut = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=1, stride=2),
            nn.BatchNorm2d(128)
        )

        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)

        # Transformer Parameters
        self.embed_dim = 128  # Token embedding size
        self.global_pool = nn.AdaptiveAvgPool2d((4, 4))  # Fixed-size output for tokenization

        # Transformer Encoder
        encoder_layer = TransformerEncoderLayer(d_model=self.embed_dim, nhead=num_heads, dim_feedforward=512, dropout=0.4)
        self.transformer = TransformerEncoder(encoder_layer, num_layers=num_transformer_layers)

        # Fully Connected Layers
        self.fc1 = nn.Linear(self.embed_dim, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        self.dropout = nn.Dropout(0.4)

    def forward(self, x):
        # Block 1
        x = F.gelu(self.bn1(self.conv1(x)))
        x = F.max_pool2d(x, 2)

        # Block 2
        x = F.gelu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(x, 2)

        # Block 3 with Residual
        shortcut = self.shortcut(x)  # Downsample shortcut
        x = F.gelu(self.bn3(self.conv3(x)) + shortcut)

        # Global Pooling
        x = self.global_pool(x)  # Shape: [batch_size, 128, 4, 4]
        batch_size, channels, height, width = x.size()

        # Prepare Transformer Input
        x = x.view(batch_size, channels, -1).permute(0, 2, 1)  # Shape: [batch, 16, 128]

        # Transformer Encoder
        x = self.transformer(x)  # Shape: [batch, 16, 128]
        x = x.mean(dim=1)  # Aggregate token representations (Shape: [batch, 128])

        # Fully Connected Layers
        x = self.dropout(F.gelu(self.fc1(x)))
        x = self.fc2(x)
        return x
model = ImprovedCNNWithTransformer()

## Eval Section:

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-2)
num_epochs = 100  # Assuming a total of 50 epochs
model.to(device)

# Variable to track the best model
best_test_accuracy = 0.0
best_model_path = "best_model.pth"

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    # Training loop
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    # Calculate training metrics
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = correct_predictions / total_samples
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, "
          f"Accuracy: {epoch_accuracy:.2%}, "
          f"LR: {optimizer.param_groups[0]['lr']:.6f}")

    # Start testing after 5th epoch
    if epoch >= 3:
        model.eval()
        correct_predictions = 0
        total_samples = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                correct_predictions += (predicted == labels).sum().item()
                total_samples += labels.size(0)

        # Calculate test accuracy
        test_accuracy = correct_predictions / total_samples
        print(f"Test Accuracy after Epoch {epoch+1}: {test_accuracy:.2%}")

        # Save the model if it's the best so far
        if test_accuracy > best_test_accuracy:
            best_test_accuracy = test_accuracy
            torch.save(model.state_dict(), best_model_path)
            print(f"###################### Best model saved with accuracy ######################: {best_test_accuracy:.2%}")

# Load the best model for further use
print(f"Training complete. Best model accuracy: {best_test_accuracy:.2%}")
model.load_state_dict(torch.load(best_model_path))


Epoch [1/100], Loss: 1.4947, Accuracy: 36.93%, LR: 0.000300
Epoch [2/100], Loss: 0.9868, Accuracy: 59.95%, LR: 0.000300
Epoch [3/100], Loss: 0.8177, Accuracy: 66.99%, LR: 0.000300
Epoch [4/100], Loss: 0.7589, Accuracy: 69.37%, LR: 0.000300
Test Accuracy after Epoch 4: 74.84%
###################### Best model saved with accuracy ######################: 74.84%
Epoch [5/100], Loss: 0.6855, Accuracy: 71.99%, LR: 0.000300
Test Accuracy after Epoch 5: 64.10%
Epoch [6/100], Loss: 0.6697, Accuracy: 73.17%, LR: 0.000300
Test Accuracy after Epoch 6: 77.08%
###################### Best model saved with accuracy ######################: 77.08%
Epoch [7/100], Loss: 0.5384, Accuracy: 78.65%, LR: 0.000300
Test Accuracy after Epoch 7: 78.21%
###################### Best model saved with accuracy ######################: 78.21%
Epoch [8/100], Loss: 0.4920, Accuracy: 80.63%, LR: 0.000300
Test Accuracy after Epoch 8: 81.25%
###################### Best model saved with accuracy ######################: 81.25%


KeyboardInterrupt: 

In [7]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Define your class names manually
class_names = (["Class_1", "Class_2", "Class_3", "Class_4",
               "Class_5", "Class_6", "Class_7", "Class_8"])

# "Class_9", "Class_10", "Class_11", "Class_12"]  # Replace with actual class names

def evaluate_model(model, test_loader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Evaluating"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            # Get predictions
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Convert to numpy arrays for evaluation
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Overall accuracy
    accuracy = accuracy_score(all_labels, all_preds)
    print(f"Overall Test Accuracy: {accuracy:.4f}")

    # Per-class accuracy
    class_accuracy = np.diag(confusion_matrix(all_labels, all_preds)) / np.bincount(all_labels)
    for i, acc in enumerate(class_accuracy):
        print(f"Class {i} Accuracy: {acc:.4f}")

    # Classification report
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, target_names=class_names))

    # Confusion matrix
    print("\nConfusion Matrix:")
    print(confusion_matrix(all_labels, all_preds))

    return accuracy

# Test the model
test_accuracy = evaluate_model(model, test_loader, device)


Evaluating: 100%|██████████| 20/20 [00:06<00:00,  3.21it/s]

Overall Test Accuracy: 0.9215
Class 0 Accuracy: 0.9706
Class 1 Accuracy: 0.8427
Class 2 Accuracy: 0.9565
Class 3 Accuracy: 0.9865
Class 4 Accuracy: 1.0000
Class 5 Accuracy: 0.9733
Class 6 Accuracy: 0.7976
Class 7 Accuracy: 0.8750

Classification Report:
              precision    recall  f1-score   support

     Class_1       0.96      0.97      0.96        68
     Class_2       0.87      0.84      0.86        89
     Class_3       0.98      0.96      0.97        92
     Class_4       0.90      0.99      0.94        74
     Class_5       0.99      1.00      0.99        70
     Class_6       0.90      0.97      0.94        75
     Class_7       0.99      0.80      0.88        84
     Class_8       0.81      0.88      0.84        72

    accuracy                           0.92       624
   macro avg       0.92      0.93      0.92       624
weighted avg       0.92      0.92      0.92       624


Confusion Matrix:
[[66  0  2  0  0  0  0  0]
 [ 0 75  0  0  0  0  1 13]
 [ 3  0 88  0  1  0  0


