Loading the model in 

In [18]:
import torch
import pandas as pd
from torch import nn 
import numpy as np

import joblib

# from common_utils import MLPBuilder
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from sklearn.model_selection import train_test_split

modelname = 'CNN_Original_SWlab'

In [19]:
print("PyTorch version:", torch.__version__)

# Check if CUDA is available
print("CUDA available:", torch.cuda.is_available())

# Get current CUDA device index (if available)
if torch.cuda.is_available():
    print("Current CUDA device index:", torch.cuda.current_device())
    print("CUDA device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("No CUDA devices found.")
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

PyTorch version: 2.4.1+cpu
CUDA available: False
No CUDA devices found.


In [20]:
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # Convolutional Layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)   # output size = 8x8x32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)  # output size = 4x4x64
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)  # output size = 4x4x128
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)  # output size = 4x4x256
        # Batch Normalization Layers
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.batchnorm3 = nn.BatchNorm2d(128)
        self.batchnorm4 = nn.BatchNorm2d(256)
        # Fully Connected Layers
        self.fc1 = nn.Linear(256 * 4 * 4, 512)  # 4x4x256 -> 512
        self.fc2 = nn.Linear(512, 256)  # 512-> 256
        self.fc3 = nn.Linear(256, 64)  # 256-> 64

        # Dropout for regularization
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = x.unsqueeze(1)  
        x = F.relu(self.batchnorm1(self.conv1(x)))  
        x = F.relu(self.batchnorm2(self.conv2(x)))  
        x = F.relu(self.batchnorm3(self.conv3(x)))  
        x = F.relu(self.batchnorm4(self.conv4(x))) 

        x = x.view(x.size(0), -1)  # Flatten

        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)  # Output 64 logits (for 64 locations)

        return x

In [21]:
# we trained the model with 8 features

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN()
model = torch.load('models/CNN/'+modelname+'.pth', map_location=device)
model.to(device)
model.eval()  

  model = torch.load('models/CNN/'+modelname+'.pth', map_location=device)


CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=4096, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=64, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [22]:
for param in model.conv1.parameters():
    param.requires_grad = False
for param in model.conv2.parameters():
    param.requires_grad = False
for param in model.conv3.parameters():
    param.requires_grad = False
for param in model.conv4.parameters():
    param.requires_grad = True

for param in model.fc1.parameters():
    param.requires_grad = True
for param in model.fc2.parameters():
    param.requires_grad = True
for param in model.fc3.parameters():
    param.requires_grad = True


for name, param in model.named_parameters():
    print(f"Layer: {name} | Trainable: {param.requires_grad}")


Layer: conv1.weight | Trainable: False
Layer: conv1.bias | Trainable: False
Layer: conv2.weight | Trainable: False
Layer: conv2.bias | Trainable: False
Layer: conv3.weight | Trainable: False
Layer: conv3.bias | Trainable: False
Layer: conv4.weight | Trainable: True
Layer: conv4.bias | Trainable: True
Layer: batchnorm1.weight | Trainable: True
Layer: batchnorm1.bias | Trainable: True
Layer: batchnorm2.weight | Trainable: True
Layer: batchnorm2.bias | Trainable: True
Layer: batchnorm3.weight | Trainable: True
Layer: batchnorm3.bias | Trainable: True
Layer: batchnorm4.weight | Trainable: True
Layer: batchnorm4.bias | Trainable: True
Layer: fc1.weight | Trainable: True
Layer: fc1.bias | Trainable: True
Layer: fc2.weight | Trainable: True
Layer: fc2.bias | Trainable: True
Layer: fc3.weight | Trainable: True
Layer: fc3.bias | Trainable: True


## Loading test data in 

In [23]:
model_path = './models/CNN/'

import pickle

with open('./Data/CNN_Data/CNN_test_data.pkl', 'rb') as f:
    training_samples = pickle.load(f)


In [24]:
if training_samples:
    X_Sequence = np.array([sample[0] for sample in training_samples])  # Shape: (num_samples, Tx, 8)
    y_Sequence = np.array([sample[1] for sample in training_samples])  # Shape: (num_samples, 2) -> (X_Coord, Y_Coord)
else:
    X_Sequence = np.array([])
    y_Sequence = np.array([])

print("Generated training data shape:", X_Sequence.shape)
print("Generated labels shape:", y_Sequence.shape)

Generated training data shape: (6946, 8, 8)
Generated labels shape: (6946, 2)


In [25]:

# Perform train-test split (80% train, 20% test)

X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_Sequence, y_Sequence, test_size=0.15, random_state=42
)

X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.1765, random_state=42  # 0.1765 * 0.85 ≈ 0.15
)

# Convert (X_Coord, Y_Coord) to a single class index
y_train = np.array([x * 8 + y for x, y in y_train])
y_val = np.array([x * 8 + y for x, y in y_val])
y_test = np.array([x * 8 + y for x, y in y_test])

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.long)  # Must be long for classification
y_val = torch.tensor(y_val, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train, X_val, X_test = X_train.to(device), X_val.to(device), X_test.to(device)
y_train, y_val, y_test = y_train.to(device), y_val.to(device), y_test.to(device)


# Implementing transfer learning in test environment

In [26]:
num_epochs = 100  # Adjust based on performance
batch_size = 32

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001, weight_decay = 1e-4)


In [27]:
def evaluate_model(model, X, y, dataset_name, batch_size=32):
    model.eval()
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    correct = 0
    total = 0
    total_displacement = 0.0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU/CPU
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # Get predicted class
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Convert predicted and actual labels back to (X, Y) coordinates
            predicted_coords = torch.stack((predicted // 8, predicted % 8), dim=1).float()
            actual_coords = torch.stack((labels // 8, labels % 8), dim=1).float()
            
            # Compute Euclidean distance (displacement)
            displacement = torch.norm(predicted_coords - actual_coords, dim=1).sum().item()
            total_displacement += displacement

    accuracy = 100 * correct / total
    average_displacement = total_displacement / total

    print(f"{dataset_name} Accuracy: {accuracy:.2f}%")
    print(f"{dataset_name} Average Displacement Error: {average_displacement:.4f}")


In [28]:
evaluate_model(model, X_test, y_test, "Test")

Test Accuracy: 43.86%
Test Average Displacement Error: 1.9783


In [29]:
patience = 20
best_val_loss = float("inf")

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = TensorDataset(X_val, y_val)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Training loop
model.train()
no_improve_epochs = 0  # Early stopping counter

for epoch in range(num_epochs):
    running_loss = 0.0

    # Training phase
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU/CPU

        optimizer.zero_grad()
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)

    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        no_improve_epochs = 0
    else:
        no_improve_epochs += 1

    if no_improve_epochs >= patience:
        print(f"Early stopping at epoch {epoch + 1}")
        break



Epoch [1/100], Train Loss: 1.0056, Val Loss: 0.4519
Epoch [2/100], Train Loss: 0.8078, Val Loss: 0.4089
Epoch [3/100], Train Loss: 0.7283, Val Loss: 0.3774
Epoch [4/100], Train Loss: 0.5915, Val Loss: 0.3594
Epoch [5/100], Train Loss: 0.5291, Val Loss: 0.3198
Epoch [6/100], Train Loss: 0.5139, Val Loss: 0.3025
Epoch [7/100], Train Loss: 0.4695, Val Loss: 0.2691
Epoch [8/100], Train Loss: 0.3996, Val Loss: 0.2518
Epoch [9/100], Train Loss: 0.3841, Val Loss: 0.2675
Epoch [10/100], Train Loss: 0.3545, Val Loss: 0.2238
Epoch [11/100], Train Loss: 0.3476, Val Loss: 0.2994
Epoch [12/100], Train Loss: 0.3276, Val Loss: 0.2054
Epoch [13/100], Train Loss: 0.2887, Val Loss: 0.2390
Epoch [14/100], Train Loss: 0.2719, Val Loss: 0.1929
Epoch [15/100], Train Loss: 0.2389, Val Loss: 0.1761
Epoch [16/100], Train Loss: 0.2336, Val Loss: 0.2780
Epoch [17/100], Train Loss: 0.2184, Val Loss: 0.2238
Epoch [18/100], Train Loss: 0.2004, Val Loss: 0.2491
Epoch [19/100], Train Loss: 0.2232, Val Loss: 0.1993
Ep

In [30]:
evaluate_model(model, X_train, y_train, "Train")
evaluate_model(model, X_val, y_val, "Validation")
evaluate_model(model, X_test, y_test, "Test")

Train Accuracy: 99.88%
Train Average Displacement Error: 0.0037
Validation Accuracy: 92.52%
Validation Average Displacement Error: 0.2426
Test Accuracy: 87.62%
Test Average Displacement Error: 0.3731
