In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

In [2]:
# Read the three (data, label) shards. Each tuple is evaluated left to right,
# so the files are opened in the order data0, lab0, data1, lab1, data2, lab2.
train_data0, train_lab0 = np.load('data0.npy'), np.load('lab0.npy')
train_data1, train_lab1 = np.load('data1.npy'), np.load('lab1.npy')
train_data2, train_lab2 = np.load('data2.npy'), np.load('lab2.npy')

# Stack the shards along the sample axis (axis 0), images and labels in the same order.
final_train = np.concatenate([train_data0, train_data1, train_data2], axis=0)
final_labels = np.concatenate([train_lab0, train_lab1, train_lab2], axis=0)

# Hold out 20% of the samples for validation; the fixed random_state makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    final_train,
    final_labels,
    test_size=0.2,
    random_state=42,
)

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

In [4]:
class MyDataset(Dataset):
    """In-memory dataset pairing image tensors with float labels.

    Args:
        images: array-like exposing ``reshape``; it is reshaped to
            ``(-1, 1, 40, 168)`` and divided by 255.0
            (presumably raw 0-255 pixel values -- confirm against the data files).
        labels: array-like of targets, converted to a float tensor.
    """

    def __init__(self, images, labels):
        # Add an explicit single-channel axis before scaling.
        pixels = torch.FloatTensor(images.reshape(-1, 1, 40, 168))
        self.images = pixels / 255.0
        self.labels = torch.FloatTensor(labels)

    def __len__(self):
        """Number of samples (size of the leading image axis)."""
        return self.images.shape[0]

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at ``idx``."""
        sample = self.images[idx]
        target = self.labels[idx]
        return sample, target

In [5]:
# Wrap both splits in MyDataset so they yield (image, label) tensor pairs.
train_dataset = MyDataset(images=X_train, labels=y_train)
val_dataset = MyDataset(images=X_val, labels=y_val)

# Training batches are reshuffled each epoch; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [6]:
class AdvancedDigitSumCNN(nn.Module):
    """Three-stage CNN with a spatial attention gate and a scalar regression head.

    Pipeline: three conv + batch-norm + ReLU stages (2x2 max-pooling after the
    first two), a sigmoid attention map that rescales the feature map, global
    average pooling, then a 128 -> 256 -> 128 -> 1 MLP with dropout.

    Args:
        dropout_rate: dropout probability used after each hidden linear layer.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()

        # Convolutional feature extractor: 1 -> 32 -> 64 -> 128 channels.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        # 1x1 conv + sigmoid: one gating weight in (0, 1) per spatial location.
        self.attention = nn.Sequential(
            nn.Conv2d(128, 1, kernel_size=1),
            nn.Sigmoid(),
        )

        # Collapse the spatial dimensions to a single value per channel.
        self.global_pool = nn.AdaptiveAvgPool2d(1)

        # Regression head.
        self.fc1 = nn.Linear(128, 256)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(256, 128)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(128, 1)

    def forward(self, x):
        """Map a batch of single-channel images to one value per sample, shape ``(batch, 1)``."""
        feats = F.max_pool2d(F.relu(self.bn1(self.conv1(x))), 2)
        feats = F.max_pool2d(F.relu(self.bn2(self.conv2(feats))), 2)
        feats = F.relu(self.bn3(self.conv3(feats)))

        # The single-channel attention map broadcasts over all 128 feature channels.
        gated = feats * self.attention(feats)

        pooled = self.global_pool(gated)
        pooled = pooled.view(pooled.size(0), -1)

        hidden = self.dropout1(F.relu(self.fc1(pooled)))
        hidden = self.dropout2(F.relu(self.fc2(hidden)))
        return self.fc3(hidden)



## Focal-style L1 loss, added because it is reported to suit this kind of regression task better.
class FocalL1Loss(nn.Module):
    """Error-weighted L1 loss: ``mean(|pred - target| ** alpha * |pred - target|)``.

    Larger errors are weighted more heavily (with the default ``alpha=2`` the
    per-element loss is the cubed absolute error).

    Args:
        alpha: exponent applied to the absolute error to form the weighting factor.
        beta: stored on the module but not used by ``forward``; kept so existing
            constructor calls keep working.
        epsilon: lower bound applied to the absolute error before exponentiation.
    """

    def __init__(self, alpha=2.0, beta=0.25, epsilon=1e-6):
        super().__init__()
        self.alpha = alpha
        self.beta = beta  # NOTE: currently unused by forward()
        self.epsilon = epsilon

    def forward(self, pred, target):
        """Return the mean weighted error between ``pred`` and ``target``.

        If the two tensors hold the same number of elements but differ in shape
        (e.g. ``(B, 1)`` predictions against ``(B,)`` targets), ``pred`` is
        reshaped to ``target``'s shape first. Without this, the subtraction
        would broadcast to a ``(B, B)`` matrix and compare every prediction
        with every target.
        """
        if pred.shape != target.shape and pred.numel() == target.numel():
            pred = pred.reshape(target.shape)

        abs_err = torch.abs(pred - target)
        abs_err = torch.clamp(abs_err, min=self.epsilon)
        focal_factor = torch.pow(abs_err, self.alpha)
        loss = focal_factor * abs_err
        return loss.mean()

In [12]:
import torch
import torch.nn as nn
import torchvision.models as models

class ResNetSumPredictor(nn.Module):
    """ResNet-50 adapted to single-channel input with a one-unit regression head.

    Args:
        pretrained: when True, start from the ImageNet (IMAGENET1K_V1) weights,
            the same weights the legacy ``pretrained=True`` flag selected, and
            initialise the 1-channel stem from the RGB stem filters. When False,
            every layer is randomly initialised.
    """

    def __init__(self, pretrained=True):
        super().__init__()

        # The `weights=` API replaces the deprecated `pretrained=` flag.
        weights = models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
        self.resnet = models.resnet50(weights=weights)

        # Keep a handle on the original 3-channel stem so its filters can be
        # reused below without instantiating a second pretrained network.
        rgb_stem = self.resnet.conv1

        # Replace the first convolution so the network accepts 1-channel input.
        self.resnet.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False
        )

        if pretrained:
            # Sum the RGB filters over the input-channel axis to get the
            # equivalent single-channel filters: (64, 3, 7, 7) -> (64, 1, 7, 7).
            with torch.no_grad():
                self.resnet.conv1.weight.copy_(rgb_stem.weight.sum(dim=1, keepdim=True))

        # Swap the 1000-class classifier for a single regression output.
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_features, 1)

    def forward(self, x):
        """Return the backbone output for ``x``, one value per sample."""
        return self.resnet(x)

In [7]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
advmodel = AdvancedDigitSumCNN()
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU machine still loads when only the CPU is available.
advmodel.load_state_dict(torch.load('advmodel.pth', weights_only=True, map_location=device))
advmodel.to(device)

AdvancedDigitSumCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (attention): Sequential(
    (0): Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1))
    (1): Sigmoid()
  )
  (global_pool): AdaptiveAvgPool2d(output_size=1)
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc3): Linear(in_features=128, out_features=1, bias=True)
)

In [8]:
# Spot-check the custom CNN on the first validation batch.
advmodel.eval()
example_images, actual_labels = next(iter(val_loader))
example_images = example_images.to(device)

# Inference only: no_grad() avoids building an autograd graph for the forward pass.
with torch.no_grad():
    predictions = advmodel(example_images).cpu().numpy()
rounded_pred = predictions.round()

# Show at most 10 rows; the bound keeps a batch smaller than 10 from raising IndexError.
for i in range(min(10, len(rounded_pred))):
    print(f"Prediction: {rounded_pred[i][0]:.2f}, Actual: {actual_labels[i].item():.2f}")


Prediction: 15.00, Actual: 15.00
Prediction: 18.00, Actual: 18.00
Prediction: 19.00, Actual: 19.00
Prediction: 26.00, Actual: 26.00
Prediction: 25.00, Actual: 25.00
Prediction: 14.00, Actual: 14.00
Prediction: 10.00, Actual: 10.00
Prediction: 22.00, Actual: 23.00
Prediction: 17.00, Actual: 19.00
Prediction: 19.00, Actual: 20.00


In [9]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error

def evaluate_model(model, dataloader, device):
    """
    Evaluate model performance using multiple metrics.

    Args:
        model: module called as ``model(images)``; its output is flattened to
            one value per sample.
        dataloader: iterable of ``(images, labels)`` batches that also exposes
            ``.dataset`` (its length is used to average the loss).
        device: device the batches are moved to before the forward pass.

    Returns:
        dict with keys ``mse`` and ``rmse`` (computed on the raw, unrounded
        outputs) and ``mae``, ``exact_accuracy`` and ``within_one_accuracy``
        (computed on outputs rounded to the nearest integer; the two accuracies
        are percentages). The same dict is also printed.
    """
    model.eval()
    all_predictions = []
    all_labels = []
    total_loss = 0
    criterion = torch.nn.MSELoss()

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Flatten to 1-D explicitly. A bare squeeze() collapses a final
            # batch of size 1 to a 0-d value, which cannot be iterated by
            # list.extend() below.
            outputs = model(images).reshape(-1)
            labels = labels.reshape(-1)

            # MSELoss averages over the batch, so re-weight by the batch size
            # to get a per-sample total.
            loss = criterion(outputs, labels)
            total_loss += loss.item() * images.size(0)

            predictions = outputs.cpu().numpy().round()
            all_predictions.extend(predictions)
            all_labels.extend(labels.cpu().numpy())

    all_predictions = np.array(all_predictions)
    all_labels = np.array(all_labels)

    # Loss-based metrics use the raw outputs.
    avg_mse = total_loss / len(dataloader.dataset)
    rmse = np.sqrt(avg_mse)

    # The remaining metrics use the rounded predictions.
    abs_errors = np.abs(all_predictions - all_labels)
    mae = np.mean(abs_errors)
    accuracy = np.mean(all_predictions == all_labels)
    within_one_accuracy = np.mean(abs_errors <= 1)

    metrics = {
        "mse": avg_mse,
        "rmse": rmse,
        "mae": mae,
        "exact_accuracy": accuracy * 100,
        "within_one_accuracy": within_one_accuracy * 100
    }

    print(metrics)
    return metrics

In [10]:
# Custom CNN on the training split; the trailing ';' keeps the cell from echoing a return value.
evaluate_model(model=advmodel, dataloader=train_loader, device=device);

{'mse': 1.4782657799720764, 'rmse': 1.2158395370985746, 'mae': 0.9092917, 'exact_accuracy': 34.57083333333333, 'within_one_accuracy': 80.37083333333334}


In [11]:
# Custom CNN on the validation split; the trailing ';' keeps the cell from echoing a return value.
evaluate_model(model=advmodel, dataloader=val_loader, device=device);

{'mse': 2.3246416521072386, 'rmse': 1.5246775567664261, 'mae': 1.1206666, 'exact_accuracy': 30.133333333333333, 'within_one_accuracy': 72.6}


In [14]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# pretrained=False: load_state_dict (strict by default) overwrites every
# parameter and buffer with the checkpoint, so fetching ImageNet weights first
# would be wasted work.
resmodel = ResNetSumPredictor(pretrained=False)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU machine still loads when only the CPU is available.
resmodel.load_state_dict(torch.load('resmodel.pth', weights_only=True, map_location=device))
resmodel.to(device)



ResNetSumPredictor(
  (resnet): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(


In [15]:
# Spot-check the ResNet model on the first validation batch.
resmodel.eval()
example_images, actual_labels = next(iter(val_loader))
example_images = example_images.to(device)

# Inference only: no_grad() avoids building an autograd graph for the forward pass.
with torch.no_grad():
    predictions = resmodel(example_images).cpu().numpy()
rounded_pred = predictions.round()

# Show at most 10 rows; the bound keeps a batch smaller than 10 from raising IndexError.
for i in range(min(10, len(rounded_pred))):
    print(f"Prediction: {rounded_pred[i][0]:.2f}, Actual: {actual_labels[i].item():.2f}")

Prediction: 15.00, Actual: 15.00
Prediction: 18.00, Actual: 18.00
Prediction: 19.00, Actual: 19.00
Prediction: 26.00, Actual: 26.00
Prediction: 25.00, Actual: 25.00
Prediction: 14.00, Actual: 14.00
Prediction: 10.00, Actual: 10.00
Prediction: 23.00, Actual: 23.00
Prediction: 19.00, Actual: 19.00
Prediction: 20.00, Actual: 20.00


In [16]:
# ResNet model on the training split; the trailing ';' keeps the cell from echoing a return value.
evaluate_model(model=resmodel, dataloader=train_loader, device=device);

{'mse': 0.032732275544355316, 'rmse': 0.18092063327424907, 'mae': 0.016041666, 'exact_accuracy': 98.70833333333333, 'within_one_accuracy': 99.775}


In [18]:
# ResNet model on the validation split; the trailing ';' keeps the cell from echoing a return value.
evaluate_model(model=resmodel, dataloader=val_loader, device=device);

{'mse': 0.26381338940560817, 'rmse': 0.513627675856362, 'mae': 0.09266666, 'exact_accuracy': 94.91666666666667, 'within_one_accuracy': 98.05}
