In [22]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms 
from PIL import Image
# Custom Dataset class
class DigitSumDataset(Dataset):
    """Dataset of images and integer labels loaded from paired ``.npy`` files.

    Each entry of ``data_files`` is paired positionally with the entry of
    ``label_files`` at the same index. All loaded arrays are concatenated
    along axis 0 into ``self.data`` and ``self.labels``.

    Args:
        data_files: Paths of the ``.npy`` files holding the image arrays.
        label_files: Paths of the ``.npy`` files holding the labels, one per
            data file and in the same order.
        transform: Optional callable applied to the PIL image of each sample.
            When ``None`` the PIL image is returned as-is.

    Raises:
        ValueError: If no files are given, if the two file lists differ in
            length, or if the total number of images and labels disagree.
    """

    # Placeholder values returned alongside every sample by __getitem__.
    # NOTE(review): presumably these keep the 4-tuple layout compatible with
    # another dataset that yields per-sample auxiliary labels and a file
    # name — confirm against the consumers of this dataset.
    _PLACEHOLDER_LEN = 40
    _PLACEHOLDER_NAME = "--.png"

    def __init__(self, data_files, label_files, transform=None):
        # Materialize so that generators are accepted and len() is available.
        data_files = list(data_files)
        label_files = list(label_files)

        if not data_files:
            raise ValueError("data_files is empty; nothing to load")
        # zip() would silently drop the unpaired tail, so check explicitly.
        if len(data_files) != len(label_files):
            raise ValueError(
                f"got {len(data_files)} data files but {len(label_files)} label files"
            )

        # Combine all files into a single array each.
        self.data = np.concatenate([np.load(path) for path in data_files], axis=0)
        self.labels = np.concatenate([np.load(path) for path in label_files], axis=0)

        # __len__ is driven by the labels, so a count mismatch would otherwise
        # surface only as an IndexError (or as silently unused images) later on.
        if len(self.data) != len(self.labels):
            raise ValueError(
                f"loaded {len(self.data)} images but {len(self.labels)} labels"
            )

        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, placeholder, label, name)`` for sample ``idx``.

        ``image`` is the (optionally transformed) PIL image, ``placeholder``
        is a zero ``torch.long`` tensor of length 40, ``label`` is a
        ``torch.long`` scalar tensor and ``name`` is the constant string
        ``"--.png"``.
        """
        image, label = self.data[idx], self.labels[idx]

        # Values are cast to uint8 before building the PIL image.
        # Assumes each sample is a 2-D (H, W) array, i.e. grayscale — TODO confirm.
        image = Image.fromarray(image.astype(np.uint8))

        if self.transform is not None:
            image = self.transform(image)

        label = torch.tensor(label, dtype=torch.long)
        placeholder = torch.zeros(self._PLACEHOLDER_LEN, dtype=torch.long)
        return image, placeholder, label, self._PLACEHOLDER_NAME


# Preprocessing applied to every image before it reaches the model.
transform = transforms.Compose([
#    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),  # Random rotation ±15° and shifts up to 10%
    transforms.Resize((40, 168)),  # Resize image to the correct size
    transforms.ToTensor(),         # Convert image to Tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize (for grayscale images)
])

# File paths
data_files = ["data0.npy", "data1.npy", "data2.npy"]
label_files = ["lab0.npy", "lab1.npy", "lab2.npy"]

# Create the dataset
dataset = DigitSumDataset(data_files, label_files, transform=transform)

from torch.utils.data import DataLoader, Subset

# Fraction of the samples (taken in file order) that goes to the training
# split; the remainder becomes the test split.
TRAIN_FRACTION = 0.95

dataset_size = len(dataset)
split = int(TRAIN_FRACTION * dataset_size)  # 95% for training, 5% for testing

# Define train and test indices based on order (no shuffling before the split)
train_indices = list(range(split))
test_indices = list(range(split, dataset_size))

# Use Subset to create train and test datasets
train_dataset = Subset(dataset, train_indices)
test_dataset = Subset(dataset, test_indices)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

# DataLoader for batching; only the training batches are shuffled
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



Train dataset size: 28500
Test dataset size: 1500


In [23]:
# Sanity check: fetch a single training batch and report its tensor shapes.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    # Each batch is a 4-tuple; the second and fourth entries are not needed here.
    images, _, labels, _ = first_batch
    print("Images shape:", images.shape)  # (batch_size, C, H, W)
    print("Labels shape:", labels.shape)  # (batch_size,)


Images shape: torch.Size([16, 1, 40, 168])
Labels shape: torch.Size([16])


In [24]:
from models import MNISTDigitModel,MNISTSumModel,CombinedModel
import torch.optim as optim
# Hyperparameters used to build the model and its optimizer.
new_dropout = 0.001
new_learning_rate = 0.00001 ##0.0001 for CNN

# The digit model and the sum model are wrapped into one combined model.
digit_model = MNISTDigitModel(
    num_blocks=5,
    kernel_size=3,
    activation='relu',
    pool='max',
    dropout=new_dropout,
)
sum_model = MNISTSumModel()
combined_model = CombinedModel(digit_model, sum_model)

# Only parameters that still require gradients are handed to Adam.
optimizer = optim.Adam(
    (param for param in combined_model.parameters() if param.requires_grad),
    lr=new_learning_rate,
)



In [25]:
import os
# Restore the combined model's weights from a saved checkpoint, if present.
checkpoint_path = './checkpoints_comb/checkpoint_epoch_800.pth'
start_epoch = 0
if os.path.exists(checkpoint_path):
    print(f"Loading combined model checkpoint from {checkpoint_path}...")
    # map_location="cpu" lets a checkpoint that was saved from a GPU be read
    # on a machine without CUDA; load_state_dict then copies the values into
    # the model's existing parameters.
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source. Consider weights_only=True once it is confirmed
    # that the checkpoint holds nothing but tensors and plain Python values.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    combined_model.load_state_dict(checkpoint['model_state_dict'])
    start_epoch = checkpoint['epoch']
    print(f"Inference on  from epoch {start_epoch}...")
else:
    # Make the fallback visible: without this message the evaluation below
    # would silently run on weights that were never loaded.
    print(
        f"WARNING: checkpoint not found at {checkpoint_path}; "
        "combined_model keeps the weights it was constructed with."
    )


Loading combined model checkpoint from ./checkpoints_comb/checkpoint_epoch_800.pth...
Inference on  from epoch 800...


  checkpoint = torch.load(checkpoint_path)


In [26]:
# Print target vs. predicted sum for a few samples of the first test batch.
combined_model.eval()
num_to_show = 10  # upper bound on the number of samples printed

with torch.no_grad():  # Disable gradient computation for evaluation
    for images, _, labels_sum, nam in test_loader:
        # Forward pass; only the second output (the predicted sum) is used.
        _, sum_output = combined_model(images)
        # Clamp to the batch length so a batch shorter than num_to_show
        # does not raise an IndexError.
        for i in range(min(num_to_show, len(labels_sum))):
            print("Sample :", nam[i])
            print(f"Original Sum: {labels_sum[i]}")
            print(f"Predicted Sum: {sum_output[i].item()}")

        break  # Just evaluate the first batch and exit the loop


Sample : --.png
Original Sum: 18
Predicted Sum: 13.900890350341797
Sample : --.png
Original Sum: 22
Predicted Sum: 17.575048446655273
Sample : --.png
Original Sum: 27
Predicted Sum: 26.770898818969727
Sample : --.png
Original Sum: 19
Predicted Sum: 18.929349899291992
Sample : --.png
Original Sum: 13
Predicted Sum: 16.574840545654297
Sample : --.png
Original Sum: 19
Predicted Sum: 16.74129295349121
Sample : --.png
Original Sum: 21
Predicted Sum: 20.68996810913086
Sample : --.png
Original Sum: 12
Predicted Sum: 15.918709754943848
Sample : --.png
Original Sum: 20
Predicted Sum: 20.56634521484375
Sample : --.png
Original Sum: 29
Predicted Sum: 26.68421745300293


In [27]:
import torch
import torch.nn.functional as F

# Evaluate the combined model on the whole test split: mean squared error of
# the predicted sum and exact-match accuracy after rounding the prediction.
total_mse = 0  # running sum of squared errors over all samples
correct_predictions = 0
total_samples = 0

combined_model.eval()  # Set the model to evaluation mode

with torch.no_grad():  # Disable gradient computation for evaluation
    for images, _, labels_sum, nam in test_loader:
        # Forward pass; only the second output (the predicted sum) is used.
        _, sum_output = combined_model(images)

        # Flatten to one prediction per sample. Unlike squeeze(), reshape(-1)
        # keeps the batch axis when the batch holds a single sample.
        # Assumes the model emits exactly one value per sample — TODO confirm.
        preds = sum_output.reshape(-1)
        # Labels are integer tensors; compare in the predictions' dtype
        # instead of relying on implicit type promotion.
        targets = labels_sum.to(preds.dtype)

        # Sum of squared errors for the batch
        mse_batch = F.mse_loss(preds, targets, reduction='sum')
        total_mse += mse_batch.item()

        # Round predictions to the nearest integer and count exact matches
        rounded_preds = preds.round()
        correct_predictions += (rounded_preds == targets).sum().item()
        total_samples += labels_sum.size(0)

# Final metrics
if total_samples:
    mse = total_mse / total_samples  # Mean Squared Error
    accuracy = (correct_predictions / total_samples) * 100  # Accuracy as a percentage
else:
    # An empty test_loader would otherwise end in a ZeroDivisionError.
    print("No samples were evaluated; metrics are undefined.")
    mse = float("nan")
    accuracy = float("nan")

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Accuracy: {accuracy:.2f}%")


Mean Squared Error (MSE): 5.1791
Accuracy: 25.93%
