In [38]:
import torch
import numpy as np
from tqdm import tqdm
import torch.nn as nn
from torchvision.models import resnet18
from torch.utils.data import DataLoader
import pickle

In [39]:
class DigitRecognitionModel(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.digit_cnn = resnet18(weights='DEFAULT')
        self.digit_cnn.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        in_features = self.digit_cnn.fc.in_features
        self.digit_cnn.fc = nn.Linear(in_features, num_classes)
        self.sum_predictor = nn.Sequential(
            nn.Linear(40, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 37)
        )

    def forward(self, x):
        batch_size, num_digits = x.shape[:2]
        digit_features = []
        for i in range(num_digits):
            digit_out = self.digit_cnn(x[:, i])
            digit_features.append(digit_out)
        digit_preds = torch.stack(digit_features, dim=1)
        sum_pred = self.sum_predictor(digit_preds.view(batch_size, -1))
        return sum_pred

In [40]:
def evaluate_model(model, data_loader, criterion, device):
    """Evaluate model on given dataset"""
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc='Evaluating'):
            digits = batch['digits'].to(device)
            labels = batch['label'].to(device)

            outputs = model(digits)
            loss = criterion(outputs, labels)

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    accuracy = 100. * correct / total
    avg_loss = total_loss / len(data_loader)

    return {
        'accuracy': accuracy,
        'loss': avg_loss,
        'predictions': np.array(predictions),
        'true_labels': np.array(true_labels)
    }

In [30]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [31]:
print("Loading model...")
model = DigitRecognitionModel().to(device)
model.load_state_dict(torch.load('best_digit_recognition_model.pt'))

Loading model...


  model.load_state_dict(torch.load('best_digit_recognition_model.pt'))


<All keys matched successfully>

In [35]:
def load_dataset(filename):
    """Load dataset from disk"""
    with open(filename, 'rb') as f:
        return pickle.load(f)

In [36]:
val = load_dataset('valset.pkl')
test = load_dataset('testset.pkl')

In [41]:
# Create data loaders
val_loader = DataLoader(val, batch_size=32, shuffle=False)
test_loader = DataLoader(test, batch_size=32, shuffle=False)

In [42]:
# Define loss function
criterion = nn.CrossEntropyLoss()

# Evaluate on validation set
print("\nEvaluating on validation set...")
val_results = evaluate_model(model, val_loader, criterion, device)
print(f"Validation Accuracy: {val_results['accuracy']:.2f}%")
print(f"Validation Loss: {val_results['loss']:.4f}")

# Evaluate on test set
print("\nEvaluating on test set...")
test_results = evaluate_model(model, test_loader, criterion, device)
print(f"Test Accuracy: {test_results['accuracy']:.2f}%")
print(f"Test Loss: {test_results['loss']:.4f}")

# Save results
np.savez('evaluation_results.npz',
         val_predictions=val_results['predictions'],
         val_true_labels=val_results['true_labels'],
         test_predictions=test_results['predictions'],
         test_true_labels=test_results['true_labels'])


Evaluating on validation set...


Evaluating: 100%|██████████| 141/141 [00:02<00:00, 61.53it/s]


Validation Accuracy: 95.82%
Validation Loss: 0.5575

Evaluating on test set...


Evaluating: 100%|██████████| 94/94 [00:01<00:00, 69.36it/s]

Test Accuracy: 96.63%
Test Loss: 0.3418



