# Best Performers Weighted Ensemble

This program combines our top two models, ResNet18 and pretrained VGG16, weighting each by the inverse of its multiclass log loss on the Kaggle submission. ResNet18 received a Kaggle score of 0.28200 and pretrained VGG16 received a score of 0.41719. These were our best performers, so we decided to combine them in a weighted ensemble. The weighted ensemble was our best performance on the Kaggle submission, receiving a Kaggle score of 0.21775.

In [None]:
import os
import csv
import torch
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm
import torch.nn as nn
import numpy as np

# Input/output locations -- update these with your own paths
test_dir = r'C:\Users\blake\OneDrive\Desktop\MCS\Artifical Neural Networks\distracted-driving-behaviors\state-farm-distracted-driver-detection\imgs\test'
submission_file = "best_performers_weighted_ensemble_submission.csv"

# Inference settings
IMG_HEIGHT = IMG_WIDTH = 224  # every test image is resized to this square size
BATCH_SIZE = 64
NUM_CLASSES = 10

# Run on the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Test-time preprocessing: resize, convert to tensor, ImageNet normalization
transform_test = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

def get_resnet_model(num_classes):
    """Build a ResNet18 whose final layer outputs ``num_classes`` logits.

    The network is created without pretrained weights; its ``fc`` layer is
    replaced by a new linear layer sized for ``num_classes``.

    Args:
        num_classes: Number of output units for the replaced ``fc`` layer.

    Returns:
        The ResNet18 module with the new ``fc`` head.
    """
    # ``weights=None`` is the current spelling of the deprecated
    # ``pretrained=False`` flag and avoids torchvision's deprecation warning.
    model = models.resnet18(weights=None)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

def get_vgg_model(num_classes):
    """Build a VGG16 whose last classifier layer outputs ``num_classes`` logits.

    The network is created without pretrained weights; ``classifier[6]`` is
    replaced by a new linear layer sized for ``num_classes``.

    Args:
        num_classes: Number of output units for the replaced classifier layer.

    Returns:
        The VGG16 module with the new final classifier layer.
    """
    # ``weights=None`` is the current spelling of the deprecated
    # ``pretrained=False`` flag and avoids torchvision's deprecation warning.
    model = models.vgg16(weights=None)
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
    return model

# Ensemble members as (model name, checkpoint path, Kaggle log-loss score)
# Checkpoint paths -- update with your paths
models_list = [
    ("ResNet18", "C:/Users/blake/OneDrive/Desktop/MCS/Artifical Neural Networks/distracted-driving-behaviors/Resnet/best_resnet_model.pth", 0.28200),
    ("VGG16", "C:/Users/blake/OneDrive/Desktop/MCS/Artifical Neural Networks/distracted-driving-behaviors/Pretrained_VGG/best_vgg_model.pth", 0.41719),
]

# Weight each model by the inverse of its score, normalised so the weights
# sum to 1 (a smaller log loss therefore yields a larger weight)
scores = 1 / np.array([entry[2] for entry in models_list])
weights = scores / np.sum(scores)

# Map each model name to a freshly built (untrained) network instance
model_classes = {
    "ResNet18": get_resnet_model(NUM_CLASSES),
    "VGG16": get_vgg_model(NUM_CLASSES),
}

# Load the saved parameters into each network and prepare it for inference
for name, path, _ in models_list:
    model = model_classes[name]
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs; it avoids executing arbitrary pickled
    # code from the checkpoint and silences torch.load's FutureWarning.
    state_dict = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    # .to() and .eval() modify the module in place, so the instance already
    # stored in model_classes is the one that ends up on `device` in eval mode.
    model.to(device)
    model.eval()

# Test Image Loader
class TestDataset(torch.utils.data.Dataset):
    """Dataset over a list of image files, yielding (image, file name) pairs."""

    def __init__(self, image_paths, transform):
        """Keep the image paths and the transform applied to each loaded image."""
        self.transform = transform
        self.image_paths = image_paths

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as RGB and apply the transform.

        Returns:
            A ``(transformed_image, file_name)`` tuple, where ``file_name``
            is the base name of the image path.
        """
        path = self.image_paths[idx]
        rgb_image = Image.open(path).convert("RGB")
        return self.transform(rgb_image), os.path.basename(path)

# Gather every ".jpg" entry in the test directory and serve it in fixed-order batches
jpg_names = [entry for entry in os.listdir(test_dir) if entry.endswith(".jpg")]
test_image_paths = [os.path.join(test_dir, entry) for entry in jpg_names]
test_dataset = TestDataset(test_image_paths, transform_test)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Write the weighted-ensemble class probabilities for every test image to the CSV
header = ["img"] + [f"c{i}" for i in range(NUM_CLASSES)]

with open(submission_file, "w", newline="") as csv_file, torch.no_grad():
    writer = csv.writer(csv_file)
    writer.writerow(header)

    for images, img_names in tqdm(test_loader, desc="Processing Batches"):
        images = images.to(device)
        blended = np.zeros((images.size(0), NUM_CLASSES))

        # Accumulate each model's softmax probabilities scaled by its ensemble weight
        for (name, _, _), weight in zip(models_list, weights):
            logits = model_classes[name](images)
            probabilities = torch.softmax(logits, dim=1).cpu().numpy()
            blended += weight * probabilities

        # Clamp to [0, 1], then renormalise so each row sums to 1
        blended = np.clip(blended, 0, 1)
        row_totals = blended.sum(axis=1, keepdims=True)
        blended = blended / row_totals

        # One CSV row per image: file name followed by the class probabilities
        writer.writerows(
            [img_name] + row
            for img_name, row in zip(img_names, blended.tolist())
        )


print(f"Submission file saved as {submission_file}.")


  model.load_state_dict(torch.load(path, map_location=device))
Processing Batches: 100%|██████████| 1246/1246 [12:01<00:00,  1.73it/s]  

Submission file saved as best_performers_weighted_ensemble_submission.csv.



