Data Preprocessing

In [None]:
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset, Dataset
from sklearn.model_selection import KFold
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score
import random
import csv

In [None]:
# Image preprocessing shared by every model in this notebook: resize to
# 224x224, convert to a float tensor, then normalise each channel with the
# mean/std values conventionally used for ImageNet-pretrained torchvision models.
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocessing_steps)

# Run on the first CUDA GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# ImageFolder assigns one integer label per sub-directory of the root.
full_dataset = datasets.ImageFolder(root='./403-Project3-Dataset/Dataset', transform=transform)

# Debug helpers (uncomment to inspect the discovered files and class mapping):
# for idx, (path, label) in enumerate(full_dataset.samples):
#     print(f"File {idx}: {path}, Label: {label}")
# print(full_dataset.class_to_idx)

# Seeded, shuffled K-fold splitter reused by the training cells below.
k_folds = 5
kfold = KFold(k_folds, shuffle=True, random_state=42)

Using device: cuda:0
File 0: ./403-Project3-Dataset/Dataset\Alex\Alex-Image01.png, Label: 0
File 1: ./403-Project3-Dataset/Dataset\Alex\Alex-Image02.png, Label: 0
File 2: ./403-Project3-Dataset/Dataset\Alex\Alex-Image03.png, Label: 0
File 3: ./403-Project3-Dataset/Dataset\Alex\Alex-Image04.png, Label: 0
File 4: ./403-Project3-Dataset/Dataset\Alex\Alex-Image05.png, Label: 0
File 5: ./403-Project3-Dataset/Dataset\Alex\Alex-Image06.png, Label: 0
File 6: ./403-Project3-Dataset/Dataset\Alex\Alex-Image07.png, Label: 0
File 7: ./403-Project3-Dataset/Dataset\Alex\Alex-Image08.png, Label: 0
File 8: ./403-Project3-Dataset/Dataset\Alex\Alex-Image09.png, Label: 0
File 9: ./403-Project3-Dataset/Dataset\Alex\Alex-Image10.png, Label: 0
File 10: ./403-Project3-Dataset/Dataset\Alex\Alex-Image100.png, Label: 0
File 11: ./403-Project3-Dataset/Dataset\Alex\Alex-Image101.png, Label: 0
File 12: ./403-Project3-Dataset/Dataset\Alex\Alex-Image102.png, Label: 0
File 13: ./403-Project3-Dataset/Dataset\Alex\Alex-

Model 1: CNN

In [None]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class CNNModel(nn.Module):
    """Small three-stage CNN that maps a 3x224x224 image to 2 class logits.

    Each stage is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2) ->
    Dropout(0.25), with 16, 32 and 64 output channels respectively.  Three
    2x poolings shrink 224x224 to 28x28, so the flattened feature vector has
    64 * 28 * 28 = 50176 entries (the same count the previous
    ``256 * 14 * 14`` constant happened to produce).

    The classifier head is Linear(50176, 128) -> ReLU -> Dropout(0.5) ->
    Linear(128, 2).  The output is raw logits (no softmax).
    """

    # Shape of the feature map produced by the third stage for 224x224 input.
    _FEATURE_CHANNELS = 64
    _FEATURE_SIDE = 28

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.25)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(0.25)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout3 = nn.Dropout(0.25)

        flat_features = self._FEATURE_CHANNELS * self._FEATURE_SIDE * self._FEATURE_SIDE
        self.fc1 = nn.Linear(flat_features, 128)
        self.dropout_fc1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        """Return logits of shape (batch, 2) for a (batch, 3, 224, 224) input.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce the
                feature count ``fc1`` expects.
        """
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.dropout1(x)

        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.dropout2(x)

        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        x = self.dropout3(x)

        # Flatten everything except the batch dimension.  Unlike
        # ``x.view(-1, N)``, this can never silently merge or split samples
        # when the input size is unexpected; fc1 raises a shape error instead.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.dropout_fc1(x)
        x = self.fc2(x)
        return x


CNN Training

In [None]:
# K-fold cross-validation of CNNModel: a fresh model is trained for 5 epochs
# on each training split and scored on the held-out split.
for fold, (train_idx, val_idx) in enumerate(kfold.split(full_dataset)):
    print(f'Fold {fold + 1}')

    train_subset = Subset(full_dataset, train_idx)
    val_subset = Subset(full_dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=64, shuffle=False)

    # Train on the same device the other models in this notebook use.
    model = CNNModel().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(5):
        running_loss = 0.0
        # Counters must start at zero every epoch; otherwise the first fold
        # fails with a NameError on a fresh kernel and later epochs/folds
        # report accuracy accumulated from earlier epochs and validation.
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # Predicted class = index of the largest logit.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_accuracy = 100 * correct / total
        print(f'Epoch {epoch + 1} | Loss: {running_loss / len(train_loader):.4f} | Training Accuracy: {train_accuracy:.2f}%')

    # Score the held-out split with dropout/batch-norm in inference mode.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Validation Accuracy for Fold {fold + 1}: {100 * correct / total:.2f}%')

Fold 1
Epoch 1 | Loss: 13.2577 | Training Accuracy: 63.14%
Epoch 2 | Loss: 2.3418 | Training Accuracy: 62.29%
Epoch 3 | Loss: 1.4733 | Training Accuracy: 59.47%
Epoch 4 | Loss: 0.9710 | Training Accuracy: 59.90%
Epoch 5 | Loss: 0.5997 | Training Accuracy: 60.01%
Validation Accuracy for Fold 1: 63.92%
Fold 2
Epoch 1 | Loss: 7.9362 | Training Accuracy: 51.55%
Epoch 2 | Loss: 4.5652 | Training Accuracy: 52.69%
Epoch 3 | Loss: 2.2214 | Training Accuracy: 53.61%
Epoch 4 | Loss: 0.8442 | Training Accuracy: 55.97%
Epoch 5 | Loss: 0.6421 | Training Accuracy: 57.63%
Validation Accuracy for Fold 2: 65.98%
Fold 3
Epoch 1 | Loss: 11.5987 | Training Accuracy: 54.23%
Epoch 2 | Loss: 7.4956 | Training Accuracy: 52.58%
Epoch 3 | Loss: 1.7868 | Training Accuracy: 55.67%
Epoch 4 | Loss: 1.0524 | Training Accuracy: 55.85%
Epoch 5 | Loss: 0.6826 | Training Accuracy: 55.82%
Validation Accuracy for Fold 3: 50.52%
Fold 4
Epoch 1 | Loss: 4.1141 | Training Accuracy: 55.05%
Epoch 2 | Loss: 1.7658 | Training Acc

ResNet Implementation

In [None]:
# Per-fold validation outputs of the ResNet head, keyed by fold index.
# Each value holds the sigmoid probabilities and the matching true labels so
# that the ensemble cell can combine them with the XGBoost fold predictions.
resnet_val_predictions = {}

for fold, (train_idx, val_idx) in enumerate(kfold.split(full_dataset)):
    print(f'Fold {fold + 1}')

    train_subset = Subset(full_dataset, train_idx)
    val_subset = Subset(full_dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=64, shuffle=False)

    # Pretrained ResNet-50 used as a frozen feature extractor.
    model = models.resnet50(pretrained=True)

    for param in model.parameters():
        param.requires_grad = False

    # Replace the classifier with a small trainable head emitting one logit.
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, 1)
    )
    model = model.to(device)

    # Only the new head is optimised.
    optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

    criterion = nn.BCEWithLogitsLoss()

    # NOTE(review): train() also switches the frozen backbone's BatchNorm
    # layers to training mode, so their running statistics keep updating even
    # though their weights are frozen -- confirm this is intended.
    model.train()
    for epoch in range(10):
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float()

            optimizer.zero_grad()
            # squeeze(1) drops only the logit dimension; a bare squeeze() would
            # also drop the batch dimension when a batch holds a single sample.
            outputs = model(inputs).squeeze(1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            probabilities = torch.sigmoid(outputs)
            predicted = (probabilities > 0.5).long()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_accuracy = 100 * correct / total
        print(f'Epoch {epoch + 1} | Loss: {running_loss / len(train_loader):.4f} | Training Accuracy: {train_accuracy:.2f}%')

    model.eval()
    val_preds = []
    val_labels = []
    # Start validation from zero; without this reset the reported validation
    # accuracy also counts the samples of the last training epoch.
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device).float()
            outputs = model(inputs).squeeze(1)
            probabilities = torch.sigmoid(outputs)
            val_preds.extend(probabilities.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

            predicted = (probabilities > 0.5).long()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = 100 * correct / total
    print(f'Validation Accuracy for Fold {fold + 1}: {val_accuracy:.2f}%')

    resnet_val_predictions[fold] = {
        "predictions": np.array(val_preds),
        "true_labels": np.array(val_labels)
    }

print("Training complete.")


Fold 1
Epoch 1 | Loss: 0.7117 | Training Accuracy: 55.67%
Epoch 2 | Loss: 0.4612 | Training Accuracy: 79.90%
Epoch 3 | Loss: 0.4295 | Training Accuracy: 82.99%
Epoch 4 | Loss: 0.3645 | Training Accuracy: 81.44%
Epoch 5 | Loss: 0.3298 | Training Accuracy: 85.05%
Epoch 6 | Loss: 0.3051 | Training Accuracy: 86.08%
Epoch 7 | Loss: 0.2613 | Training Accuracy: 88.14%
Epoch 8 | Loss: 0.2685 | Training Accuracy: 87.63%
Epoch 9 | Loss: 0.3320 | Training Accuracy: 85.82%
Epoch 10 | Loss: 0.2522 | Training Accuracy: 89.18%
Validation Accuracy for Fold 1: 88.04%
Fold 2
Epoch 1 | Loss: 0.7602 | Training Accuracy: 53.87%
Epoch 2 | Loss: 0.4780 | Training Accuracy: 75.00%
Epoch 3 | Loss: 0.5043 | Training Accuracy: 77.58%
Epoch 4 | Loss: 0.4066 | Training Accuracy: 80.41%
Epoch 5 | Loss: 0.4268 | Training Accuracy: 82.73%
Epoch 6 | Loss: 0.4064 | Training Accuracy: 82.99%
Epoch 7 | Loss: 0.3925 | Training Accuracy: 84.02%
Epoch 8 | Loss: 0.3926 | Training Accuracy: 82.99%
Epoch 9 | Loss: 0.2843 | Tra

In [None]:
# Train the ResNet head on the complete dataset, then write the model's
# predictions for those same images to a CSV file.
dataset_size = len(full_dataset)
indices = list(range(dataset_size))
random.seed(42)
random.shuffle(indices)

# NOTE(review): the loader below shuffles again every epoch, so the seeded
# shuffle of `indices` only affects the two bookkeeping lists that follow.
shuffled_dataset = Subset(full_dataset, indices)
shuffled_loader = DataLoader(shuffled_dataset, batch_size=64, shuffle=True)

shuffled_file_paths = [full_dataset.samples[i][0] for i in indices]
shuffled_labels = [full_dataset.samples[i][1] for i in indices]

# Frozen pretrained backbone with a small trainable single-logit head.
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 1)
)
model = model.to(device)


optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()


model.train()
for epoch in range(15):
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in shuffled_loader:
        inputs, labels = inputs.to(device), labels.to(device).float()

        optimizer.zero_grad()
        # squeeze(1) keeps the batch dimension even for a one-sample batch,
        # which a bare squeeze() would collapse to a 0-d tensor.
        outputs = model(inputs).squeeze(1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        probabilities = torch.sigmoid(outputs)
        predicted = (probabilities > 0.5).long()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1} | Loss: {running_loss / len(shuffled_loader):.4f} | Training Accuracy: {train_accuracy:.2f}%')

# Evaluation pass in the dataset's native order.  These are the same images
# the model was just trained on, so the results are not a held-out estimate.
unshuffled_loader = DataLoader(full_dataset, batch_size=64, shuffle=False)
model.eval()

results = []
with torch.no_grad():
    for batch_idx, (inputs, labels) in enumerate(unshuffled_loader):
        inputs, labels = inputs.to(device), labels.to(device).float()
        outputs = model(inputs).squeeze(1)
        probabilities = torch.sigmoid(outputs).cpu().numpy()
        predicted = (probabilities > 0.5).astype(int)

        # With shuffle=False, batch k covers dataset positions
        # [k * batch_size, k * batch_size + len(batch)), which lets the file
        # paths be recovered from full_dataset.samples.
        start_idx = batch_idx * unshuffled_loader.batch_size
        end_idx = start_idx + len(labels)
        file_paths = [full_dataset.samples[i][0] for i in range(start_idx, end_idx)]
        true_labels = labels.cpu().numpy()

        for file_path, true_label, pred_label in zip(file_paths, true_labels, predicted):
            results.append((file_path, int(true_label), int(pred_label)))


output_file = "evaluation_results.csv"
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["File_Name", "True_Label", "Predicted_Label"])
    for file_name, true_label, pred_label in results:
        writer.writerow([file_name, true_label, pred_label])

print(f"Evaluation results saved to {output_file}")


Epoch 1 | Loss: 0.6058 | Training Accuracy: 66.39%
Epoch 2 | Loss: 0.4969 | Training Accuracy: 76.49%
Epoch 3 | Loss: 0.4043 | Training Accuracy: 82.89%
Epoch 4 | Loss: 0.4689 | Training Accuracy: 78.14%
Epoch 5 | Loss: 0.3701 | Training Accuracy: 83.71%
Epoch 6 | Loss: 0.3274 | Training Accuracy: 84.74%
Epoch 7 | Loss: 0.4052 | Training Accuracy: 79.79%
Epoch 8 | Loss: 0.2789 | Training Accuracy: 87.84%
Epoch 9 | Loss: 0.2612 | Training Accuracy: 88.04%
Epoch 10 | Loss: 0.2120 | Training Accuracy: 91.34%
Epoch 11 | Loss: 0.2226 | Training Accuracy: 91.34%
Epoch 12 | Loss: 0.2852 | Training Accuracy: 87.63%
Epoch 13 | Loss: 0.2483 | Training Accuracy: 88.87%
Epoch 14 | Loss: 0.2252 | Training Accuracy: 90.72%
Epoch 15 | Loss: 0.2268 | Training Accuracy: 91.13%
Evaluation results saved to evaluation_results.csv


Fusion Method

Fused Dataset

Load & Preprocess Data

In [None]:
from sklearn.preprocessing import LabelEncoder

# Read the tabular training data that accompanies the images.
# NOTE(review): this is an absolute, machine-specific path -- confirm it
# points at the same dataset the image cells load.
csv_data = pd.read_csv(
    "/Users/Andrew/Documents/DATA403-Project3/403-PRoject3-Dataset/Dataset/train.csv"
)

# Target: the photographer name encoded as an integer class id.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(csv_data["photographer"])

# Features: every column except the image identifier and the target.
_non_feature_columns = ["image", "photographer"]
tabular_features = csv_data.drop(columns=_non_feature_columns)


XGBoost Training Loop

In [None]:
# One booster per fold, plus each fold's validation probabilities paired with
# the corresponding true labels (consumed later by the ensemble cell).
xgboost_models = []
xgboost_val_predictions = {}

# Booster settings are identical for every fold.
params = {
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    "eta": 0.01,
    "max_depth": 6,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "seed": 42
}

for fold, (train_idx, val_idx) in enumerate(kfold.split(tabular_features)):
    print(f'\nFold {fold + 1}')

    # Slice features by position and the encoded label array by the same indices.
    X_train = tabular_features.iloc[train_idx]
    X_val = tabular_features.iloc[val_idx]
    y_train = labels[train_idx]
    y_val = labels[val_idx]

    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval = xgb.DMatrix(X_val, label=y_val)

    # Both splits are logged each round; training stops once 50 rounds pass
    # without improvement on the monitored evaluation set.
    evals = [(dtrain, "train"), (dval, "validation")]
    xgb_model = xgb.train(
        params,
        dtrain,
        num_boost_round=1000,
        evals=evals,
        early_stopping_rounds=50,
    )
    xgboost_models.append(xgb_model)

    fold_val_probabilities = xgb_model.predict(dval)
    xgboost_val_predictions[fold] = (fold_val_probabilities, y_val)

print("\nXGBoost Training complete.")



Fold 1
[0]	train-logloss:0.68850	validation-logloss:0.68930
[1]	train-logloss:0.68608	validation-logloss:0.68758
[2]	train-logloss:0.68276	validation-logloss:0.68472
[3]	train-logloss:0.68033	validation-logloss:0.68252
[4]	train-logloss:0.67679	validation-logloss:0.67943
[5]	train-logloss:0.67355	validation-logloss:0.67694
[6]	train-logloss:0.67114	validation-logloss:0.67506
[7]	train-logloss:0.66855	validation-logloss:0.67291
[8]	train-logloss:0.66606	validation-logloss:0.67072
[9]	train-logloss:0.66302	validation-logloss:0.66824
[10]	train-logloss:0.66050	validation-logloss:0.66639
[11]	train-logloss:0.65795	validation-logloss:0.66445
[12]	train-logloss:0.65572	validation-logloss:0.66262
[13]	train-logloss:0.65381	validation-logloss:0.66145
[14]	train-logloss:0.65095	validation-logloss:0.65917
[15]	train-logloss:0.64797	validation-logloss:0.65700
[16]	train-logloss:0.64647	validation-logloss:0.65598
[17]	train-logloss:0.64364	validation-logloss:0.65358
[18]	train-logloss:0.64112	val

Ensemble Model Results

In [None]:
import warnings

# Per-fold accuracy (in percent) of the averaged ResNet + XGBoost probabilities.
ensemble_results = []

for fold in range(len(resnet_val_predictions)):
    print(f"Ensembling Fold {fold + 1}")

    resnet_preds = np.array(resnet_val_predictions[fold]["predictions"])
    true_labels = np.array(resnet_val_predictions[fold]["true_labels"])
    xgb_preds = np.array(xgboost_val_predictions[fold][0])
    xgb_labels = np.array(xgboost_val_predictions[fold][1])

    # The two models are combined element by element, so both folds must
    # describe the same samples in the same order.
    if resnet_preds.shape != xgb_preds.shape:
        raise ValueError(
            f"Fold {fold + 1}: ResNet has {resnet_preds.shape} predictions but "
            f"XGBoost has {xgb_preds.shape}; the folds cannot be ensembled."
        )
    # Differing labels mean the CSV rows and the image dataset are ordered
    # (or encoded) differently, which would make the average meaningless.
    if not np.array_equal(true_labels.astype(int), xgb_labels.astype(int)):
        warnings.warn(
            f"Fold {fold + 1}: ResNet and XGBoost validation labels differ; "
            "the image dataset and the CSV rows may not be aligned."
        )

    # Equal-weight average of the two probability vectors, thresholded at 0.5.
    ensemble_preds = 0.5 * resnet_preds + 0.5 * xgb_preds
    ensemble_preds_binary = (ensemble_preds > 0.5).astype(int)

    accuracy = accuracy_score(true_labels, ensemble_preds_binary) * 100
    print(f"Ensemble Validation Accuracy for Fold {fold + 1}: {accuracy:.2f}%")
    ensemble_results.append(accuracy)


average_ensemble_accuracy = np.mean(ensemble_results)
print(f"\nAverage Ensemble Validation Accuracy: {average_ensemble_accuracy:.2f}%")


5
Ensembling Fold 1
Ensemble Validation Accuracy for Fold 1: 89.69%
Ensembling Fold 2
Ensemble Validation Accuracy for Fold 2: 84.54%
Ensembling Fold 3
Ensemble Validation Accuracy for Fold 3: 85.57%
Ensembling Fold 4
Ensemble Validation Accuracy for Fold 4: 87.63%
Ensembling Fold 5
Ensemble Validation Accuracy for Fold 5: 88.66%

Average Ensemble Validation Accuracy: 87.22%


Holdout Set Predictions

In [None]:
# Fit one XGBoost model on the entire training CSV and label both holdout CSVs.
train_data_path = "/Users/Andrew/Documents/DATA403-Project3/403-PRoject3-Dataset/Dataset/train.csv"
train_data = pd.read_csv(train_data_path)

# Features are every column except the image identifier and the target;
# the target is the photographer encoded as an integer class id.
train_features = train_data.drop(columns=["image", "photographer"])
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_data["photographer"])

dtrain = xgb.DMatrix(train_features, label=train_labels)

params = {
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    "eta": 0.01,
    "max_depth": 6,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "seed": 42
}

# No validation split here, so all 1000 boosting rounds are always run.
xgb_model = xgb.train(params, dtrain, num_boost_round=1000)

test_files = ["./testset01.csv", "./testset02.csv"]
predictions = []  # (file name, predicted label) pairs across both test files

for test_file in test_files:
    test_data = pd.read_csv(test_file)
    test_file_names = test_data["image"]

    # NOTE(review): assumes the holdout CSVs carry the same feature columns
    # as train.csv -- confirm.
    test_features = test_data.drop(columns=["image", "photographer"])
    dtest = xgb.DMatrix(test_features)

    # Probabilities from the booster, thresholded at 0.5 into 0/1 labels.
    test_predictions = xgb_model.predict(dtest)
    test_predicted_labels = (test_predictions > 0.5).astype(int)

    predictions.extend(zip(test_file_names, test_predicted_labels))

predictions_df = pd.DataFrame(predictions, columns=["File_Name", "Predicted_Label"])

output_file = "predicted_labels.csv"
predictions_df.to_csv(output_file, index=False)

print(f"Predicted labels saved to {output_file}")


Predicted labels saved to predicted_labels.csv


In [None]:
import os
import csv
from PIL import Image
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim
import torch

# Retrain the ResNet head on the full training folder before labelling the
# two unlabeled test-set folders.
# NOTE(review): these are absolute, machine-specific Windows paths and this
# cell rebinds `transform`, `full_dataset` and `device` from earlier cells.
train_dataset_path = "C:\\Users\\Andrew\\Documents\\DATA403-Project3\\403-PRoject3-Dataset\\Dataset"
testset_paths = [
    "C:\\Users\\Andrew\\Documents\\DATA403-Project3\\403-PRoject3-Dataset\\TestSet01",
    "C:\\Users\\Andrew\\Documents\\DATA403-Project3\\403-PRoject3-Dataset\\TestSet02"
]
output_file = "testset_predictions.csv"

# Resize to 224x224, convert to a tensor and normalise each channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
full_dataset = ImageFolder(train_dataset_path, transform=transform)
full_loader = DataLoader(full_dataset, batch_size=64, shuffle=True)

# Frozen pretrained backbone with a small trainable single-logit head.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 1)
)
model = model.to(device)

# Only the new head is optimised.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()

model.train()
for epoch in range(15):
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in full_loader:
        inputs, labels = inputs.to(device), labels.to(device).float()

        optimizer.zero_grad()
        # squeeze(1) keeps the batch dimension even for a one-sample batch,
        # which a bare squeeze() would collapse to a 0-d tensor.
        outputs = model(inputs).squeeze(1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        probabilities = torch.sigmoid(outputs)
        predicted = (probabilities > 0.5).long()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1} | Loss: {running_loss / len(full_loader):.4f} | Training Accuracy: {train_accuracy:.2f}%')

# Switch to inference mode; `results` collects (file path, predicted label).
model.eval()
results = []

def load_images_from_folder(folder, transform):
    """Load and preprocess every image file directly inside ``folder``.

    Only regular files whose name ends in .png, .jpg or .jpeg (case
    insensitive) are read; sub-directories are not searched.  Files are
    visited in sorted name order so that the result is deterministic
    (``os.listdir`` alone returns entries in arbitrary order).

    Args:
        folder: Directory to scan.
        transform: Callable applied to each RGB image.

    Returns:
        A pair ``(images, file_paths)`` of equal-length lists, where
        ``images[i]`` is ``transform`` applied to the image at ``file_paths[i]``.
    """
    images = []
    file_paths = []
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        is_image_file = os.path.isfile(file_path) and filename.lower().endswith(('.png', '.jpg', '.jpeg'))
        if not is_image_file:
            continue
        # convert() returns an independent in-memory copy, so the underlying
        # file handle can be released right away.
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB")
        images.append(transform(image))
        file_paths.append(file_path)
    return images, file_paths

# Label every image in each test-set folder with the trained model and
# collect (file path, predicted label) pairs in `results`.
with torch.no_grad():
    for testset_path in testset_paths:
        images, file_paths = load_images_from_folder(testset_path, transform)
        if not images:
            # torch.stack raises on an empty list; report and move on instead.
            print(f"No images found in {testset_path}; skipping.")
            continue
        images = torch.stack(images)

        for i in range(0, len(images), 64):
            # Move one batch at a time so the whole test set never has to
            # fit in device memory at once.
            batch_images = images[i:i+64].to(device)
            # squeeze(1) keeps a 1-D result even when the final batch holds a
            # single image; a bare squeeze() would yield a 0-d array that
            # cannot be iterated below.
            outputs = model(batch_images).squeeze(1)
            probabilities = torch.sigmoid(outputs).cpu().numpy()
            predicted = (probabilities > 0.5).astype(int)

            for file_path, pred_label in zip(file_paths[i:i+64], predicted):
                results.append((file_path, int(pred_label)))


with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["File_Name", "Predicted_Label"])
    writer.writerows(results)

print(f"Predicted labels saved to {output_file}")




Epoch 1 | Loss: 0.7503 | Training Accuracy: 54.85%
Epoch 2 | Loss: 0.5487 | Training Accuracy: 69.90%
Epoch 3 | Loss: 0.4279 | Training Accuracy: 82.06%
Epoch 4 | Loss: 0.3684 | Training Accuracy: 84.33%
Epoch 5 | Loss: 0.3342 | Training Accuracy: 86.80%
Epoch 6 | Loss: 0.3466 | Training Accuracy: 85.57%
Epoch 7 | Loss: 0.3416 | Training Accuracy: 84.54%
Epoch 8 | Loss: 0.2963 | Training Accuracy: 87.63%
Epoch 9 | Loss: 0.2750 | Training Accuracy: 89.28%
Epoch 10 | Loss: 0.2586 | Training Accuracy: 90.52%
Epoch 11 | Loss: 0.2180 | Training Accuracy: 91.13%
Epoch 12 | Loss: 0.2160 | Training Accuracy: 91.55%
Epoch 13 | Loss: 0.2693 | Training Accuracy: 88.87%
Epoch 14 | Loss: 0.2987 | Training Accuracy: 85.36%
Epoch 15 | Loss: 0.2035 | Training Accuracy: 91.55%
Predicted labels saved to testset_predictions.csv


Final Ensemble Prediction

In [None]:
import ntpath
import warnings

# Combine the hard 0/1 labels written by the XGBoost and ResNet test-set
# cells into one final label per file.
xgboost_file = "./predicted_labels.csv"
resnet_file = "./testset_predictions.csv"
output_file = "./final_ensemble_predictions.csv"

xgboost_preds = pd.read_csv(xgboost_file)
resnet_preds = pd.read_csv(resnet_file)

if len(xgboost_preds) != len(resnet_preds):
    raise ValueError(
        f"{xgboost_file} has {len(xgboost_preds)} rows but {resnet_file} has "
        f"{len(resnet_preds)}; the two prediction sets cannot be combined."
    )

# The two files are produced independently (one from CSV rows, one from a
# directory listing), so pair rows by file name rather than by position.
# ntpath.basename splits on both '/' and '\\', which covers the Windows paths
# stored in the ResNet file.
xgb_keys = xgboost_preds["File_Name"].astype(str).map(ntpath.basename)
resnet_keys = resnet_preds["File_Name"].astype(str).map(ntpath.basename)
keys_match = (
    xgb_keys.is_unique
    and resnet_keys.is_unique
    and set(xgb_keys) == set(resnet_keys)
)
if keys_match:
    resnet_labels = (
        resnet_preds.set_index(resnet_keys)["Predicted_Label"]
        .reindex(xgb_keys.to_numpy())
        .to_numpy()
    )
else:
    # Fall back to the original positional pairing, but say so loudly.
    warnings.warn(
        "File names in the XGBoost and ResNet prediction files could not be "
        "matched one-to-one; combining rows by position instead."
    )
    resnet_labels = resnet_preds["Predicted_Label"].to_numpy()

weights = {"xgboost": 0.5, "resnet": 0.5}
ensemble_predictions = (
    weights["xgboost"] * xgboost_preds["Predicted_Label"] +
    weights["resnet"] * resnet_labels
)

# Both inputs are hard 0/1 labels, so with equal weights a disagreement
# averages to exactly 0.5, which the strict "> 0.5" test maps to class 0.
# Class 1 is therefore predicted only when both models predict 1.
final_predictions = (ensemble_predictions > 0.5).astype(int)

final_results = pd.DataFrame({
    "File_Name": xgboost_preds["File_Name"],
    "Final_Predicted_Label": final_predictions
})
final_results.to_csv(output_file, index=False)

print(f"Final ensemble predictions saved to {output_file}")


Final ensemble predictions saved to ./final_ensemble_predictions.csv
