In [7]:
import os
import zipfile
import pandas as pd
from PIL import Image
from io import BytesIO
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score, accuracy_score

# --- Load CSV and ZIP ---
df = pd.read_csv('train.csv')
train_dir = 'train/train'

class ImageFolderDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe.reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx, 0]
        label = self.dataframe.iloc[idx, 1]
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert('L')
        if self.transform:
            image = self.transform(image)
        return image, label

#Preprocessing - resize all images to 32x32 & convert into pytorch tensor (this normalizes to [0,1] range)
# transform = transforms.Compose([
#     transforms.Resize((32, 32)),
#     transforms.ToTensor()
# ])

#let's try a more encompassing transform
transform = transforms.Compose([
    transforms.Resize((128, 128)),   #larger image to capture more details (likely increase compute time)
    #randomize orientations to increase generalizability
    transforms.RandomHorizontalFlip(),  
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),

    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize to [-1, 1]
])


full_dataset = ImageFolderDataset(df, train_dir, transform)

#split into 50% training and 50% validation
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

#loads training and validation data into batches of 128
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)


class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.conv3 = nn.Conv2d(16, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)

        # Dummy input to get the flattened size
        self._to_linear = None
        self._get_flatten_size()

        self.fc1 = nn.Linear(self._to_linear, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)

    def _get_flatten_size(self):
        with torch.no_grad():
            x = torch.zeros(1, 1, 128, 128)
            x = self.pool(torch.relu(self.conv1(x)))
            x = self.pool(torch.relu(self.conv2(x)))
            x = torch.relu(self.conv3(x))
            self._to_linear = x.view(1, -1).shape[1]

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = torch.relu(self.conv3(x))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.get_device_name(0))
print(device)

#define model w/ class from above
model = LeNet().to(device)
criterion = nn.BCEWithLogitsLoss()  #this function should be better for our task? #nn.CrossEntropyLoss()   #loss function  
optimizer = optim.Adam(model.parameters(), lr=0.001)    #adam learning rate optimizer

#training loop
for epoch in range(10):  #10 epochs
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)  #move data to GPU

        optimizer.zero_grad()   #clear previous gradients
        outputs = model(images).squeeze(1)   #compute forward pass
        loss = criterion(outputs, labels.float())    #compute loss
        loss.backward()             #compute back propagation
        optimizer.step()            #update model params

        running_loss += loss.item()

        if (i + 1) % 10 == 0 or (i + 1) == len(train_loader):
            print(f"Epoch [{epoch+1}], Batch [{i+1}/{len(train_loader)}], Batch Loss: {loss.item():.4f}")


    # --- Validation ---
    all_probs, all_labels, all_preds = [], [], []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Use sigmoid instead of softmax
            probs = torch.sigmoid(outputs).squeeze(1)
            preds = (probs > 0.5).long()

            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    val_auc = roc_auc_score(all_labels, all_probs)
    val_acc = accuracy_score(all_labels, all_preds)

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Val Acc: {val_acc*100:.2f}%, AUC: {val_auc:.4f}")

print("Training + Validation done.")



#now test and save outputs

# --- Predict on Test Set and Save Submission ---
print("Generating test predictions...")

# Test setup
test_dir = 'test/test'
test_filenames = sorted(os.listdir(test_dir))

class TestDataset(Dataset):
    def __init__(self, file_list, root_dir, transform=None):
        self.file_list = file_list
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        img_name = self.file_list[idx]
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert('L')
        if self.transform:
            image = self.transform(image)
        return image, img_name

test_dataset = TestDataset(test_filenames, test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Prediction loop
model.eval()
results = []
with torch.no_grad():
    for images, names in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = torch.sigmoid(outputs).squeeze(1).cpu().numpy()
        results.extend(zip(names, probs))

# Save to CSV
submission_df = pd.DataFrame(results, columns=["Id", "label"])
submission_df.to_csv("outputs.csv", index=False)
print("Submission file saved as 'outputs.csv'")


NVIDIA GeForce GTX 1080 Ti
cuda
Epoch [1], Batch [10/400], Batch Loss: 0.6262
Epoch [1], Batch [20/400], Batch Loss: 0.5528
Epoch [1], Batch [30/400], Batch Loss: 0.5103
Epoch [1], Batch [40/400], Batch Loss: 0.4959
Epoch [1], Batch [50/400], Batch Loss: 0.4841
Epoch [1], Batch [60/400], Batch Loss: 0.4107
Epoch [1], Batch [70/400], Batch Loss: 0.4747
Epoch [1], Batch [80/400], Batch Loss: 0.4119
Epoch [1], Batch [90/400], Batch Loss: 0.3828
Epoch [1], Batch [100/400], Batch Loss: 0.3636
Epoch [1], Batch [110/400], Batch Loss: 0.5080
Epoch [1], Batch [120/400], Batch Loss: 0.6495
Epoch [1], Batch [130/400], Batch Loss: 0.4662
Epoch [1], Batch [140/400], Batch Loss: 0.4434
Epoch [1], Batch [150/400], Batch Loss: 0.4582
Epoch [1], Batch [160/400], Batch Loss: 0.4985
Epoch [1], Batch [170/400], Batch Loss: 0.4265
Epoch [1], Batch [180/400], Batch Loss: 0.4457
Epoch [1], Batch [190/400], Batch Loss: 0.4434
Epoch [1], Batch [200/400], Batch Loss: 0.3470
Epoch [1], Batch [210/400], Batch Los

In [5]:
import os
import zipfile
import numpy as np
import pandas as pd
from PIL import Image
from io import BytesIO
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.linear_model import LogisticRegression
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score, accuracy_score
from torchvision.models import resnet18, resnet34

# --- Load CSV and ZIP ---
df = pd.read_csv('train.csv')
train_dir = 'train/train'

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.conv3 = nn.Conv2d(16, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)

        self._to_linear = None
        self._get_flatten_size()

        self.fc1 = nn.Linear(self._to_linear, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)

    def _get_flatten_size(self):
        with torch.no_grad():
            x = torch.zeros(1, 1, 128, 128)
            x = self.pool(torch.relu(self.conv1(x)))
            x = self.pool(torch.relu(self.conv2(x)))
            x = torch.relu(self.conv3(x))
            self._to_linear = x.view(1, -1).shape[1]

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = torch.relu(self.conv3(x))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(32 * 32 * 32, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
    def forward(self, x):
        return self.net(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Initialize models ---
model1 = LeNet().to(device)
model2 = SimpleCNN().to(device)

models_list = [model1, model2]

class ImageFolderDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe.reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx, 0]
        label = self.dataframe.iloc[idx, 1]
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert('L')  # Convert to grayscale
        if self.transform:
            image = self.transform(image)
        return image, label

# --- Define Transforms ---
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# --- Create Dataset ---
full_dataset = ImageFolderDataset(df, train_dir, transform=transform)

# --- Split into Train and Validation ---
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# --- Define Dataloaders ---
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)

# --- Training function for a single model ---

def train_model(model, train_loader, val_loader, epochs=5):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images).squeeze(1)
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()

# --- Train each model ---
for model in models_list:
    train_model(model, train_loader, val_loader, epochs=5)

print("Base models trained.")

# --- Get validation predictions from each model ---

def get_predictions(model, loader):
    model.eval()
    preds = []
    with torch.no_grad():
        for images, _ in loader:
            images = images.to(device)
            outputs = model(images)
            probs = torch.sigmoid(outputs).squeeze(1)
            preds.append(probs.cpu().numpy())
    return np.concatenate(preds)

val_preds = []
for model in models_list:
    preds = get_predictions(model, val_loader)
    val_preds.append(preds)

val_preds = np.stack(val_preds, axis=1)  # shape: (num_samples, num_models)

# Get true labels
all_labels = []
for _, labels in val_loader:
    all_labels.extend(labels.numpy())
all_labels = np.array(all_labels)

# --- Train Meta Model (Logistic Regression) ---
meta_model = LogisticRegression()
meta_model.fit(val_preds, all_labels)

print("Meta model trained.")

# --- Test Predictions ---

# Get test preds from each model
test_preds = []
for model in models_list:
    preds = []
    for images, names in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = torch.sigmoid(outputs).squeeze(1)
        preds.append(probs.cpu().numpy())
    preds = np.concatenate(preds)
    test_preds.append(preds)

test_preds = np.stack(test_preds, axis=1)  # shape (num_samples, num_models)

# Use meta model to predict
final_probs = meta_model.predict_proba(test_preds)[:, 1]

# --- Save Final Output ---
submission_df = pd.DataFrame({
    "Id": test_filenames,
    "label": final_probs
})
submission_df.to_csv("stacked_outputs.csv", index=False)

print("Stacked submission saved as 'stacked_outputs.csv'")


Base models trained.
Meta model trained.


NameError: name 'test_loader' is not defined

### Let's now try one with ResNet, which may take longer to train, but is supposed to perform much better 

In [None]:
import os
import zipfile
import pandas as pd
from PIL import Image
from io import BytesIO
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score, accuracy_score
from torchvision.models import resnet18, resnet34

# --- Load CSV and ZIP ---
df = pd.read_csv('train.csv')
train_dir = 'train/train'

class ImageFolderDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe.reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx, 0]
        label = self.dataframe.iloc[idx, 1]
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert('L')
        if self.transform:
            image = self.transform(image)
        return image, label

#let's try a more encompassing transform
transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
    transforms.RandomCrop(128, padding=4),  # Crop for more variability
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Color jitter
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])


full_dataset = ImageFolderDataset(df, train_dir, transform)

#split into 50% training and 50% validation
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

#loads training and validation data into batches of 128
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)

class CustomResNet(nn.Module):
    def __init__(self):
        super(CustomResNet, self).__init__()
        self.model = resnet34(pretrained=True)   #used to be false

        # Modify first conv layer to accept 1-channel input (instead of 3)
        self.model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

        # Modify final FC layer to output 1 logit (for binary classification)
        self.model.fc = nn.Linear(self.model.fc.in_features, 1)

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.get_device_name(0))
print(device)

#define model w/ class from above
model = CustomResNet().to(device)
criterion = nn.BCEWithLogitsLoss()  #this function should be better for our task? #nn.CrossEntropyLoss()   #loss function  
optimizer = optim.Adam(model.parameters(), lr=0.001)    #adam learning rate optimizer

# Add learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Halve learning rate every 5 epochs

#training loop
for epoch in range(10):  #10 epochs
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)  #move data to GPU

        optimizer.zero_grad()   #clear previous gradients
        outputs = model(images).squeeze(1)   #compute forward pass
        loss = criterion(outputs, labels.float())    #compute loss
        loss.backward()             #compute back propagation
        optimizer.step()            #update model params

        running_loss += loss.item()

        if (i + 1) % 10 == 0 or (i + 1) == len(train_loader):
            print(f"Epoch [{epoch+1}], Batch [{i+1}/{len(train_loader)}], Batch Loss: {loss.item():.4f}")

    # --- Step the scheduler every epoch
    scheduler.step()

    # --- Validation ---
    all_probs, all_labels, all_preds = [], [], []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Use sigmoid instead of softmax
            probs = torch.sigmoid(outputs).squeeze(1)
            preds = (probs > 0.5).long()

            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    val_auc = roc_auc_score(all_labels, all_probs)
    val_acc = accuracy_score(all_labels, all_preds)

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Val Acc: {val_acc*100:.2f}%, AUC: {val_auc:.4f}")

print("Training + Validation done.")



#now test and save outputs

# --- Predict on Test Set and Save Submission ---
print("Generating test predictions...")

# Test setup
test_dir = 'test/test'
test_filenames = sorted(os.listdir(test_dir))

class TestDataset(Dataset):
    def __init__(self, file_list, root_dir, transform=None):
        self.file_list = file_list
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        img_name = self.file_list[idx]
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert('L')
        if self.transform:
            image = self.transform(image)
        return image, img_name

test_dataset = TestDataset(test_filenames, test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Prediction loop
model.eval()
results = []
with torch.no_grad():
    for images, names in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = torch.sigmoid(outputs).squeeze(1).cpu().numpy()
        results.extend(zip(names, probs))

# Save to CSV
submission_df = pd.DataFrame(results, columns=["Id", "label"])
submission_df.to_csv("outputs.csv", index=False)
print("Submission file saved as 'outputs.csv'")


NVIDIA GeForce GTX 1080 Ti
cuda


Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to C:\Users\Ethan/.cache\torch\hub\checkpoints\resnet101-63fe2227.pth
100.0%


Epoch [1], Batch [10/800], Batch Loss: 0.4703
Epoch [1], Batch [20/800], Batch Loss: 0.4543
Epoch [1], Batch [30/800], Batch Loss: 0.3712
Epoch [1], Batch [40/800], Batch Loss: 0.4599
Epoch [1], Batch [50/800], Batch Loss: 0.4753
Epoch [1], Batch [60/800], Batch Loss: 0.5169
Epoch [1], Batch [70/800], Batch Loss: 0.2529
Epoch [1], Batch [80/800], Batch Loss: 0.5297
Epoch [1], Batch [90/800], Batch Loss: 0.4505
Epoch [1], Batch [100/800], Batch Loss: 0.3633
Epoch [1], Batch [110/800], Batch Loss: 0.4084
Epoch [1], Batch [120/800], Batch Loss: 0.3999
Epoch [1], Batch [130/800], Batch Loss: 0.3712
Epoch [1], Batch [140/800], Batch Loss: 0.3561
Epoch [1], Batch [150/800], Batch Loss: 0.2928
Epoch [1], Batch [160/800], Batch Loss: 0.2615
Epoch [1], Batch [170/800], Batch Loss: 0.3975
Epoch [1], Batch [180/800], Batch Loss: 0.3151
Epoch [1], Batch [190/800], Batch Loss: 0.4175
Epoch [1], Batch [200/800], Batch Loss: 0.3637
Epoch [1], Batch [210/800], Batch Loss: 0.2617
Epoch [1], Batch [220/