In [None]:

# Colab-only setup: mount Google Drive, then switch the working directory to
# the project folder so the relative data/checkpoint paths used by later cells
# resolve inside it.
import os

from google.colab import drive

drive.mount('/content/drive/')

project_path = '/content/drive/MyDrive/Cornell/pvz'
os.chdir(project_path)
print("current path：", os.getcwd())


Mounted at /content/drive/
current path： /content/drive/MyDrive/Cornell/pvz


In [None]:

import copy
import os
import pickle
import random
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms.functional as TF
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from torchvision import transforms, models


In [None]:
# Seed every RNG the notebook touches (Python, NumPy, PyTorch CPU and, when
# present, all CUDA devices) with the same fixed value.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
  torch.cuda.manual_seed_all(42)

## Configuration

In [None]:
# Data paths (relative, so they resolve against the current working directory)
TRAIN_PKL_PATH = 'train.pkl'
TEST_PKL_PATH = 'test.pkl'
SUBMISSION_CSV_PATH = 'submission_siamese_resnet_acc.csv'
# One checkpoint per fold; '{fold}' is filled in with the 1-based fold number.
MODEL_SAVE_PATH_TEMPLATE = 'best_siamese_resnet_fold_{fold}.pth'

# Hyperparameters
K_FOLDS = 5  # number of StratifiedKFold splits (and of ensemble members)
BATCH_SIZE = 32  # batch size for the training and validation loaders
LEARNING_RATE = 1e-4  # AdamW learning rate
WEIGHT_DECAY = 1e-3  # AdamW weight decay
EPOCHS_PER_FOLD = 100  # upper bound on epochs per fold (early stopping may end sooner)

PATIENCE_LR = 3  # ReduceLROnPlateau patience, in epochs without val-accuracy improvement
PATIENCE_ES = 25  # early-stopping patience, in epochs without val-accuracy improvement
USE_PRETRAINED_BASE = True  # forwarded to SiameseNet(pretrained_base=...)

# Inference Configuration
INFERENCE_BATCH_SIZE = 128  # batch size for the test DataLoader

# Device: use the GPU when one is available, otherwise fall back to CPU
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {DEVICE}")
print(f"K-Fold Cross-Validation with K={K_FOLDS}")

Using device: cuda
K-Fold Cross-Validation with K=5


## Data Handling

In [None]:
class RPSSiameseDataset(Dataset):
    """Labelled image-pair dataset loaded from a pickle file.

    The pickle is expected to expose ``'img1'``, ``'img2'`` and ``'label'``
    entries. Both image collections are stacked into ``uint8`` arrays and must
    have per-sample shape ``(24, 24)``. Labels are binarised: ``1`` where the
    raw label equals ``1``, otherwise ``0``.

    Loading is best-effort: if the file is missing or malformed an error is
    printed and the dataset is left empty (``len(ds) == 0``) instead of
    raising, so callers should check the length before use.

    Args:
        pkl_path: Path of the pickle file to load.
        transform: Optional callable applied independently to each PIL image.
            When ``None`` (the default) a random-augmentation pipeline
            (rotation, horizontal flip, padded crop, tensor conversion,
            normalisation) is used.
    """

    def __init__(self, pkl_path, transform=None):
        self.imgs1 = None
        self.imgs2 = None
        self.labels = None

        # Honour a caller-supplied transform; previously the argument was
        # accepted but always overwritten by the default augmentation.
        if transform is None:
            transform = transforms.Compose([
                transforms.RandomRotation(13),
                # A single flip: two consecutive p=0.5 flips yield the same
                # 50/50 flipped/unflipped distribution as one.
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(24, padding=2),
                transforms.ToTensor(),
                transforms.Normalize((0.5,), (0.5,))
            ])
        self.transform = transform

        try:
            # NOTE: pickle.load can execute arbitrary code; only load files
            # from a trusted source.
            with open(pkl_path, 'rb') as f:
                data = pickle.load(f)
            print(f"Pickle file '{pkl_path}' loaded successfully.")

            print("Attempting to stack 'img1' data...")
            self.imgs1 = np.stack(data['img1']).astype(np.uint8)  # uint8 so PIL can consume it
            print(f"  'img1' stacked successfully. Shape: {self.imgs1.shape}")

            print("Attempting to stack 'img2' data...")
            self.imgs2 = np.stack(data['img2']).astype(np.uint8)  # uint8 so PIL can consume it
            print(f"  'img2' stacked successfully. Shape: {self.imgs2.shape}")

            labels_raw = np.array(data['label'])
            # Binary target: 1 if img1 beats img2 (raw label == 1), else 0.
            self.labels = torch.tensor((labels_raw == 1).astype(np.int64))

            assert len(self.imgs1) == len(self.labels), "Mismatch between img1 count and labels count."
            assert len(self.imgs2) == len(self.labels), "Mismatch between img2 count and labels count."
            assert self.imgs1.shape[1:] == (24, 24), f"img1 shape error: {self.imgs1.shape}"
            assert self.imgs2.shape[1:] == (24, 24), f"img2 shape error: {self.imgs2.shape}"

            print(f"Dataset initialized successfully from {pkl_path}: {len(self.labels)} samples.")

        except FileNotFoundError:
            # Nothing was assigned yet, so the dataset simply stays empty.
            print(f"Error: File not found at {pkl_path}")
        except Exception as e:
            print(f"Error during dataset initialization from {pkl_path}: {e}")
            # Drop any partially loaded state so the dataset reads as empty.
            self.imgs1, self.imgs2, self.labels = None, None, None

    def __len__(self):
        """Return the number of samples, or 0 if loading failed."""
        return len(self.labels) if self.labels is not None else 0

    def __getitem__(self, idx):
        """Return ``(im1, im2, y)`` for sample ``idx``.

        Raises:
            IndexError: If the dataset was not loaded successfully.
        """
        if self.imgs1 is None or self.imgs2 is None:
            raise IndexError("Dataset not initialized correctly.")

        im1_pil = Image.fromarray(self.imgs1[idx])  # already uint8 from __init__
        im2_pil = Image.fromarray(self.imgs2[idx])  # already uint8 from __init__
        y = self.labels[idx]

        # The transform is called once per image, so random augmentations are
        # drawn independently for the two images of a pair.
        if self.transform:
            im1 = self.transform(im1_pil)
            im2 = self.transform(im2_pil)
        else:
            # Fallback when the transform attribute has been cleared: plain
            # tensor conversion without normalisation.
            to_tensor = transforms.ToTensor()
            im1 = to_tensor(im1_pil)
            im2 = to_tensor(im2_pil)

        return im1, im2, y

# Inference-time dataset: unlabelled image pairs together with their sample IDs
class RPSInferenceDataset(Dataset):
    """Unlabelled image-pair dataset for prediction.

    Reads IDs and the two image collections from a pickle file under the given
    keys, stacks the images into ``uint8`` arrays of per-sample shape
    ``(24, 24)``, and applies only tensor conversion and normalisation (no
    augmentation). Loading failures are reported with a printed message and
    leave the dataset empty rather than raising.
    """

    def __init__(self, pkl_path, ids_key='id', img1_key='img1', img2_key='img2'):
        self.ids = self.imgs1 = self.imgs2 = None
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])

        try:
            with open(pkl_path, 'rb') as handle:
                payload = pickle.load(handle)
            print(f"Pickle file '{pkl_path}' loaded successfully for inference.")

            raw_ids = payload.get(ids_key)
            if raw_ids is None:
                raise ValueError(f"Key '{ids_key}' not found in pickle file.")
            sample_ids = np.array(raw_ids)  # IDs are kept as a NumPy array

            print("Attempting to stack 'img1' data for inference...")
            raw_first = payload.get(img1_key)
            if raw_first is None:
                raise ValueError(f"Key '{img1_key}' not found.")
            first_imgs = np.stack(raw_first).astype(np.uint8)
            print(f"  'img1' stacked successfully. Shape: {first_imgs.shape}")

            print("Attempting to stack 'img2' data for inference...")
            raw_second = payload.get(img2_key)
            if raw_second is None:
                raise ValueError(f"Key '{img2_key}' not found.")
            second_imgs = np.stack(raw_second).astype(np.uint8)
            print(f"  'img2' stacked successfully. Shape: {second_imgs.shape}")

            # Consistency checks on counts and per-image shape
            assert len(first_imgs) == len(sample_ids), "Mismatch between img1 count and ID count."
            assert len(second_imgs) == len(sample_ids), "Mismatch between img2 count and ID count."
            assert first_imgs.shape[1:] == (24, 24), f"img1 shape error: {first_imgs.shape}"
            assert second_imgs.shape[1:] == (24, 24), f"img2 shape error: {second_imgs.shape}"

            # Publish the loaded arrays only once everything validated.
            self.ids, self.imgs1, self.imgs2 = sample_ids, first_imgs, second_imgs
            print(f"Inference dataset initialized successfully from {pkl_path}: {len(self.ids)} samples.")

        except FileNotFoundError:
            print(f"Error: File not found at {pkl_path}")
        except Exception as e:
            print(f"Error during inference dataset initialization from {pkl_path}: {e}")
            self.ids, self.imgs1, self.imgs2 = None, None, None

    def __len__(self):
        """Return the number of samples, or 0 when loading failed."""
        return 0 if self.ids is None else len(self.ids)

    def __getitem__(self, idx):
        """Return ``(im1, im2, id)`` for sample ``idx``; IndexError if not loaded."""
        if any(part is None for part in (self.imgs1, self.imgs2, self.ids)):
            raise IndexError("Inference dataset not initialized correctly.")

        first_pil = Image.fromarray(self.imgs1[idx])
        second_pil = Image.fromarray(self.imgs2[idx])

        # Deterministic preprocessing only: ToTensor followed by Normalize
        return self.transform(first_pil), self.transform(second_pil), self.ids[idx]

# Progress marker: confirms the dataset class definitions in this cell executed.
print("Corrected Dataset classes defined.")

Corrected Dataset classes defined.


## Model Definition

In [None]:
# Base network: ResNet-34 feature extractor adapted to 1-channel input.
# (The function keeps its historical "resnet18" name so existing callers work.)
def get_base_resnet18(pretrained=True):
    """Build a single-channel ResNet-34 backbone with its classifier removed.

    The first convolution is replaced by a 1-input-channel, stride-1,
    padding-1 convolution, and the final fully connected layer is replaced by
    ``nn.Identity`` so the network outputs pooled features.

    When ``pretrained`` is true and the original first convolution has three
    input channels, its kernel is averaged over the channel axis and copied
    into the new layer. The new layer is declared with the pretrained kernel's
    own spatial size so that the module's ``kernel_size`` matches the weights
    it actually holds. (The earlier code declared a 3x3 layer and then assigned
    the larger pretrained kernel through ``.weight.data``, which silently
    swapped the weight tensor: the layer ran with the pretrained kernel size
    while still reporting ``kernel_size=3``.) The resulting weight shape, and
    therefore the forward pass and checkpoint layout, is unchanged.

    Without pretrained weights the layer is a freshly initialised 3x3
    convolution. The first-layer weight shape therefore depends on
    ``pretrained``, so a checkpoint must be loaded into a model built with the
    same flag it was trained with.

    Args:
        pretrained: Load the default ImageNet weights when true.

    Returns:
        Tuple ``(backbone, num_ftrs)`` where ``num_ftrs`` is the feature
        dimension produced by the backbone (the replaced ``fc.in_features``).
    """
    weights = models.ResNet34_Weights.DEFAULT if pretrained else None  # switched to ResNet-34
    backbone = models.resnet34(weights=weights)
    original_conv1 = backbone.conv1

    adapt_pretrained = pretrained and original_conv1.weight.shape[1] == 3
    if adapt_pretrained:
        # Match the pretrained kernel's spatial size so the copy below fits.
        kernel_size = tuple(original_conv1.weight.shape[-2:])
    else:
        kernel_size = 3
    # NOTE(review): padding=1 with the larger pretrained kernel shrinks the
    # feature map; kept as-is to preserve the behaviour existing checkpoints
    # were trained with.
    backbone.conv1 = nn.Conv2d(1, 64, kernel_size=kernel_size, stride=1, padding=1, bias=False)

    if adapt_pretrained:
        # Collapse RGB filters to one channel; copy_ raises on any shape
        # mismatch instead of silently replacing the tensor.
        with torch.no_grad():
            backbone.conv1.weight.copy_(original_conv1.weight.mean(dim=1, keepdim=True))

    num_ftrs = backbone.fc.in_features
    backbone.fc = nn.Identity()  # remove the final classification layer

    return backbone, num_ftrs

# Siamese network: one shared backbone plus a pairwise classifier head
class SiameseNet(nn.Module):
    """Two-input classifier that embeds both images with the same backbone.

    The two embeddings and their element-wise absolute difference are
    concatenated (three times the backbone feature size) and passed to a small
    MLP head that emits two logits, suitable for ``CrossEntropyLoss``.
    """

    def __init__(self, pretrained_base=True):
        super().__init__()
        backbone, feature_dim = get_base_resnet18(pretrained=pretrained_base)
        self.base_network = backbone

        head_layers = [
            nn.Linear(feature_dim * 3, 128),  # input: [feat1, feat2, |feat1 - feat2|]
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.8),
            nn.Linear(128, 2),  # two classes (0 or 1)
        ]
        self.classifier_head = nn.Sequential(*head_layers)

    def forward(self, input1, input2):
        """Return the two-class logits for a batch of image pairs."""
        emb_a = self.base_network(input1)
        emb_b = self.base_network(input2)
        gap = (emb_a - emb_b).abs()
        pair_features = torch.cat((emb_a, emb_b, gap), dim=1)
        return self.classifier_head(pair_features)

# Progress marker: confirms the model definitions in this cell executed.
print("Model classes defined.")

Model classes defined.


## Training

In [None]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Puts the model in training mode, performs a forward/backward/step cycle
    per batch, and accumulates the sample-weighted loss and the number of
    correct argmax predictions.

    Returns:
        ``(avg_loss, avg_acc)`` over all samples seen, or ``(0, 0)`` when the
        loader yields no samples.
    """
    model.train()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for first, second, target in loader:
        first = first.to(device)
        second = second.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        scores = model(first, second)
        batch_loss = criterion(scores, target)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the epoch figure is per sample.
        loss_sum += batch_loss.item() * first.size(0)
        n_correct += (scores.argmax(dim=1) == target).sum().item()
        n_seen += target.size(0)

    if n_seen > 0:
        return loss_sum / n_seen, n_correct / n_seen
    return 0, 0

def validate(model, loader, criterion, device):
    """Evaluate the model on ``loader`` without updating it.

    Switches the model to eval mode and, with gradients disabled, accumulates
    the sample-weighted loss and the number of correct argmax predictions.

    Returns:
        ``(avg_loss, avg_acc)`` over all samples seen, or ``(0, 0)`` when the
        loader yields no samples.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for first, second, target in loader:
            first = first.to(device)
            second = second.to(device)
            target = target.to(device)

            scores = model(first, second)
            batch_loss = criterion(scores, target)

            loss_sum += batch_loss.item() * first.size(0)
            n_correct += (scores.argmax(dim=1) == target).sum().item()
            n_seen += target.size(0)

    if n_seen > 0:
        return loss_sum / n_seen, n_correct / n_seen
    return 0, 0

print("Training/validation functions defined.")

print("\n Starting Training Phase")

print("\n--- Starting K-Fold Cross-Validation Training Phase ---")

# Training data; by default the dataset applies its random augmentation.
full_dataset = RPSSiameseDataset(TRAIN_PKL_PATH)

if len(full_dataset) == 0:
    print("Training aborted: Could not load training data.")
else:

    try:
        dataset_labels = full_dataset.labels.numpy()  # labels as NumPy array for StratifiedKFold
        dataset_indices = np.arange(len(full_dataset))
    except AttributeError:
        print("Error: Cannot access full_dataset.labels. Make sure it's accessible.")
        dataset_labels = None

    if dataset_labels is not None:
        # Validation view of the same samples with deterministic preprocessing
        # (the same ToTensor + Normalize used at inference time). A shallow
        # copy shares the image arrays, so no data is duplicated. Validating
        # on randomly augmented images would make the accuracy that drives LR
        # scheduling, early stopping and checkpoint selection noisy.
        val_dataset = copy.copy(full_dataset)
        val_dataset.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])

        # os.cpu_count() may return None; fall back to a single CPU.
        num_workers = (os.cpu_count() or 1) // 2
        # Pinned host memory only helps (and is only supported) with CUDA.
        pin_memory = DEVICE.type == 'cuda'

        skf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True, random_state=42)
        fold_results = []  # best validation accuracy of each fold
        total_training_start_time = time.time()

        # K-Fold loop: a fresh model, optimizer and scheduler per fold
        for fold, (train_idx, val_idx) in enumerate(skf.split(dataset_indices, dataset_labels), 1):
            print(f"\n===== Starting Fold {fold}/{K_FOLDS} =====")
            fold_start_time = time.time()

            train_subset = Subset(full_dataset, train_idx)
            val_subset = Subset(val_dataset, val_idx)

            train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
            val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)
            print(f"Fold {fold}: Train batches={len(train_loader)}, Val batches={len(val_loader)}")

            model = SiameseNet(pretrained_base=USE_PRETRAINED_BASE).to(DEVICE)
            print(f"Fold {fold}: Initialized new model instance.")
            criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
            optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
            # mode='max' because the monitored quantity is validation accuracy.
            # The deprecated `verbose` argument is omitted (its default is off).
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='max', factor=0.5, patience=PATIENCE_LR
            )

            best_fold_val_acc = 0.0
            epochs_no_improve = 0
            fold_model_save_path = MODEL_SAVE_PATH_TEMPLATE.format(fold=fold)

            for epoch in range(1, EPOCHS_PER_FOLD + 1):

                # train_one_epoch / validate switch the model mode themselves.
                tr_loss, tr_acc = train_one_epoch(model, train_loader, criterion, optimizer, DEVICE)
                va_loss, va_acc = validate(model, val_loader, criterion, DEVICE)

                print(f"  Fold {fold} Epoch {epoch:2d} | Train Loss: {tr_loss:.4f}, Acc: {tr_acc:.4f} | Val Loss: {va_loss:.4f}, Acc: {va_acc:.4f}")

                scheduler.step(va_acc)

                if va_acc > best_fold_val_acc:
                    # New best: checkpoint the weights and reset the patience counter.
                    print(f"    🚀 Fold {fold} Val Acc improved to {va_acc:.4f}. Saving model to {fold_model_save_path}")
                    best_fold_val_acc = va_acc
                    torch.save(model.state_dict(), fold_model_save_path)
                    epochs_no_improve = 0
                else:
                    epochs_no_improve += 1
                    if epochs_no_improve >= PATIENCE_ES:
                        print(f"    🚨 Fold {fold} Early stopping triggered after epoch {epoch}. Best val acc: {best_fold_val_acc:.4f}")
                        break

            fold_duration = time.time() - fold_start_time
            print(f"===== Fold {fold} Finished in {fold_duration:.2f}s. Best Val Acc: {best_fold_val_acc:.4f} =====")
            fold_results.append(best_fold_val_acc)

        total_training_duration = time.time() - total_training_start_time
        print(f"\n--- K-Fold Training Finished ---")
        print(f"Total Training Time: {total_training_duration:.2f}s")
        print(f"Validation accuracies per fold: {[f'{acc:.4f}' for acc in fold_results]}")
        print(f"Average K-Fold Validation Accuracy: {np.mean(fold_results):.4f} (+/- {np.std(fold_results):.4f})")


Training/validation functions defined.

 Starting Training Phase

--- Starting K-Fold Cross-Validation Training Phase ---
Pickle file 'train.pkl' loaded successfully.
Attempting to stack 'img1' data...
  'img1' stacked successfully. Shape: (40000, 24, 24)
Attempting to stack 'img2' data...
  'img2' stacked successfully. Shape: (40000, 24, 24)
Dataset initialized successfully from train.pkl: 40000 samples.

===== Starting Fold 1/5 =====
Fold 1: Train batches=1000, Val batches=250
Fold 1: Initialized new model instance.




  Fold 1 Epoch  1 | Train Loss: 0.8069, Acc: 0.5226 | Val Loss: 0.6847, Acc: 0.5986
    🚀 Fold 1 Val Acc improved to 0.5986. Saving model to best_siamese_resnet_fold_1.pth
  Fold 1 Epoch  2 | Train Loss: 0.6868, Acc: 0.6250 | Val Loss: 0.6165, Acc: 0.6739
    🚀 Fold 1 Val Acc improved to 0.6739. Saving model to best_siamese_resnet_fold_1.pth
  Fold 1 Epoch  3 | Train Loss: 0.6247, Acc: 0.6799 | Val Loss: 0.5970, Acc: 0.7070
    🚀 Fold 1 Val Acc improved to 0.7070. Saving model to best_siamese_resnet_fold_1.pth
  Fold 1 Epoch  4 | Train Loss: 0.5832, Acc: 0.7200 | Val Loss: 0.5696, Acc: 0.7430
    🚀 Fold 1 Val Acc improved to 0.7430. Saving model to best_siamese_resnet_fold_1.pth
  Fold 1 Epoch  5 | Train Loss: 0.5577, Acc: 0.7432 | Val Loss: 0.5406, Acc: 0.7594
    🚀 Fold 1 Val Acc improved to 0.7594. Saving model to best_siamese_resnet_fold_1.pth
  Fold 1 Epoch  6 | Train Loss: 0.5460, Acc: 0.7548 | Val Loss: 0.5304, Acc: 0.7614
    🚀 Fold 1 Val Acc improved to 0.7614. Saving model to

'\nprint("\n--- Starting Evaluation Phase (on Fold 1 Model\'s Validation Set) ---")\n\nfold_to_evaluate = 1\neval_model_path = MODEL_SAVE_PATH_TEMPLATE.format(fold=fold_to_evaluate)\n\nif os.path.exists(eval_model_path):\n    eval_model = SiameseNet(pretrained_base=USE_PRETRAINED_BASE).to(DEVICE) # Re-create model structure\n    try:\n        eval_model.load_state_dict(torch.load(eval_model_path, map_location=DEVICE))\n        eval_model.eval() # Set to evaluation mode\n        print(f"Loaded Fold {fold_to_evaluate} model state from {eval_model_path}")\n\n        # We need the validation data specific to Fold 1 again for evaluation\n        # Re-create the split for fold 1 to get val_idx1\n        if \'skf\' in locals() and \'dataset_indices\' in locals() and \'dataset_labels\' in locals():\n            # Get the indices for the fold we want to evaluate\n            temp_train_idx, val_idx_eval = list(skf.split(dataset_indices, dataset_labels))[fold_to_evaluate-1]\n            val_data

## Inference

In [None]:
print("\nStarting Inference Phase (Ensemble Prediction)")

# Load every available per-fold checkpoint; missing or unreadable ones are
# skipped with a warning so the ensemble can still run on the rest.
models_for_inference = []
for fold in range(1, K_FOLDS + 1):
    model_path = MODEL_SAVE_PATH_TEMPLATE.format(fold=fold)
    if os.path.exists(model_path):
        # Build the model with the same pretrained flag as in training: the
        # first-conv weight shape depends on it, and load_state_dict then
        # overwrites all weights with the checkpoint.
        model = SiameseNet(pretrained_base=USE_PRETRAINED_BASE).to(DEVICE)
        try:
            model.load_state_dict(torch.load(model_path, map_location=DEVICE))
            model.eval()  # evaluation mode (disables dropout, freezes BN statistics)
            models_for_inference.append(model)
            print(f"Loaded model from {model_path} for inference.")
        except Exception as e:
            print(f"Warning: Error loading model {model_path}: {e}. Skipping this model.")
    else:
        print(f"Warning: Model file not found at {model_path}. Skipping this model for ensemble.")

if not models_for_inference:
    print("Cannot perform inference: No trained models loaded.")
else:
    print(f"Loaded {len(models_for_inference)} models for ensemble inference.")

    test_dataset = RPSInferenceDataset(TEST_PKL_PATH)
    if len(test_dataset) > 0:
        # os.cpu_count() may return None; fall back to a single CPU.
        num_workers = (os.cpu_count() or 1) // 2
        test_loader = DataLoader(test_dataset, batch_size=INFERENCE_BATCH_SIZE, shuffle=False, num_workers=num_workers)
        print(f"Test DataLoader created: {len(test_loader)} batches.")

        all_preds_ensemble = []
        all_ids = []
        inference_start_time = time.time()
        with torch.no_grad():
            for im1, im2, ids_batch in test_loader:
                im1, im2 = im1.to(DEVICE), im2.to(DEVICE)

                # Ensemble by averaging the raw logits of all fold models.
                batch_logits_list = [member(im1, im2) for member in models_for_inference]
                avg_logits = torch.stack(batch_logits_list).mean(dim=0)

                preds = avg_logits.argmax(dim=1).cpu().numpy()

                all_preds_ensemble.append(preds)
                # Collated IDs arrive as a tensor for numeric IDs, otherwise as a sequence.
                all_ids.extend(ids_batch.numpy() if isinstance(ids_batch, torch.Tensor) else ids_batch)

        inference_duration = time.time() - inference_start_time
        print(f"Ensemble inference completed in {inference_duration:.2f}s")

        if all_preds_ensemble:
            final_preds = np.concatenate(all_preds_ensemble)
        else:
            final_preds = np.array([])
            print("Warning: No predictions were generated.")

        print("\n--- Starting Post-processing & Submission ---")

        if len(final_preds) == 0:
            print("Error: No final predictions available to save.")
        elif len(final_preds) != len(all_ids):
            print(f"Error: Number of predictions ({len(final_preds)}) does not match number of IDs ({len(all_ids)}). Cannot create submission file.")
        else:
            # Map the binary class back to the submission encoding: 1 -> 1, 0 -> -1.
            final_labels = np.where(final_preds == 1, 1, -1)
            submission_df = pd.DataFrame({'id': all_ids, 'label': final_labels})

            try:
                submission_df.to_csv(SUBMISSION_CSV_PATH, index=False)
                print(f"Submission file saved successfully to: {SUBMISSION_CSV_PATH}")
                print("\nSubmission file preview:")
                print(submission_df.head())
            except Exception as e:
                print(f"Error saving submission file: {e}")

    else:
        print("Skipping inference: Test dataset could not be loaded or is empty.")

print("\n--- Notebook Execution Finished ---")


--- Starting Inference Phase (Ensemble Prediction) ---
Loaded model from best_siamese_resnet_fold_1.pth for inference.
Loaded model from best_siamese_resnet_fold_2.pth for inference.
Loaded model from best_siamese_resnet_fold_3.pth for inference.
Loaded model from best_siamese_resnet_fold_4.pth for inference.
Loaded model from best_siamese_resnet_fold_5.pth for inference.
Loaded 5 models for ensemble inference.
Pickle file 'test.pkl' loaded successfully for inference.
Attempting to stack 'img1' data for inference...
  'img1' stacked successfully. Shape: (20000, 24, 24)
Attempting to stack 'img2' data for inference...
  'img2' stacked successfully. Shape: (20000, 24, 24)
Inference dataset initialized successfully from test.pkl: 20000 samples.
Test DataLoader created: 157 batches.
Ensemble inference completed in 9.52s

--- Starting Post-processing & Submission ---
Submission file saved successfully to: submission_siamese_resnet_acc.csv

Submission file preview:
     id  label
0  3386   