In [34]:
import copy
import itertools
import pickle

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
from PIL import Image
from sklearn.model_selection import train_test_split, KFold
from torch.utils.data import Dataset, DataLoader, Subset


In [35]:
# Seed the NumPy and PyTorch global random generators with the same value
# so that shuffling, augmentation and weight initialisation are repeatable.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f14d0558d70>

In [36]:
# --- Training schedule ---
K_FOLDS = 5
NUM_EPOCHS = 20
BATCH_SIZE = 32
EPOCHS_PER_PRINT = 1

# --- Optimiser / regularisation ---
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-6
DROPOUT_RATE = 0.3

# --- Early stopping and LR scheduling ---
# NOTE: PATIENCE is larger than NUM_EPOCHS, so with these defaults the
# early-stopping counter can never reach its threshold.
PATIENCE = 30
SCHEDULER_FACTOR = 0.5
SCHEDULER_PATIENCE = 5

# --- Files ---
TRAIN_DATA_PATH = 'train.pkl'
TEST_DATA_PATH = 'test.pkl'
MODEL_PATH = 'best_model.pth'

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)

In [37]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


full_train_data = _load_pickle(TRAIN_DATA_PATH)
test_data = _load_pickle(TEST_DATA_PATH)

In [38]:
# Hold out a fixed 10% of the labelled data. Despite its name, `test_df` is
# the validation split: it is what `validation_dataset` is built from.
HOLDOUT_FRACTION = 0.1
train_df, test_df = train_test_split(
    full_train_data,
    test_size=HOLDOUT_FRACTION,
    random_state=42,
)

In [39]:
# Random augmentations, applied to training images only.
_train_augmentations = [
    transforms.RandomAffine(degrees=8, translate=(0.1, 0.1), scale=(0.95, 1.05), shear=5),
    transforms.RandomPerspective(distortion_scale=0.1, p=0.5),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
]
# Previously tried and currently disabled (would go after Normalize):
# transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value=0)


def _tensor_and_normalize():
    """Return the tensor-conversion and normalisation steps shared by both pipelines."""
    return [transforms.ToTensor(), transforms.Normalize(mean=[0.5], std=[0.5])]


# Training pipeline: array -> PIL image -> augmentations -> normalised tensor.
train_transform = transforms.Compose(
    [transforms.ToPILImage(), *_train_augmentations, *_tensor_and_normalize()]
)
# Evaluation pipeline: same conversion and normalisation, no augmentation.
test_transform = transforms.Compose(
    [transforms.ToPILImage(), *_tensor_and_normalize()]
)

In [40]:
class RPSDataset(Dataset):
    """Dataset of image pairs read from the ``id``/``img1``/``img2`` columns of ``df``.

    Args:
        df: Table-like object indexable by column name. ``img1`` and ``img2``
            hold per-row images that are stacked into ``uint8`` arrays.
        labels_available: When true, the ``label`` column is read and each
            sample carries a ``'label'`` entry.
        transform: Optional callable applied separately to each image. When it
            is falsy, images are converted to float tensors scaled by 1/255
            with a leading channel axis.
        is_train: Stored on the instance; not read anywhere in this class.
    """

    def __init__(self, df, labels_available=True, transform=None, is_train=False):
        self.data = df
        self.id = np.array(df['id'], dtype=np.int64)
        self.image1, self.image2 = (
            np.array(df[column].tolist(), dtype=np.uint8)
            for column in ('img1', 'img2')
        )
        self.labels_available = labels_available
        self.labels = np.array(df['label'], dtype=np.int64) if labels_available else None
        self.transform = transform
        self.is_train = is_train

    def __len__(self):
        """Number of image pairs."""
        return len(self.image1)

    def _prepare(self, image):
        """Apply the transform if one was given, else do the plain 1/255 conversion."""
        if self.transform:
            return self.transform(image)
        as_float = torch.from_numpy(image.astype(np.float32))
        return as_float.unsqueeze(0) / 255.0

    def __getitem__(self, idx):
        """Return a dict with ``img1``, ``img2``, ``id`` and, if available, ``label``."""
        # img1 is processed before img2, so random transforms draw in that order.
        sample = {
            'img1': self._prepare(self.image1[idx]),
            'img2': self._prepare(self.image2[idx]),
            'id': self.id[idx],
        }
        if self.labels_available:
            sample['label'] = self.labels[idx]
        return sample


In [41]:
# Datasets: augmented training split, held-out validation split (`test_df`),
# and the unlabelled test set used for the submission.
train_dataset = RPSDataset(
    df=train_df, labels_available=True, transform=train_transform, is_train=True
)
validation_dataset = RPSDataset(
    df=test_df, labels_available=True, transform=test_transform, is_train=False
)
test_dataset = RPSDataset(
    df=test_data, labels_available=False, transform=test_transform, is_train=False
)

# Only the training loader shuffles; evaluation loaders keep dataset order.
_loader_options = {'batch_size': BATCH_SIZE}
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
validation_loader = DataLoader(validation_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

In [42]:
# class ConcatenatedCNN(nn.Module):
#     def __init__(self):
#         super(ConcatenatedCNN, self).__init__()
#         self.conv1 = nn.Conv2d(in_channels=2, out_channels=16, kernel_size=3, padding=1)
#         self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
#         self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
#         self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

#         self.flattend_size = 32 * 6 * 6

#         self.fc1 = nn.Linear(self.flattend_size, 64)
#         self.Dropout = nn.Dropout(0.5)
#         self.fc2 = nn.Linear(64, 1)

#     def forward(self, x1, x2):
#         x = torch.cat((x1, x2), dim=1)
#         x = F.relu(self.conv1(x))
#         x = self.pool1(x)
#         x = F.relu(self.conv2(x))
#         x = self.pool2(x)

#         x = x.view(-1, self.flattend_size)
#         x = F.relu(self.fc1(x))
#         x = self.Dropout(x)
#         x = self.fc2(x)
#         return x

In [43]:
# Best-performing architecture so far: 3 convolutional blocks with a 128-dimensional embedding.
class BaseFeatureExtractor(nn.Module):
    """Three conv/ReLU/max-pool stages followed by a ReLU-activated linear layer.

    Takes single-channel images and returns one ``output_dim``-sized feature
    vector per image. The flatten step assumes the third pooling stage yields
    128 feature maps of size 3x3.

    NOTE: ``bn1``/``bn2``/``bn3`` are constructed (so their parameters and
    buffers appear in ``state_dict``) but ``forward`` never applies them.
    They are kept so that checkpoints saved from this class keep loading.
    """

    def __init__(self, output_dim=128):
        super().__init__()

        # Stage 1: 1 -> 32 channels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: 64 -> 128 channels.
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Projection head on the flattened 128 x 3 x 3 feature maps.
        self.flattend_size = 128 * 3 * 3
        self.fc = nn.Linear(self.flattend_size, output_dim)

    def forward(self, x):
        """Return ReLU-activated features of shape ``(batch, output_dim)``."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        flat = x.view(-1, self.flattend_size)
        return F.relu(self.fc(flat))
    
class SiameseNetwork(nn.Module):
    """Two-input network that scores an image pair with one shared feature extractor.

    Both inputs go through the same ``BaseFeatureExtractor``; the two feature
    vectors are concatenated and passed through a small classifier that emits
    a single raw score per pair.

    Args:
        feature_output_dim: Size of the feature vector produced per image.
        dropout: Dropout probability used between the two classifier layers.
    """

    def __init__(self, feature_output_dim=128, dropout=DROPOUT_RATE):
        super().__init__()
        self.base_feature_extractor = BaseFeatureExtractor(output_dim=feature_output_dim)
        # Concatenation doubles the width. The alternative tried earlier,
        # torch.abs(output1 - output2), would keep it at feature_output_dim.
        self.classifier_input_dim = feature_output_dim * 2
        self.fc1 = nn.Linear(self.classifier_input_dim, 64)
        self.Dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, input1, input2):
        """Return one raw score per pair, shape ``(batch, 1)``."""
        # input1 is embedded before input2, both with the same weights.
        embeddings = [self.base_feature_extractor(branch) for branch in (input1, input2)]
        combined = torch.cat(embeddings, dim=1)

        hidden = self.Dropout(F.relu(self.fc1(combined)))
        return self.fc2(hidden)
    

In [44]:
def train_model(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Module called as ``model(img1, img2)`` that returns one score per pair.
        dataloader: Iterable of dict batches with ``'img1'``, ``'img2'`` and ``'label'``.
            Accuracy compares the sign of each score with the label, so labels
            are expected to be -1/+1.
        criterion: Loss called as ``criterion(scores, targets)`` on 1-D float tensors.
        optimizer: Optimizer stepped once per batch.
        device: Device the batch tensors are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: sample-weighted mean loss and the fraction of
        correct sign predictions. ``(0.0, 0.0)`` if the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for batch in dataloader:
        img1 = batch['img1'].to(device)
        img2 = batch['img2'].to(device)
        # Flatten explicitly to (batch,): unlike squeeze(), this keeps a 1-D
        # tensor even when the batch holds a single sample.
        targets = batch['label'].to(device).float().reshape(-1)

        optimizer.zero_grad()
        logits = model(img1, img2).reshape(-1)

        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        batch_size = targets.size(0)
        running_loss += loss.item() * batch_size
        # Metrics only: detach so the in-place edit below stays out of autograd.
        predicted = torch.sign(logits.detach())
        predicted[predicted == 0] = -1  # a score of exactly 0 counts as class -1
        correct_predictions += (predicted == targets).sum().item()
        total_samples += batch_size

    # An empty loader would otherwise end in a ZeroDivisionError.
    if total_samples == 0:
        return 0.0, 0.0

    epoch_acc = correct_predictions / total_samples
    epoch_loss = running_loss / total_samples
    return epoch_loss, epoch_acc

def validate_model(model, dataloader, criterion, device):
    """Evaluate ``model`` over ``dataloader`` without updating weights.

    Args:
        model: Module called as ``model(img1, img2)`` that returns one score per pair.
        dataloader: Iterable of dict batches with ``'img1'``, ``'img2'`` and ``'label'``.
            Accuracy compares the sign of each score with the label, so labels
            are expected to be -1/+1.
        criterion: Loss called as ``criterion(scores, targets)`` on 1-D float tensors.
        device: Device the batch tensors are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: sample-weighted mean loss and the fraction of
        correct sign predictions. ``(0.0, 0.0)`` if the dataloader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for batch in dataloader:
            img1 = batch['img1'].to(device)
            img2 = batch['img2'].to(device)
            # Flatten explicitly to (batch,): unlike squeeze(), this keeps a
            # 1-D tensor even when the batch holds a single sample.
            targets = batch['label'].to(device).float().reshape(-1)

            logits = model(img1, img2).reshape(-1)
            loss = criterion(logits, targets)

            batch_size = targets.size(0)
            running_loss += loss.item() * batch_size
            predicted = torch.sign(logits)
            predicted[predicted == 0] = -1  # a score of exactly 0 counts as class -1
            correct_predictions += (predicted == targets).sum().item()
            total_samples += batch_size

    # An empty loader would otherwise end in a ZeroDivisionError.
    if total_samples == 0:
        return 0.0, 0.0

    epoch_acc = correct_predictions / total_samples
    epoch_loss = running_loss / total_samples
    return epoch_loss, epoch_acc

In [45]:
def run_training(model, train_loader, validation_loader, criterion, optimizer, 
                 device, num_epochs=NUM_EPOCHS, model_save_path=MODEL_PATH,
                 use_scheduler=True, patience = PATIENCE, scheduler_factor=SCHEDULER_FACTOR, 
                 scheduler_patience=SCHEDULER_PATIENCE, print_every=EPOCHS_PER_PRINT):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Each epoch runs ``train_model`` then ``validate_model``. Whenever validation
    accuracy beats the best seen so far, the weights are written to
    ``model_save_path`` and a snapshot of them is kept in memory.

    Args:
        model: Network to train (updated in place).
        train_loader: Batches passed to ``train_model``.
        validation_loader: Batches passed to ``validate_model``.
        criterion: Loss function forwarded to both helpers.
        optimizer: Optimizer forwarded to ``train_model``.
        device: Device forwarded to both helpers.
        num_epochs: Maximum number of epochs.
        model_save_path: File the best weights are saved to.
        use_scheduler: If true, a ``ReduceLROnPlateau`` scheduler in ``'max'``
            mode is stepped on validation accuracy after every epoch.
        patience: Consecutive non-improving epochs tolerated before stopping.
        scheduler_factor: LR reduction factor for the scheduler.
        scheduler_patience: Scheduler patience in epochs.
        print_every: Print metrics every this many epochs.

    Returns:
        ``(best_model, history, best_accuracy, model)``: ``best_model`` is a
        state dict copied at the best validation accuracy (the starting weights
        if no epoch ever improved on 0), ``history`` holds per-epoch metrics and
        learning rate, and ``model`` is the network with its last-epoch weights.
    """
    best_accuracy = 0
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'lr': []}

    # state_dict() hands back references to the live parameter tensors, so the
    # snapshot must be deep-copied or later optimizer steps would overwrite it.
    # Taking one up front also guarantees a valid return value when no epoch
    # improves (or num_epochs is 0).
    best_model = copy.deepcopy(model.state_dict())

    scheduler = None
    if use_scheduler:
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor = scheduler_factor, 
                                                   patience=scheduler_patience)

    patience_counter = 0

    print("Start training ...")
    
    for epoch in range(num_epochs):
        train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate_model(model, validation_loader, criterion, device)
        # Learning rate in effect during this epoch (read before the scheduler steps).
        current_lr = optimizer.param_groups[0]['lr']
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['lr'].append(current_lr)

        if (epoch+1) % print_every == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
                f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        if use_scheduler:
            scheduler.step(val_acc)
            
        if val_acc > best_accuracy:
            best_accuracy = val_acc
            torch.save(model.state_dict(), model_save_path)
            print(f"Model saved with accuracy: {best_accuracy:.4f}")
            best_model = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered after {patience_counter} epochs without improvement.")
                break
    
    print("Training complete.")
    print(f"Best validation accuracy: {best_accuracy:.4f}")
    return best_model, history, best_accuracy, model

In [46]:
# def run_training_fold(model, train_loader, validation_loader, cretirion, optimizer, 
#                       device, num_epochs=NUM_EPOCHS, model_save_path=MODEL_PATH,
#                       use_scheduler=True, patience = PATIENCE, scheduler_factor=SCHEDULER_FACTOR, 
#                       scheduler_patience=SCHEDULER_PATIENCE, print_every=EPOCHS_PER_PRINT):
#     best_accuracy = 0
#     history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'lr': []}

#     scheduler = None
#     if use_scheduler:
#         scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor = scheduler_factor, 
#                                                    patience=scheduler_patience)

#     patience = PATIENCE
#     patience_counter = 0
#     best_model_state = None

#     print("Start training ...")
    
#     for epoch in range(num_epochs):
#         train_loss, train_acc = train_model(model, train_loader, cretirion, optimizer, device)
#         val_loss, val_acc = validate_model(model, validation_loader, cretirion, device)
#         current_lr = optimizer.param_groups[0]['lr']
#         history['train_loss'].append(train_loss)
#         history['train_acc'].append(train_acc)
#         history['val_loss'].append(val_loss)
#         history['val_acc'].append(val_acc)
#         history['lr'].append(current_lr)

#         if (epoch+1) % print_every == 0:
#             print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
#                 f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

#         if use_scheduler:
#             scheduler.step(val_acc)

#         if val_acc > best_accuracy:
#             best_accuracy = val_acc
#             torch.save(model.state_dict(), model_save_path)
#             print(f"Model saved with accuracy: {best_accuracy:.4f}")
#             best_model_state = model.state_dict()
#             patience_counter = 0
#         else:
#             patience_counter += 1
#             if patience_counter >= patience:
#                 print(f"Early stopping triggered after {patience_counter} epochs without improvement.")
#                 break
    
#     print("Training complete.")
#     print(f"Best validation accuracy: {best_accuracy:.4f}")
#     return model_save_path, best_accuracy

In [47]:
# kf = KFold(n_splits=K_FOLDS, shuffle=True, random_state=42)
# fold_results = []
# test_predictions = []

# full_rps_dataset = RPSDataset(full_train_data, labels_available=True, transform=None, is_train=True)

# test_dataset_obj = RPSDataset(test_data, labels_available=False, transform=test_transform, is_train=False)
# test_loader_kf = DataLoader(test_dataset_obj, batch_size=BATCH_SIZE, shuffle=False)

# for fold, (train_idx, val_idx) in enumerate(kf.split(full_train_data)):
#     print(f"Fold {fold+1}/{K_FOLDS}")
#     train_subset = Subset(full_rps_dataset, train_idx)
#     val_subset = Subset(full_rps_dataset, val_idx)

#     train_dataset_fold = RPSDataset(full_train_data.iloc[train_idx], labels_available=True, transform=train_transform, is_train=True)
#     val_dataset_fold = RPSDataset(full_train_data.iloc[val_idx], labels_available=True, transform=test_transform, is_train=False)

#     train_loader = DataLoader(train_dataset_fold, batch_size=BATCH_SIZE, shuffle=True)
#     val_loader = DataLoader(val_dataset_fold, batch_size=BATCH_SIZE, shuffle=False)

#     model_fold = SiameseNetwork(dropout=DROPOUT_RATE).to(device)
#     criterion_fold = nn.SoftMarginLoss()
#     optimizer_fold = optim.AdamW(model_fold.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
#     model_path_fold = f"best_model_fold_{fold+1}.pth"

#     best_model_path, best_accuracy = run_training_fold(model_fold, train_loader, val_loader, criterion_fold, optimizer_fold,
#                                                       device, num_epochs=NUM_EPOCHS, model_save_path=model_path_fold,
#                                                       use_scheduler=True, patience=PATIENCE,
#                                                       scheduler_factor=SCHEDULER_FACTOR,
#                                                       scheduler_patience=SCHEDULER_PATIENCE,
#                                                       print_every=EPOCHS_PER_PRINT)
    
#     fold_results.append({
#         'fold': fold + 1,
#         'best_accuracy': best_accuracy,
#         'model_path': best_model_path
#     })

#     # Load the best model for inference
#     best_model_fold = SiameseNetwork(dropout=DROPOUT_RATE).to(device)
#     best_model_fold.load_state_dict(torch.load(best_model_path, map_location=device))
#     best_model_fold.eval()

#     # Inference on the test set
#     test_predictions_fold = []
#     with torch.no_grad():
#         for batch in test_loader_kf:
#             img1 = batch['img1'].to(device)
#             img2 = batch['img2'].to(device)

#             outputs_original = best_model_fold(img1, img2).squeeze()

#             img1_hf = TF.hflip(img1)
#             img2_hf = TF.hflip(img2)
#             outputs_hf = best_model_fold(img1_hf, img2_hf).squeeze()

#             if outputs_original.dim() == 0:
#                 outputs_original = outputs_original.unsqueeze(0)
#             if outputs_hf.dim() == 0:
#                 outputs_hf = outputs_hf.unsqueeze(0)

#             average_outputs = (outputs_original + outputs_hf) / 2.0
#             test_predictions_fold.append(average_outputs.cpu().numpy())

#     test_predictions.append(np.concatenate(test_predictions_fold))

#     del model_fold, optimizer_fold, train_loader, val_loader, best_model_fold
#     if torch.cuda.is_available():
#         torch.cuda.empty_cache()

In [48]:
# for res in fold_results:
#     print(f"Fold {res['fold']}: Best Accuracy: {res['best_accuracy']:.4f}, Model Path: {res['model_path']}")
# test_predictions = [np.array(pred) for pred in test_predictions]

# ensemble_predictions = np.mean(np.stack(test_predictions, axis=0), axis=0)
# print(f"Shape of ensemble predictions: {ensemble_predictions.shape}")

In [49]:
# final_ensemble_predictions = np.sign(ensemble_predictions)
# final_ensemble_predictions[final_ensemble_predictions == 0] = -1

# test_ids_ensemble = test_dataset_obj.id
# submission_df = pd.DataFrame({
#     'id': test_ids_ensemble,
#     'label': final_ensemble_predictions.astype(int)
# })
# submission_df.to_csv('submission.csv', index=False)
# print("Submission file created.")

In [50]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = ConcatenatedCNN().to(device)
# model = SiameseNetwork().to(device)
# criterion = nn.SoftMarginLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

In [51]:
# Fine-tuning sweep: every (lr, weight decay, dropout) combination starts from
# the same pretrained checkpoint and is trained with run_training.
PRETRAINED_CHECKPOINT = "all_best_model/3block_128dim_lr_1e-05_wd_1e-05_dr_0.5.pth"

learning_rates = [1e-6]
weight_decays = [1e-6]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dropout_rates = [0.3]

parameter_combinations = list(itertools.product(learning_rates, weight_decays, dropout_rates))
results = []

for lr, wd, dr in parameter_combinations:
    print(f"Training with learning rate: {lr}, weight decay: {wd}, dropout rate: {dr}")
    model = SiameseNetwork(dropout=dr).to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(PRETRAINED_CHECKPOINT, map_location=device))
    criterion = nn.SoftMarginLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    model_path = f"best_model_lr_{lr}_wd_{wd}_dr_{dr}.pth"

    # patience=NUM_EPOCHS means early stopping never triggers in this sweep.
    final_model, history, best_acc, last_model = run_training(model, train_loader, validation_loader, criterion, optimizer, 
                                                  device, NUM_EPOCHS, model_path, use_scheduler=False, patience=NUM_EPOCHS)
    
    results.append({'lr': lr, 'wd': wd, 'dr': dr, 'best_val_acc': best_acc, 'history': history})
    print(f"Best validation accuracy for lr={lr}, wd={wd}, dr={dr}: {best_acc:.4f}")

    # `model` is intentionally not deleted: `last_model` is the same object, so
    # deleting the name frees nothing, and a later cell still uses `model`.
    # The next iteration rebinds both names and releases the old network.
    del optimizer
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


Training with learning rate: 1e-06, weight decay: 1e-06, dropout rate: 0.3
Start training ...
Epoch [1/20], Train Loss: 0.2821, Train Acc: 0.8777, Val Loss: 0.3943, Val Acc: 0.8460
Model saved with accuracy: 0.8460
Epoch [2/20], Train Loss: 0.2787, Train Acc: 0.8809, Val Loss: 0.3952, Val Acc: 0.8468
Model saved with accuracy: 0.8468
Epoch [3/20], Train Loss: 0.2816, Train Acc: 0.8791, Val Loss: 0.3960, Val Acc: 0.8465
Epoch [4/20], Train Loss: 0.2761, Train Acc: 0.8824, Val Loss: 0.3968, Val Acc: 0.8468
Epoch [5/20], Train Loss: 0.2788, Train Acc: 0.8818, Val Loss: 0.3975, Val Acc: 0.8460


KeyboardInterrupt: 

In [None]:
# best_model_1 = SiameseNetwork(dropout=0.3).to(device)
# best_model_1.load_state_dict(torch.load("3block_128dim_lr_5e-05_wd_1e-06_dr_0.3.pth"))
# best_model_1.eval()
# predictions = []
# test_ids = []

# with torch.no_grad():
#     for batch in test_loader:
#         img1 = batch['img1'].to(device)
#         img2 = batch['img2'].to(device)
#         ids = batch['id']

#         outputs = best_model_1(img1, img2)
#         predicted = torch.sign(outputs.squeeze())
#         predicted[predicted == 0] = -1

#         predicted = predicted.cpu().numpy()
#         ids = ids.numpy()

#         predictions.extend(predicted)
#         if isinstance(ids, torch.Tensor):
#             test_ids.extend(ids.cpu().numpy().tolist())
#         else:
#             test_ids.extend(ids)

# submission_df = pd.DataFrame({'id': test_ids, 'label': predictions})
# submission_df['label'] = submission_df['label'].astype(int)
# submission_df.to_csv('submission.csv', index=False)

In [None]:
# Test-time augmentation: average the scores for the original pair and the
# horizontally flipped pair, then threshold at 0 to get -1/+1 labels.
best_model_1 = SiameseNetwork(dropout=0.3).to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
best_model_1.load_state_dict(
    torch.load("best_model_lr_1e-05_wd_1e-05_dr_0.5.pth", map_location=device)
)
best_model_1.eval()
tta_predictions = []
test_ids_tta = []

with torch.no_grad():
    for batch in test_loader:
        img1 = batch['img1'].to(device)
        img2 = batch['img2'].to(device)
        ids = batch['id']

        # reshape(-1) always yields a 1-D tensor, including for a final batch
        # of one sample, so no 0-dim special-casing is needed.
        output_original = best_model_1(img1, img2).reshape(-1)

        img1_hf = TF.hflip(img1)
        img2_hf = TF.hflip(img2)
        output_hf = best_model_1(img1_hf, img2_hf).reshape(-1)

        average_output = (output_original + output_hf) / 2.0

        tta_predictions.extend(average_output.cpu().numpy())

        if isinstance(ids, torch.Tensor):
            test_ids_tta.extend(ids.cpu().numpy().tolist())
        else:
            test_ids_tta.extend(ids)

final_predictions_tta = np.sign(np.array(tta_predictions))
final_predictions_tta[final_predictions_tta == 0] = -1  # a score of exactly 0 counts as class -1

submission_df = pd.DataFrame({'id': test_ids_tta, 'label': final_predictions_tta.astype(int)})
submission_df.to_csv('submission.csv', index=False)

In [None]:
# One row per sweep run; then show the per-epoch history of the first run.
results_df = pd.DataFrame(results)
first_run_history = results_df['history'][0]
pd.DataFrame(first_run_history)

Unnamed: 0,train_loss,train_acc,val_loss,val_acc,lr
0,0.409684,0.820611,0.422744,0.84425,1e-05
1,0.387444,0.823306,0.409138,0.8425,1e-05
2,0.384957,0.823083,0.399084,0.84225,1e-05
3,0.378467,0.82675,0.395369,0.84175,1e-05
4,0.375652,0.828528,0.394458,0.84125,1e-05
5,0.373304,0.831472,0.390618,0.84275,1e-05
6,0.370057,0.829639,0.39219,0.84425,1e-05
7,0.370574,0.832889,0.391002,0.8435,5e-06
8,0.370326,0.830333,0.390883,0.83975,5e-06
9,0.370434,0.832806,0.391502,0.84075,5e-06


: 

In [None]:
# Plain (non-TTA) inference with the most recently trained network.
# `last_model` is the network returned by run_training in the sweep cell. The
# name `model` cannot be used here because that cell deletes it.
last_model.eval()
predictions = []
test_ids = []

with torch.no_grad():
    for batch in test_loader:
        img1 = batch['img1'].to(device)
        img2 = batch['img2'].to(device)
        ids = batch['id']

        # reshape(-1) keeps a 1-D tensor even for a final batch of one sample.
        # squeeze() would give a 0-d array that list.extend cannot iterate.
        outputs = last_model(img1, img2).reshape(-1)
        predicted = torch.sign(outputs)
        predicted[predicted == 0] = -1  # a score of exactly 0 counts as class -1

        predictions.extend(predicted.cpu().numpy())
        if isinstance(ids, torch.Tensor):
            test_ids.extend(ids.cpu().numpy().tolist())
        else:
            test_ids.extend(ids)

submission_df = pd.DataFrame({'id': test_ids, 'label': predictions})
submission_df['label'] = submission_df['label'].astype(int)
submission_df.to_csv('submission.csv', index=False)
print("Submission file created: submission.csv")
# Save the model
torch.save(last_model.state_dict(), "siamese_model.pth")
print("Model saved as siamese_model.pth")