In [None]:
!unzip torch-it-up.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95004.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95005.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95006.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95007.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95008.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95009.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95010.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95011.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95012.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95013.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95014.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95015.png  
  inflating: dataset/Dataset_Image/Dataset_Image/data/v2-95016.png  
  inflating: dataset/Dataset_Image/Dat

In [17]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import models
from PIL import Image
from tqdm import tqdm

# Check GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define paths
DATASET_PATH = "./dataset"
TRAIN_CSV = os.path.join(DATASET_PATH, "train.csv")
TEST_CSV = os.path.join(DATASET_PATH, "test.csv")
IMAGE_FOLDER = os.path.join(DATASET_PATH, "Dataset_Image/Dataset_Image")

# Load CSV
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

# Remap labels to 0-based index
unique_labels = sorted(train_df['label'].unique())
label_mapping = {old_label: new_label for new_label, old_label in enumerate(unique_labels)}
train_df['label'] = train_df['label'].map(label_mapping)
num_classes = len(unique_labels)

# Data Augmentation for Training
train_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),  # Random rotations and shifts
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Transform for Test (No Augmentation)
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Custom Dataset Class
class SymbolDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None, train=True):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform
        self.train = train

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.dataframe.iloc[idx]['image_path'])
        image = Image.open(img_path).convert("L")  # Convert grayscale

        if self.transform:
            image = self.transform(image)

        if self.train:
            label = int(self.dataframe.iloc[idx]['label'])
            return image, label
        else:
            example_id = self.dataframe.iloc[idx]['example_id']
            return image, example_id

# Train-Test Split (98% Train, 2% Validation)
train_size = int(0.98 * len(train_df))
val_size = len(train_df) - train_size

train_dataset, val_dataset = random_split(
    SymbolDataset(train_df, IMAGE_FOLDER, transform=train_transform, train=True),
    [train_size, val_size]
)

# Dataloaders (Optimized for Speed)
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(SymbolDataset(test_df, IMAGE_FOLDER, transform=test_transform, train=False),
                         batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# Define Model (ResNet18 with Modified Input Layer)
class SymbolClassifier(nn.Module):
    def __init__(self, num_classes):
        super(SymbolClassifier, self).__init__()
        self.model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        self.model.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)  # Change to 1-channel input
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        return self.model(x)

# Initialize Model, Loss, and Optimizer
model = SymbolClassifier(num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, verbose=True)

# Mixed Precision Training for Speedup
scaler = torch.cuda.amp.GradScaler()

# Training Loop
epochs = 45
best_acc = 0.0

for epoch in range(epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():  # Mixed precision
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_acc = 100 * correct / total
    print(f" Loss: {running_loss/len(train_loader):.4f}, Accuracy: {train_acc:.2f}%")

    # Validation Step
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    val_acc = 100 * correct / total
    print(f"Validation Accuracy: {val_acc:.2f}%")

    # Early Stopping & LR Reduction
    scheduler.step(val_acc)
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "./best_model.pth")  # Save best model
        print("Model improved, saved!")

print("Training complete!")

# Load Best Model for Submission
model.load_state_dict(torch.load("./best_model.pth"))
model.eval()

# Generate Predictions
predictions = []
with torch.no_grad():
    for images, example_ids in tqdm(test_loader, desc="Generating Predictions"):
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        for eid, pred in zip(example_ids, predicted.cpu().numpy()):
            predictions.append((eid, unique_labels[pred]))  # Convert back to original label

# Create Submission File
submission_df = pd.DataFrame(predictions, columns=["example_id", "label"])
submission_df.to_csv("./submission.csv", index=False)
print("Submission file saved!")


Using device: cuda


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():  # Mixed precision
Epoch 1/45: 100%|██████████| 1031/1031 [01:25<00:00, 12.04it/s]

 Loss: 1.2846, Accuracy: 65.92%





Validation Accuracy: 73.51%
Model improved, saved!


Epoch 2/45: 100%|██████████| 1031/1031 [01:24<00:00, 12.19it/s]

 Loss: 0.8076, Accuracy: 75.35%





Validation Accuracy: 76.75%
Model improved, saved!


Epoch 3/45: 100%|██████████| 1031/1031 [01:26<00:00, 11.91it/s]

 Loss: 0.7085, Accuracy: 77.78%





Validation Accuracy: 78.05%
Model improved, saved!


Epoch 4/45: 100%|██████████| 1031/1031 [01:26<00:00, 11.98it/s]

 Loss: 0.6611, Accuracy: 78.86%





Validation Accuracy: 79.38%
Model improved, saved!


Epoch 5/45: 100%|██████████| 1031/1031 [01:25<00:00, 12.07it/s]

 Loss: 0.6168, Accuracy: 79.82%





Validation Accuracy: 79.75%
Model improved, saved!


Epoch 6/45: 100%|██████████| 1031/1031 [01:25<00:00, 12.03it/s]

 Loss: 0.5905, Accuracy: 80.40%





Validation Accuracy: 79.75%


Epoch 7/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.32it/s]

 Loss: 0.5601, Accuracy: 81.11%





Validation Accuracy: 81.32%
Model improved, saved!


Epoch 8/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.35it/s]

 Loss: 0.5370, Accuracy: 81.77%





Validation Accuracy: 80.13%


Epoch 9/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.41it/s]

 Loss: 0.5157, Accuracy: 82.15%





Validation Accuracy: 81.39%
Model improved, saved!


Epoch 10/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.31it/s]

 Loss: 0.4951, Accuracy: 82.75%





Validation Accuracy: 81.54%
Model improved, saved!


Epoch 11/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.35it/s]

 Loss: 0.4820, Accuracy: 83.00%





Validation Accuracy: 81.20%


Epoch 12/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.34it/s]

 Loss: 0.4588, Accuracy: 83.53%





Validation Accuracy: 82.06%
Model improved, saved!


Epoch 13/45: 100%|██████████| 1031/1031 [01:24<00:00, 12.25it/s]

 Loss: 0.4492, Accuracy: 83.85%





Validation Accuracy: 81.43%


Epoch 14/45: 100%|██████████| 1031/1031 [01:22<00:00, 12.45it/s]

 Loss: 0.4321, Accuracy: 84.39%





Validation Accuracy: 81.80%


Epoch 15/45: 100%|██████████| 1031/1031 [01:22<00:00, 12.43it/s]

 Loss: 0.4206, Accuracy: 84.65%





Validation Accuracy: 81.95%


Epoch 16/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.38it/s]

 Loss: 0.3572, Accuracy: 86.47%





Validation Accuracy: 82.95%
Model improved, saved!


Epoch 17/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.41it/s]

 Loss: 0.3414, Accuracy: 86.96%





Validation Accuracy: 82.95%


Epoch 18/45: 100%|██████████| 1031/1031 [01:22<00:00, 12.51it/s]

 Loss: 0.3322, Accuracy: 87.17%





Validation Accuracy: 83.40%
Model improved, saved!


Epoch 19/45: 100%|██████████| 1031/1031 [01:21<00:00, 12.63it/s]

 Loss: 0.3223, Accuracy: 87.57%





Validation Accuracy: 82.88%


Epoch 20/45: 100%|██████████| 1031/1031 [01:21<00:00, 12.67it/s]

 Loss: 0.3158, Accuracy: 87.76%





Validation Accuracy: 82.54%


Epoch 21/45: 100%|██████████| 1031/1031 [01:22<00:00, 12.49it/s]

 Loss: 0.3065, Accuracy: 88.07%





Validation Accuracy: 82.39%


Epoch 22/45: 100%|██████████| 1031/1031 [01:22<00:00, 12.51it/s]

 Loss: 0.2742, Accuracy: 89.25%





Validation Accuracy: 83.21%


Epoch 23/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.28it/s]

 Loss: 0.2625, Accuracy: 89.56%





Validation Accuracy: 82.50%


Epoch 24/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.42it/s]

 Loss: 0.2566, Accuracy: 89.85%





Validation Accuracy: 82.65%


Epoch 25/45: 100%|██████████| 1031/1031 [01:24<00:00, 12.20it/s]

 Loss: 0.2366, Accuracy: 90.51%





Validation Accuracy: 83.28%


Epoch 26/45: 100%|██████████| 1031/1031 [01:22<00:00, 12.45it/s]

 Loss: 0.2311, Accuracy: 90.78%





Validation Accuracy: 83.21%


Epoch 27/45: 100%|██████████| 1031/1031 [01:26<00:00, 11.93it/s]

 Loss: 0.2269, Accuracy: 90.91%





Validation Accuracy: 83.66%
Model improved, saved!


Epoch 28/45: 100%|██████████| 1031/1031 [01:24<00:00, 12.22it/s]

 Loss: 0.2228, Accuracy: 91.10%





Validation Accuracy: 81.95%


Epoch 29/45: 100%|██████████| 1031/1031 [01:23<00:00, 12.29it/s]

 Loss: 0.2191, Accuracy: 91.24%





Validation Accuracy: 82.65%


Epoch 30/45: 100%|██████████| 1031/1031 [01:25<00:00, 12.01it/s]

 Loss: 0.2140, Accuracy: 91.37%





Validation Accuracy: 82.28%


Epoch 31/45: 100%|██████████| 1031/1031 [01:26<00:00, 11.87it/s]

 Loss: 0.2057, Accuracy: 91.83%





Validation Accuracy: 83.25%


Epoch 32/45: 100%|██████████| 1031/1031 [01:24<00:00, 12.17it/s]

 Loss: 0.2025, Accuracy: 91.92%





Validation Accuracy: 82.13%


Epoch 33/45: 100%|██████████| 1031/1031 [01:25<00:00, 12.12it/s]

 Loss: 0.1988, Accuracy: 92.01%





Validation Accuracy: 82.80%


Epoch 34/45: 100%|██████████| 1031/1031 [01:26<00:00, 11.94it/s]

 Loss: 0.1941, Accuracy: 92.24%





Validation Accuracy: 82.76%


Epoch 35/45: 100%|██████████| 1031/1031 [01:26<00:00, 11.95it/s]

 Loss: 0.1910, Accuracy: 92.38%





Validation Accuracy: 82.36%


Epoch 36/45: 100%|██████████| 1031/1031 [01:26<00:00, 11.92it/s]

 Loss: 0.1931, Accuracy: 92.34%





Validation Accuracy: 83.10%


Epoch 37/45: 100%|██████████| 1031/1031 [01:25<00:00, 12.02it/s]

 Loss: 0.1883, Accuracy: 92.54%





Validation Accuracy: 82.39%


Epoch 38/45:  38%|███▊      | 393/1031 [00:33<00:54, 11.81it/s]


KeyboardInterrupt: 

In [None]:
!sed -E 's/tensor\(([0-9]+)\)/\1/' submission.csv > cleaned_file.csv
!mv submission.csv unfor_sub.csv
!mv cleaned_file.csv submission.csv