In [1]:
# !mkdir -p ~/.kaggle
# !mv kaggle.json ~/.kaggle/

In [2]:
# !chmod 600 ~/.kaggle/kaggle.json

In [3]:
# !kaggle competitions download -c ai-knight

In [4]:
# # Unzip the downloaded dataset
# !unzip /content/ai-knight.zip -d output

In [5]:
# !ls output

In [37]:
torch.cuda.empty_cache()


In [38]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.models import resnet50
from sklearn.metrics import f1_score
from tqdm import tqdm
import pandas as pd
from PIL import Image

# Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [39]:
# Paths
train_real_dir = '/kaggle/input/ai-knight/dataset/train/real'
train_fake_dir = '/kaggle/input/ai-knight/dataset/train/fake'
test_dir = '/kaggle/input/ai-knight/dataset/test'

In [40]:
# Custom Dataset Class
class ImageDataset(Dataset):
    def __init__(self, image_dir, label, transform=None):
        self.image_dir = image_dir
        self.label = label
        self.image_paths = [os.path.join(image_dir, img) for img in os.listdir(image_dir)]
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.label

In [41]:
# Data Augmentation and Transformation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [42]:
# Load Training Data
real_dataset = ImageDataset(train_real_dir, 0, transform)
fake_dataset = ImageDataset(train_fake_dir, 1, transform)

train_dataset = torch.utils.data.ConcatDataset([real_dataset, fake_dataset])
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)

In [43]:
# Model
model = resnet50(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, 2)  # Binary classification
model = model.to(device)

# Loss and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [44]:
# Learning Rate Scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

In [45]:
# Training Loop
num_epochs = 12
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    all_labels, all_preds = [], []
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)

        # Forward Pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward Pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Metrics
        running_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

    f1 = f1_score(all_labels, all_preds, average='weighted')
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}, F1-Score: {f1:.4f}")
    scheduler.step()

# Save Model
torch.save(model.state_dict(), "best_model_epoch_12_batch_16.pth")

100%|██████████| 6250/6250 [10:18<00:00, 10.10it/s]


Epoch [1/15], Loss: 0.3104, F1-Score: 0.8667


100%|██████████| 6250/6250 [10:18<00:00, 10.10it/s]


Epoch [2/15], Loss: 0.2484, F1-Score: 0.8961


100%|██████████| 6250/6250 [10:19<00:00, 10.10it/s]


Epoch [3/15], Loss: 0.2223, F1-Score: 0.9084


100%|██████████| 6250/6250 [10:19<00:00, 10.10it/s]


Epoch [4/15], Loss: 0.2086, F1-Score: 0.9136


100%|██████████| 6250/6250 [10:18<00:00, 10.10it/s]


Epoch [5/15], Loss: 0.1960, F1-Score: 0.9188


100%|██████████| 6250/6250 [10:19<00:00, 10.09it/s]


Epoch [6/15], Loss: 0.1635, F1-Score: 0.9335


100%|██████████| 6250/6250 [10:19<00:00, 10.09it/s]


Epoch [7/15], Loss: 0.1565, F1-Score: 0.9367


100%|██████████| 6250/6250 [10:19<00:00, 10.09it/s]


Epoch [8/15], Loss: 0.1490, F1-Score: 0.9402


100%|██████████| 6250/6250 [10:19<00:00, 10.09it/s]


Epoch [9/15], Loss: 0.1443, F1-Score: 0.9414


100%|██████████| 6250/6250 [10:19<00:00, 10.09it/s]


Epoch [10/15], Loss: 0.1419, F1-Score: 0.9430


100%|██████████| 6250/6250 [10:19<00:00, 10.08it/s]


Epoch [11/15], Loss: 0.1299, F1-Score: 0.9473


100%|██████████| 6250/6250 [10:19<00:00, 10.09it/s]


Epoch [12/15], Loss: 0.1253, F1-Score: 0.9498


100%|██████████| 6250/6250 [10:19<00:00, 10.08it/s]


Epoch [13/15], Loss: 0.1204, F1-Score: 0.9519


100%|██████████| 6250/6250 [10:19<00:00, 10.08it/s]


Epoch [14/15], Loss: 0.1182, F1-Score: 0.9517


100%|██████████| 6250/6250 [10:19<00:00, 10.08it/s]


Epoch [15/15], Loss: 0.1159, F1-Score: 0.9530


In [None]:
# 100%|██████████| 1563/1563 [02:31<00:00, 10.32it/s]
# Epoch [1/10], Loss: 0.2508, F1-Score: 0.8943
# 100%|██████████| 1563/1563 [02:29<00:00, 10.44it/s]
# Epoch [2/10], Loss: 0.2001, F1-Score: 0.9179
# 100%|██████████| 1563/1563 [02:30<00:00, 10.41it/s]
# Epoch [3/10], Loss: 0.1816, F1-Score: 0.9256
# 100%|██████████| 1563/1563 [02:29<00:00, 10.45it/s]
# Epoch [4/10], Loss: 0.1694, F1-Score: 0.9305
# 100%|██████████| 1563/1563 [02:30<00:00, 10.41it/s]
# Epoch [5/10], Loss: 0.1606, F1-Score: 0.9352
# 100%|██████████| 1563/1563 [02:29<00:00, 10.45it/s]
# Epoch [6/10], Loss: 0.1341, F1-Score: 0.9458
# 100%|██████████| 1563/1563 [02:29<00:00, 10.44it/s]
# Epoch [7/10], Loss: 0.1262, F1-Score: 0.9497
# 100%|██████████| 1563/1563 [02:29<00:00, 10.43it/s]
# Epoch [8/10], Loss: 0.1203, F1-Score: 0.9520
# 100%|██████████| 1563/1563 [02:29<00:00, 10.43it/s]
# Epoch [9/10], Loss: 0.1155, F1-Score: 0.9539
# 100%|██████████| 1563/1563 [02:30<00:00, 10.40it/s]
# Epoch [10/10], Loss: 0.1129, F1-Score: 0.9549

In [46]:
# Generate Predictions for Test Set
class TestDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_paths = [os.path.join(image_dir, img) for img in os.listdir(image_dir)]
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, os.path.basename(image_path)

In [47]:

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = TestDataset(test_dir, test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

In [49]:
model.eval()
submission = []
with torch.no_grad():
    for images, image_ids in tqdm(test_loader):
        images = images.to(device)
        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)
        submission.extend(zip(image_ids, preds.cpu().numpy()))


100%|██████████| 16/16 [00:01<00:00, 12.29it/s]


In [50]:
# Save Submission File
submission_df = pd.DataFrame(submission, columns=["ID", "Prediction"])
submission_df.to_csv("submission_epoch_15_batch_16_shubham.csv", index=False)

In [None]:
best_model_epoch_12_batch_16.pth

In [51]:
s = pd.read_csv('/kaggle/working/submission_epoch_15_batch_16_shubham.csv')
s.head()

Unnamed: 0,ID,Prediction
0,ID_43.jpg,1
1,ID_24.jpg,1
2,ID_319.jpg,1
3,ID_446.jpg,1
4,ID_178.jpg,0


In [52]:
t = []

for col in s['ID']:
    col = col.split('.')[0]
    col = col.split('_')[1]
    t.append(col)

In [53]:
s['ID2'] = t

In [54]:
s.head()

Unnamed: 0,ID,Prediction,ID2
0,ID_43.jpg,1,43
1,ID_24.jpg,1,24
2,ID_319.jpg,1,319
3,ID_446.jpg,1,446
4,ID_178.jpg,0,178


In [55]:
s.to_csv("/kaggle/working/test_epoch_15_batch_16_shubham.csv", index=False)