In [None]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Echo CUDA availability so the notebook output records which device is in use.
print(torch.cuda.is_available())

In [None]:
class CelebDataset(Dataset):
    """Labelled image dataset backed by an annotation CSV and an image folder.

    Every item is an ``(image, label)`` pair: the image file named in the CSV
    column at position 1, forced to RGB and optionally transformed, together
    with the value stored in the CSV column at position 2.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.transform = transform
        self.root_dir = root_dir
        self.labels = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of annotation rows, i.e. the number of samples."""
        return self.labels.shape[0]

    def __getitem__(self, idx):
        """Load the sample at ``idx`` and return it as ``(image, label)``."""
        # Indices may arrive as tensors; pandas needs plain Python values.
        index = idx.tolist() if torch.is_tensor(idx) else idx

        annotations = self.labels
        file_name = annotations.iloc[index, 1]  # position 1: image file name
        label = annotations.iloc[index, 2]      # position 2: label value

        picture = Image.open(os.path.join(self.root_dir, file_name))

        # Anything that is not already RGB (grayscale, RGBA, palette, ...) is
        # converted so every sample has three channels.
        if picture.mode != 'RGB':
            picture = picture.convert('RGB')

        if not self.transform:
            return picture, label
        return self.transform(picture), label

# Preprocessing applied to every image before it is fed to the network.
_resize = transforms.Resize((224, 224))  # fixed 224x224 spatial size
_to_tensor = transforms.ToTensor()  # PIL image -> float tensor
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)  # per-channel standardisation with the given mean/std
transform = transforms.Compose([_resize, _to_tensor, _normalize])



In [None]:
import random
from torch.utils.data import random_split
# Seed Python's own RNG (kept from the original cell for any code that uses it).
random.seed(42)

dataset = CelebDataset(csv_file='./train.csv', root_dir='./train', transform=transform)

# 70 % of the samples are used for training, the remainder for validation.
train_size = int(0.7 * len(dataset))
val_size = len(dataset) - train_size

# random_split draws its permutation from a torch generator, not from Python's
# `random` module, so `random.seed` alone does not make the split repeatable.
# A fixed generator keeps the same train/validation membership across kernel
# restarts; otherwise, resuming from the on-disk checkpoint would leak earlier
# training samples into the validation set.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Validation metrics are sums over all samples, so the order is irrelevant.
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import pandas as pd
import torch.nn.functional as F

class CNN(nn.Module):
    """Small two-stage convolutional classifier for 3-channel images.

    Architecture: conv(3->16) -> ReLU -> 2x2 max-pool -> conv(16->32) -> ReLU
    -> 2x2 max-pool -> flatten -> linear(512) -> ReLU -> linear(num_classes).
    The output is raw logits; no softmax is applied.

    Args:
        num_classes (int): Number of output logits.
        input_size (int or (int, int), optional): Height/width of the images
            the network will receive. Defaults to 224, which gives the
            32 * 56 * 56 flattened features the layer was originally
            hard-coded for.
    """

    def __init__(self, num_classes, input_size=224):
        super().__init__()
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # The padded 3x3 convolutions keep the spatial size and each of the
        # two pooling steps halves it (rounding down), hence the // 4.
        flat_features = 32 * (height // 4) * (width // 4)
        self.fc1 = nn.Linear(flat_features, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images.

        Raises:
            ValueError: If the pooled feature map does not have the number of
                features the first linear layer was built for.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))

        expected = self.fc1.in_features
        actual = x.shape[-3:].numel()
        if actual != expected:
            # A bare view(-1, expected) would silently fold the surplus
            # spatial positions into the batch dimension for larger inputs.
            raise ValueError(
                f"CNN expected {expected} features per sample after pooling "
                f"but got {actual}; check the input image size."
            )

        x = x.view(-1, expected)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Size of the classifier's output layer.
num_classes = 100

# Fit an encoder on the 'Category' column of category.csv so string labels can
# be mapped to integer class indices and back.
category = pd.read_csv("./category.csv")
label_encoder = LabelEncoder().fit(category['Category'])

# Fresh network on the selected device, with its loss and optimizer.
model = CNN(num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001, weight_decay=0.1)

# Write the initial weights and optimizer state to disk; the training cell
# reads this file back before it starts.
state = {
    "model": model.state_dict(),
    "op": optimizer.state_dict(),
}
torch.save(state, "./model_regularized.pickle")

In [None]:
def _run_epoch(model, loader, optimizer=None):
    """Make one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is switched to training mode and updated
    after every batch; without one it is switched to eval mode and gradient
    tracking is disabled. Labels are mapped to class indices with the
    notebook-level ``label_encoder`` and scored with the notebook-level
    ``criterion``.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for inputs, labels in tqdm(loader):
            # The model lives on `device`, so the batch has to be moved there
            # too; leaving it on the CPU fails as soon as CUDA is in use.
            inputs = inputs.to(device)
            encoded_labels = torch.tensor(
                label_encoder.transform(labels), dtype=torch.long, device=device
            )

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, encoded_labels)
            if training:
                loss.backward()
                optimizer.step()

            # The criterion reports a per-batch mean; weight it by the batch
            # size so the epoch figure is a true per-sample mean.
            loss_sum += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            n_correct += (predicted == encoded_labels).sum().item()
            n_seen += encoded_labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen


num_epochs = 5
# Epoch numbering starts at 5 as in the original loop; presumably five epochs
# were already run against this checkpoint -- TODO confirm.
start_epoch = 5
total_epochs = start_epoch + num_epochs

# Resume from the checkpoint once, instead of re-reading it every epoch.
# NOTE: torch.load unpickles the file; only load checkpoints you created.
state = torch.load("./model_regularized.pickle", map_location=device)
model = CNN(num_classes).to(device)
model.load_state_dict(state['model'])
# load_state_dict restores the saved param_groups, so the learning rate and
# weight decay are those stored in the checkpoint. The lr=0.01 and
# weight_decay=1.25 previously passed to the constructor here were overwritten
# by that call and never took effect.
optimizer = optim.Adam(model.parameters())
optimizer.load_state_dict(state["op"])

for epoch in range(start_epoch, total_epochs):
    train_loss, train_accuracy = _run_epoch(model, train_loader, optimizer)
    val_loss, val_accuracy = _run_epoch(model, val_loader)

    print(f'Epoch [{epoch+1}/{total_epochs}], Train Loss: {train_loss:.4f}')
    print(f'Epoch [{epoch+1}/{total_epochs}], Val Loss: {val_loss:.4f}')
    print(f'Epoch [{epoch+1}/{total_epochs}], Train Accuracy: {train_accuracy:.4f}')
    print(f'Epoch [{epoch+1}/{total_epochs}], Val Accuracy: {val_accuracy:.4f}')

    # Checkpoint after every epoch so an interrupted run can be resumed.
    state = {"model": model.state_dict(), "op": optimizer.state_dict()}
    torch.save(state, "./model_regularized.pickle")

print('Training finished!')


In [None]:
# Load the checkpoint the training cell writes. The previous path,
# "./model_regularized5.pickle", is not produced anywhere in this notebook, so
# a fresh Restart & Run All failed here with FileNotFoundError.
# map_location lets a checkpoint saved on a GPU be opened on a CPU-only machine.
# NOTE: torch.load unpickles the file; only load checkpoints you created.
state = torch.load("./model_regularized.pickle", map_location=device)
model = CNN(num_classes).to(device)
model.load_state_dict(state['model'])
class CelebTestDataset(Dataset):
    """Unlabelled image dataset over the files of a single directory.

    Every item is an ``(image, filename)`` pair, where the image has been
    converted to RGB and passed through the optional transform.
    """

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order and also lists
        # sub-directories and hidden entries (e.g. .ipynb_checkpoints,
        # .DS_Store) that cannot be opened as images. Keep regular,
        # non-hidden files only and sort them so the order is deterministic.
        self.image_filenames = sorted(
            name
            for name in os.listdir(root_dir)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(root_dir, name))
        )

    def __len__(self):
        """Return the number of image files found in ``root_dir``."""
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return ``(image, filename)``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        filename = self.image_filenames[idx]
        img_name = os.path.join(self.root_dir, filename)

        # The context manager closes the file handle as soon as the pixel data
        # has been read; convert() returns an independent in-memory RGB image.
        with Image.open(img_name) as raw:
            image = raw.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, filename

# Same preprocessing as used for training: resize, tensor conversion and
# per-channel normalisation. Defined here as well so this cell does not depend
# on the training-data cell having been run.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset and dataloader for test images. Order is preserved
# (shuffle=False) so predictions line up with file names.
test_dataset = CelebTestDataset(root_dir='./test', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

predicted_labels = []
image_names = []
model.eval()
with torch.no_grad():
    for images, filenames in tqdm(test_loader):
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        # extend() handles any batch size; the earlier .item() / filenames[0]
        # only worked with one image per batch, which made inference slow.
        predicted_labels.extend(predicted.tolist())
        image_names.extend(filenames)


def decode_labels(predicted_labels, label_encoder):
    """Map encoded class indices back to their original labels.

    Args:
        predicted_labels: Sequence of encoded class indices.
        label_encoder: Fitted encoder exposing ``inverse_transform``.

    Returns:
        The result of ``label_encoder.inverse_transform(predicted_labels)``.
    """
    decoded = label_encoder.inverse_transform(predicted_labels)
    return decoded

# Turn the predicted class indices back into category labels.
decoded_labels = decode_labels(predicted_labels, label_encoder)

# One row per test image: its file name and the predicted category.
results_df = pd.DataFrame(
    data={'Id': image_names, 'Category': decoded_labels},
)

# Persist the predictions without the DataFrame index column.
results_df.to_csv(path_or_buf='./predicted_test_results.csv', index=False)

In [None]:
# Read the saved predictions back from disk for post-processing.
df = pd.read_csv('./predicted_test_results.csv')

In [None]:
# Reduce each Id to the integer in front of the first "." ("123.jpg" -> 123).
# Going through astype(str) keeps this cell re-runnable: after a first run the
# column already holds integers, and calling x.split(".") on them raised
# AttributeError.
df["Id"] = df["Id"].astype(str).str.split(".").str[0].astype(int)

# Order the rows by numeric id; re-binding instead of inplace=True keeps the
# statement free of hidden in-place mutation.
df = df.sort_values("Id")

In [None]:
# Overwrite the predictions file with the processed frame, omitting the index column.
df.to_csv('./predicted_test_results.csv', index=False)

In [None]:
# Show the processed predictions as the cell output.
df