In [None]:
# PyTorch image-classification model; the dataset is loaded from a folder

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import pandas as pd
from PIL import Image
import os

In [None]:
class SCELoss(torch.nn.Module):
    """Weighted sum of cross-entropy (CE) and a reverse cross-entropy (RCE) term.

    The loss returned by ``forward`` is ``alpha * CE + beta * mean(RCE)``, where
    RCE swaps the roles of prediction and target: it is
    ``-sum(softmax(pred) * log(one_hot(labels)))`` per sample, with both factors
    clamped away from zero so the logarithm stays finite.

    Args:
        alpha: weight of the standard cross-entropy term.
        beta: weight of the reverse cross-entropy term.
        num_classes: number of classes used to one-hot encode ``labels``.
    """

    def __init__(self, alpha, beta, num_classes=7):
        super(SCELoss, self).__init__()
        # Retained for backward compatibility with code that reads the
        # attribute; forward() follows the device of its inputs instead.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.alpha = alpha
        self.beta = beta
        self.num_classes = num_classes
        self.cross_entropy = torch.nn.CrossEntropyLoss()

    def forward(self, pred, labels):
        """Return the scalar loss for logits ``pred`` and integer ``labels``.

        Args:
            pred: raw class scores; softmax is taken over ``dim=1``.
            labels: integer class indices accepted by ``one_hot`` and
                ``CrossEntropyLoss``.
        """
        # CE term on the raw logits
        ce = self.cross_entropy(pred, labels)

        # RCE term: clamp the probabilities so no entry is exactly zero
        probs = F.softmax(pred, dim=1)
        probs = torch.clamp(probs, min=1e-7, max=1.0)

        # Place the one-hot targets on the same device as the predictions.
        # Using a device fixed at construction time breaks when the inputs
        # live on the CPU while CUDA happens to be available.
        label_one_hot = F.one_hot(labels, self.num_classes).float().to(pred.device)
        # Zeros become 1e-4 so log() is finite (log(1e-4) instead of -inf)
        label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
        rce = -torch.sum(probs * torch.log(label_one_hot), dim=1)

        return self.alpha * ce + self.beta * rce.mean()


In [None]:
# Start from torchvision's ResNet-34 with its pretrained weights loaded.
# NOTE(review): recent torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument — confirm against the installed version.
model = torchvision.models.resnet34(pretrained=True)

In [None]:
# Preprocessing applied to every image before it reaches the ResNet-34 model:
# resize, centre-crop to 224, convert to a tensor, then normalise each of the
# three channels with the per-channel mean / std listed below.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [None]:
# Build the labelled dataset from a directory tree via ImageFolder, applying
# the preprocessing pipeline `transform` to each image.
# NOTE(review): the root is an absolute, machine-specific path — consider
# moving it to a configurable constant.
dataset = torchvision.datasets.ImageFolder(
    root='/home/venom/repo/Stylumia-Internship-Kaggle/Dataset/Modified/',
    transform=transform,
)

In [None]:
# Randomly split the dataset 70 / 30 into a training set and a held-out set.
# The held-out size is computed by subtraction so the two sizes always sum to
# len(dataset), whatever int() rounds the 70% share to.
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size

# A seeded generator makes the split identical on every kernel restart.
# Without it, each run holds out different images, so results are not
# comparable between runs and a reloaded checkpoint may already have been
# trained on the new "held-out" samples.
trainset, testset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)


In [None]:
# Batch both splits (64 samples per batch, 2 worker processes each).
# Only the training loader shuffles; the held-out loader keeps a fixed order.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=64, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=64, shuffle=False, num_workers=2)


In [None]:
# Freeze every existing parameter so only the new head is trained at first
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a fresh 7-class head.
# The input width is read from the existing head rather than hard-coded, so
# the cell keeps working if the backbone is swapped for one with a different
# feature size.
model.fc = nn.Linear(model.fc.in_features, 7)

# Make sure the new head is trainable (set explicitly so the intent is clear)
for param in model.fc.parameters():
    param.requires_grad = True


In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# # Optionally resume from a previously saved checkpoint
# model.load_state_dict(torch.load(
#     '/home/venom/repo/Stylumia-Internship-Kaggle/RawData/model.pth'))


In [None]:
# Loss: SCELoss weighting the cross-entropy term by 0.1 and the reverse
# cross-entropy term by 1.0, for 7 classes.
# Optimizer: Adam over all model parameters with learning rate 0.001.
criterion = SCELoss(alpha=0.1, beta=1.0, num_classes=7)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Per-epoch train and test helpers, each showing a tqdm progress bar
from tqdm import tqdm

def train(trainloader):
    """Run one training pass over ``trainloader`` and print the mean batch loss.

    Relies on the notebook-level ``model``, ``optimizer``, ``criterion`` and
    ``device``. The printed value is the average of the per-batch losses.

    Args:
        trainloader: iterable of ``(inputs, labels)`` batches that supports
            ``len()``.
    """
    model.train()
    running_loss = 0.0
    # Wrap the loader itself rather than enumerate(loader): tqdm can then read
    # len(trainloader) and display a total, percentage and ETA.
    for inputs, labels in tqdm(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print("Training loss: ", running_loss/len(trainloader))

def test(testloader):
    """Evaluate the model on ``testloader``; print mean batch loss and accuracy.

    Relies on the notebook-level ``model``, ``criterion`` and ``device``.
    Gradients are disabled for the whole pass. Accuracy is the fraction of
    samples whose highest-scoring class equals the label.

    Args:
        testloader: iterable of ``(inputs, labels)`` batches that supports
            ``len()``.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        # Wrap the loader itself rather than enumerate(loader) so tqdm knows
        # the number of batches and can show a total / ETA.
        for inputs, labels in tqdm(testloader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()

            # Index of the largest score along the class dimension; the legacy
            # `.data` access is unnecessary inside no_grad().
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print("Testing loss: ", running_loss/len(testloader))
    print("Accuracy: ", correct/total)


In [None]:
# Stage 1: 10 epochs with the Adam optimizer defined above, while only the new
# fc head has requires_grad=True. After each training pass the model is
# evaluated on testloader. The printed epoch counter starts at 0.
for epoch in range(10):
    print("Epoch: ", epoch)
    train(trainloader)
    test(testloader)

In [None]:
# Unfreeze every layer so the whole network can be fine-tuned
for param in model.parameters():
    param.requires_grad = True

# Fresh Adam optimizer with a 10x smaller learning rate (0.0001 vs 0.001);
# creating a new optimizer also discards the previous optimizer's state.
optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [None]:
# Stage 2: 10 more epochs with all layers trainable, using the Adam optimizer
# (lr=0.0001) created in the previous cell. The printed epoch counter restarts
# at 0.
for epoch in range(10):
    print("Epoch: ", epoch)
    train(trainloader)
    test(testloader)


In [None]:
# Switch to plain SGD with a still smaller learning rate (0.00001) for the
# next round of training.
optimizer = optim.SGD(model.parameters(), lr=0.00001)

In [None]:
# Stage 3: 10 more epochs using the SGD optimizer (lr=0.00001) created in the
# previous cell. The printed epoch counter restarts at 0.
for epoch in range(10):
    print("Epoch: ", epoch)
    train(trainloader)
    test(testloader)


In [None]:
# 5 epochs with the loaders swapped: the model is trained on testloader (the
# held-out 30% split) and evaluated on trainloader.
# NOTE(review): presumably intentional, to use all labelled data before the
# final submission — confirm. After this cell the held-out split has been
# trained on, so metrics measured on testloader no longer estimate
# generalisation. testloader was also built with shuffle=False, so these
# training batches arrive in a fixed order.
for epoch in range(5):
    print("Epoch: ", epoch)
    train(testloader)
    test(trainloader)


In [None]:
# Evaluate once more on testloader. This call also leaves the model in eval
# mode, because test() calls model.eval().
# NOTE(review): an earlier cell calls train(testloader), so this number is
# measured on data the model has been trained on.
test(testloader)

In [None]:
# Predict a class for every file in the test directory and collect the results
# in a DataFrame laid out for the Kaggle submission.
import pandas as pd
import os
from PIL import Image

# Load test directory
test_dir = '/home/venom/repo/Stylumia-Internship-Kaggle/Dataset/test'

# Put the model in inference mode explicitly, so this cell does not depend on
# which train()/test() call happened to run last.
model.eval()

# Rows are gathered in a list and turned into a DataFrame once at the end;
# concatenating a one-row frame per file is quadratic in the number of files.
records = []

# No gradients are needed for prediction; disabling them avoids building an
# autograd graph for every image.
with torch.no_grad():
    # sorted() makes the row order deterministic (os.listdir order is arbitrary)
    for file in tqdm(sorted(os.listdir(test_dir))):
        # Open the image inside a context manager so the file handle is closed,
        # and force 3-channel RGB: Normalize is configured with 3-channel
        # mean/std, so greyscale, palette or RGBA files would otherwise fail.
        with Image.open(os.path.join(test_dir, file)) as raw:
            img = transform(raw.convert('RGB'))

        # Add the batch dimension and move to the model's device
        img = img.unsqueeze(0).to(device)

        # Predicted class = index of the largest output score.
        # NOTE(review): this is the class index assigned by the training
        # dataset — presumably identical to the expected 0-6 label; verify
        # against dataset.class_to_idx.
        output = model(img)
        _, predicted = torch.max(output, 1)

        records.append({'file_name': file, 'label': predicted.item()})

# Submission df structure - file_name,label - label is the predicted class (0-6)
submission_df = pd.DataFrame(records, columns=['file_name', 'label'])


In [None]:
# Write the submission table to SCE_34.csv in the current working directory,
# without the DataFrame index column.
submission_df.to_csv('SCE_34.csv', index=False)


In [None]:
# Save only the model's state_dict (parameters and buffers) to SCE_34.pth in
# the current working directory; reloading requires rebuilding the same
# architecture first and then calling load_state_dict.
torch.save(model.state_dict(), 'SCE_34.pth')
