In [185]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from PIL import Image

In [186]:
import os

from google.colab import drive

# Attach Google Drive to the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [187]:
# Root folder of the dataset on the mounted Drive.
link = '/content/drive/MyDrive/Ativities/NextGenAI Camp/Onsite/Hackathon/dataset-nextgen-Day1'

# Read the training table and discard its 'id' column in one chained expression.
train_csv_path = os.path.join(link, 'train.csv')
train = pd.read_csv(train_csv_path).drop(columns='id')

In [188]:
# Print the column dtypes, non-null counts and memory usage of the training table.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12295 entries, 0 to 12294
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Image   12295 non-null  object 
 1   MOS     12295 non-null  float64
 2   Class   12295 non-null  object 
dtypes: float64(1), object(2)
memory usage: 288.3+ KB


In [189]:
# Give every distinct class value an integer id, numbered in order of first appearance.
class_values = train['Class'].unique()
temp = dict(zip(class_values, range(len(class_values))))

# Swap the class values in the table for their integer ids.
# NOTE(review): re-running this cell without reloading `train` rebuilds `temp`
# from the already-encoded ids - confirm before re-executing.
train['Class'] = train['Class'].map(temp)

In [190]:
# Debug knob: keep only the first N_DEBUG_ROWS rows so the cells below run quickly.
# Everything downstream (dataset, number of classes, training) sees the truncated table.
# Set to None to keep every row (`iloc[:None]` selects the whole frame).
N_DEBUG_ROWS = 10
train = train.iloc[:N_DEBUG_ROWS]

In [191]:
# Target spatial size and the ImageNet per-channel statistics used for normalisation.
INPUT_SIZE = (224, 224)
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Resize -> tensor -> normalise, applied in that order.
preprocessing_steps = [
    T.Resize(INPUT_SIZE),
    T.ToTensor(),
    T.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
transform = T.Compose(preprocessing_steps)

class Image_Dataset(Dataset):
  """Dataset yielding ``(image, mos, label)`` triples from a dataframe.

  Every row supplies an 'Image' path (joined onto the notebook-level ``link``
  folder), a 'MOS' value and a 'Class' value.

  Args:
    dataframe: table with 'Image', 'MOS' and 'Class' columns.
    transform: optional callable applied to the RGB image. When it is omitted
      the RGB PIL image is returned unchanged.
  """

  def __init__(self, dataframe, transform=None):
    self.df = dataframe
    self.transform = transform

  def __len__(self):
    """Return the number of rows in the underlying dataframe."""
    return len(self.df)

  def __getitem__(self, index):
    """Load the sample stored at positional ``index``.

    Returns:
      tuple: ``(image, mos, label)`` where ``mos`` is a float32 tensor and
      ``label`` is a long tensor built from the row's 'MOS' and 'Class' values.
    """
    # Fetch the row once instead of repeating the positional lookup per column.
    row = self.df.iloc[index]
    img = Image.open(link + '/' + row['Image']).convert('RGB')

    # Previously `image` was only bound inside the `if`, so a dataset built
    # without a transform raised UnboundLocalError here.
    image = self.transform(img) if self.transform is not None else img

    mos = torch.tensor(row['MOS'], dtype=torch.float32)
    label = torch.tensor(row['Class'], dtype=torch.long)
    return image, mos, label

# Wrap the training table in the dataset and serve it in shuffled mini-batches of 16.
dataset = Image_Dataset(dataframe=train, transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

In [192]:
class CNN(nn.Module):
  """Small convolutional network with two output heads.

  A shared convolutional trunk followed by one fully connected layer feeds
  a single-output regression head (MOS) and a classification head (label).

  Args:
    num_label: number of outputs of the classification head.
  """

  def __init__(self, num_label):
    super().__init__()

    # (in_channels, out_channels) of the three convolution stages, in order.
    stage_widths = [(3, 32), (32, 64), (64, 128)]
    final_stage = len(stage_widths) - 1

    trunk = []
    for stage, (c_in, c_out) in enumerate(stage_widths):
      trunk.append(nn.Conv2d(c_in, c_out, 3, stride=1, padding=1))
      trunk.append(nn.BatchNorm2d(c_out))
      trunk.append(nn.ReLU())
      # The first two stages end in 2x2 max pooling; the last one pools
      # adaptively to a fixed 7x7 grid so the linear layer sees 128 * 7 * 7 inputs.
      if stage == final_stage:
        trunk.append(nn.AdaptiveAvgPool2d((7, 7)))
      else:
        trunk.append(nn.MaxPool2d(2))
    self.features = nn.Sequential(*trunk)

    hidden_units = 256
    self.fc = nn.Sequential(
        nn.Flatten(),
        nn.Linear(128 * 7 * 7, hidden_units),
        nn.ReLU(),
        nn.Dropout(0.3),
    )

    self.regression = nn.Linear(hidden_units, 1)  # MOS
    self.classification = nn.Linear(hidden_units, num_label)  # label

  def forward(self, x):
    """Run the trunk and both heads on a batch.

    Args:
      x: input batch; the first convolution expects 3 channels.

    Returns:
      tuple: ``(mos, label)`` - the regression output with dimension 1
      squeezed away, and the raw classification head output.
    """
    shared = self.fc(self.features(x))
    mos = self.regression(shared).squeeze(1)
    label = self.classification(shared)
    return mos, label

In [193]:
# Train on the GPU when one is present, otherwise on the CPU.
# (The original spelled this `is_availabel`, which raises AttributeError.)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Size of the classification head = number of distinct class ids in `train`.
# NOTE(review): assumes the encoded ids are exactly 0..num_label-1 - confirm
# whenever `train` is subsampled.
num_label = len(train['Class'].unique())

model = CNN(num_label).to(device)
MSE = nn.MSELoss()
CrossEntropy = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epoch = 2
for i in range(0, epoch):
  model.train()
  total_loss = 0
  for img, mos, label in tqdm(dataloader, desc=f'Epoch {i+1} / {epoch}', leave=False):
    img, mos, label = img.to(device), mos.to(device), label.to(device)

    mos_hat, label_hat = model(img)

    # Joint objective: MOS regression plus half-weighted classification.
    regression_loss = MSE(mos_hat, mos)
    classification_loss = CrossEntropy(label_hat, label)
    loss = regression_loss + (0.5*classification_loss)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_loss += loss.item()

  # The running loss was accumulated but never shown; report the per-batch mean.
  # tqdm.write keeps the message from colliding with the progress bar.
  tqdm.write(f'Epoch {i+1} / {epoch} - mean loss: {total_loss / max(len(dataloader), 1):.4f}')



In [194]:
# Store the weights together with the class -> index mapping in one file.
checkpoint = {
    'model_state': model.state_dict(),
    'class_to_index': temp,
}
torch.save(checkpoint, 'CNN_model.pth')

In [197]:
final = os.path.join(link, 'test')
result = []

# Restore the checkpoint on the CPU and invert the stored class -> index mapping.
checkpoint = torch.load('CNN_model.pth', map_location='cpu')
label_to_index = checkpoint['class_to_index']
index_to_label = {label_index: label_name for label_name, label_index in label_to_index.items()}

# Size the classification head from the saved weights, so this cell does not
# depend on a `num_label` variable left over from the training cell.
num_outputs = checkpoint['model_state']['classification.weight'].shape[0]
model = CNN(num_outputs)
model.load_state_dict(checkpoint['model_state'])
model.eval()

IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
for filename in tqdm(os.listdir(final), desc='Predicting'):
  # Skip anything in the folder that is not an image file.
  if not filename.lower().endswith(IMAGE_EXTENSIONS):
    continue

  img_path = os.path.join(final, filename)
  img = Image.open(img_path).convert('RGB')
  img = transform(img).unsqueeze(0)  # add the batch dimension

  with torch.no_grad():
    mos_hat, label_hat = model(img)

  mos_hat = mos_hat.item()
  label_index = torch.argmax(label_hat, dim=1).item()
  label_name = index_to_label[label_index]

  # The id is the text after the last '_' and before the first '.' that follows it.
  id_img = filename.split('_')[-1].split('.')[0]

  result.append({
      'id' : int(id_img),
      'Predicted_MOS' : mos_hat,
      'Class' : label_name
  })

# Naming the columns keeps `sort_values` valid even when no image was found.
# The original sorted twice; once is enough.
df = pd.DataFrame(result, columns=['id', 'Predicted_MOS', 'Class']).sort_values('id')
df.to_csv('My_Answer.csv', index=False)
df

Predicting: 100%|██████████| 2011/2011 [02:27<00:00, 13.62it/s]


Unnamed: 0,id,Predicted_MOS,Class
0,3430,0.267142,GaengJued
1,3560,0.298846,KhaoNiewMaMuang
2,3331,0.274039,KhaoNiewMaMuang
3,3813,0.310127,KhaoNiewMaMuang
4,3396,0.376321,KhaoNiewMaMuang
...,...,...,...
2006,10441,0.318805,KhaoNiewMaMuang
2007,10685,0.289608,KhaoNiewMaMuang
2008,10293,0.218817,GaiYang
2009,10214,0.241430,KhaoNiewMaMuang
