In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np 
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, random_split, DataLoader
from tqdm import tqdm
import torchvision.transforms as transforms 
from sklearn.metrics import f1_score
import timm
import cv2
import random
from torch.utils.data.dataset import Subset
from albumentations.pytorch.transforms import ToTensorV2
from PIL import Image
import albumentations as A
from torchvision.datasets import ImageFolder

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Load the competition tables: df1 is read from train.csv, df2 from test.csv.
df1, df2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/ai-of-god-v20/train.csv',
        '/kaggle/input/ai-of-god-v20/test.csv',
    )
)

In [None]:
# Augmentation pipeline used by upsample_class: four geometric transforms, each
# applied with probability 0.5, a brightness/contrast jitter applied with
# probability 0.2, and finally ToTensorV2.
augment = A.Compose(
    [op(p=0.5) for op in (A.RandomRotate90, A.Transpose, A.HorizontalFlip, A.VerticalFlip)]
    + [A.RandomBrightnessContrast(p=0.2), ToTensorV2()]
)

In [None]:
class CustomDataset():
    """Training dataset serving the images listed in the module-level ``df1``.

    Each item is a dict with:
      * ``'x'``    - the image as a ``(227, 227, 3)`` torch tensor (the file is
                     read as grayscale and expanded to three channels),
      * ``'y'``    - ``df1['Class'][idx]`` wrapped in a torch tensor,
      * ``'path'`` - the path the image was read from.
    """

    def __init__(self,folder):
        # Directory that contains the image files named in df1['FileName'].
        self.folder = folder

    def __len__(self):
        # NOTE(review): the length is the number of directory entries while items
        # are looked up in df1; the two are assumed to match - confirm.
        return len(os.listdir(self.folder))

    def __getitem__(self,idx):
        """Load sample ``idx``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
            ValueError: (from ``reshape``) if the image is not 227x227.
        """
        # Build the path from the folder handed to the constructor instead of a
        # hard-coded directory, so the dataset actually honours its argument.
        path = os.path.join(self.folder, str(df1['FileName'][idx]))
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f'Could not read image: {path}')
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        # reshape is a no-op for 227x227 inputs and fails loudly for any other size.
        img = img.reshape(227,227,3)
        img = torch.from_numpy(img)

        y = df1['Class'][idx]
        return {'x' : img,'y': torch.tensor(y), 'path' : path}

In [None]:
class CustomSubset(Subset):
    """A ``Subset`` followed by a list of extra, already materialised samples.

    Indices ``0 .. len(indices) - 1`` are served by the wrapped dataset through
    ``Subset``; the following ``len(additional_data)`` indices are served
    directly from ``additional_data``.
    """

    def __init__(self, dataset, indices, additional_data):
        super().__init__(dataset, indices)
        self.additional_data = additional_data

    def __getitem__(self, index):
        # Support negative indices relative to the combined length.
        if index < 0:
            index += len(self)
        if index < len(self.indices):
            return super().__getitem__(index)
        else:
            additional_index = index - len(self.indices)
            return self.additional_data[additional_index]

    def __getitems__(self, indices):
        # Recent PyTorch DataLoaders fetch whole batches through __getitems__
        # when a dataset defines it. The version inherited from Subset indexes
        # self.indices directly and therefore fails for the appended samples,
        # so route every index through __getitem__ above.
        return [self[index] for index in indices]

    def __len__(self):
        return len(self.indices) + len(self.additional_data)

In [None]:
def upsample_class(data,label):
    """Return ``data`` extended with one augmented copy of every sample of ``label``.

    Args:
        data: indexable dataset whose items are dicts with keys ``'x'`` (image in
            height x width x channel layout), ``'y'`` and ``'path'``.
        label: class value; samples with ``item['y'] == label`` are duplicated.

    Returns:
        A ``CustomSubset`` exposing every item of ``data`` followed by the
        augmented copies. Copies carry ``'path': 'NULL'`` because they do not
        correspond to a file on disk.
    """
    upsampled_data = []
    for idx in range(len(data)):
        # Fetch each sample once; an access may decode an image from disk.
        item = data[idx]
        if not (item['y'] == label):
            continue
        # Augment the pixels the dataset already produced instead of re-reading
        # item['path'], which is 'NULL' for samples created by an earlier call.
        image = item['x']
        image = image.numpy() if isinstance(image, torch.Tensor) else np.asarray(image)
        augmented = augment(image = image)['image']
        if isinstance(augmented, torch.Tensor):
            # ToTensorV2 returns channel-first tensors; convert back so the copy
            # has the same layout as the samples it is mixed with.
            augmented = augmented.permute(1, 2, 0).contiguous()
        else:
            augmented = torch.from_numpy(np.ascontiguousarray(augmented))
        upsampled_data.append({'x': augmented,'y': item['y'] ,'path' : 'NULL'})
    return CustomSubset(data, range(len(data)), upsampled_data)

In [None]:
dataset = CustomDataset('/kaggle/input/ai-of-god-v20/train')
# Only the last expression of a cell is displayed, so show the shape of one
# sample together with the dataset size instead of discarding the former.
dataset[5]['x'].shape, len(dataset)

In [None]:
# Fraction of each class that goes to the training split.
split_ratio = 0.9

# Group sample indices by class. Labels are read straight from df1 - the same
# df1['Class'][idx] value CustomDataset returns as 'y' - so no image has to be
# decoded just to learn its class.
class_indices = {}
for i in range(len(dataset)):
    class_indices.setdefault(int(df1['Class'][i]), []).append(i)

# Per class, the leading `split_ratio` share of indices is used for training and
# the remainder for validation. NOTE(review): indices are taken in file order
# without shuffling - confirm that this is intended.
train_indices = []
val_indices = []
for label, indices in class_indices.items():
    split_idx = int(len(indices) * split_ratio)
    train_indices.extend(indices[:split_idx])
    val_indices.extend(indices[split_idx:])

train = Subset(dataset, train_indices)
val = Subset(dataset, val_indices)

In [None]:
# Number of samples in the training split.
len(train)

In [None]:
# No conversion pass is needed here: CustomDataset already returns torch tensors,
# and writing into the dict returned by `train[i]` would not persist because a
# fresh dict is built on every access. Looping over the whole split would only
# cost a complete read of the training images, so spot-check one sample instead.
if len(train) > 0:
    assert isinstance(train[0]['x'], torch.Tensor), type(train[0]['x'])

In [None]:
# For each class value 1..7, extend the training set with one augmented copy of
# every sample of that class; class 0 is not upsampled.
for minority_label in range(1, 8):
    train = upsample_class(train, minority_label)

In [None]:
# Number of training samples after the upsampling step above.
len(train)

In [None]:
# No conversion pass is needed here: CustomDataset converts images with
# torch.from_numpy and the augmentation pipeline ends in ToTensorV2, so 'x' is
# never a NumPy array. Looping over every sample would only re-read the whole
# training set, so spot-check the first and the last sample instead.
if len(train) > 0:
    for sample in (train[0], train[len(train) - 1]):
        assert isinstance(sample['x'], torch.Tensor), type(sample['x'])

In [None]:
 # Settings shared by the model, data-loading, optimizer and training cells.
 Cfg = {
     'model_name': 'efficientnet_b0',  # architecture name handed to timm
     'classes': 8,                     # output size of the classification head
     'size': 227,
     'batch_size': 16,
     'epochs': 8,
     'num_workers': 2,                 # DataLoader worker processes
     'lr': 0.0001,
     'weight_decay': 0.01,
 }

In [None]:
# Training-time preprocessing and augmentation.
# ToFloat rescales pixel values to [0, 1], so Normalize must be told that the
# maximum pixel value is 1.0. With its default of 255 the mean/std would be
# scaled a second time and every pixel would collapse to nearly the same value.
train_transform = A.Compose([
    A.ToFloat(max_value=255.0, p=1.0),
    # NOTE(review): newer albumentations releases expect size=(224, 224) here -
    # confirm against the installed version.
    A.RandomResizedCrop(224, 224),
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Transpose(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.ElasticTransform(),
    A.Rotate(p=0.5),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=1.0,
    ),
    ToTensorV2(),
])

# Validation/inference preprocessing: the same scaling and normalisation as for
# training, without any random augmentation.
val_transform =  A.Compose([
    A.ToFloat(max_value=255.0, p=1.0),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=1.0,
    ),
    ToTensorV2(),
])

In [None]:
# for i in range(len(train)):
#     if train[i]['path'] == 'NULL':
#         continue
#     img = cv2.imread(train[i]['path'], cv2.IMREAD_GRAYSCALE)
#     img  = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
#     img = np.expand_dims(img, axis=0)
#     img = img.reshape(227,227,3)
#     img = train_transform(image=img)['image']
#     train[i]['x'] = img

In [None]:
# for i in range(len(train)):
#     if train[i]['path'] == 'NULL':
#         continue
#     img = cv2.imread(train[i]['path'], cv2.IMREAD_GRAYSCALE)
#     img  = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
#     img = np.expand_dims(img, axis=0)
#     img = img.reshape(227,227,3)
#     img = train_transform(image=img)['image']
#     train[i]['x'] = img
# for i in range(len(val)):
#     img = cv2.imread(val[i]['path'], cv2.IMREAD_GRAYSCALE)
#     img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
#     img = np.expand_dims(img, axis=0)
#     img = img.reshape(227,227,3)
#     img = val_transform(image=img)['image']
#     val[i]['x'] = img

In [None]:
class CNN(nn.Module):
    """timm backbone whose ``classifier`` layer is replaced by a new linear head.

    Args:
        model_name: architecture name passed to ``timm.create_model``. The model
            must expose its head as a ``classifier`` attribute with ``in_features``.
        num_classes: size of the new head; defaults to ``Cfg['classes']``.
        pretrained: forwarded to ``timm.create_model``. Pass ``False`` when the
            weights are about to be overwritten by a checkpoint anyway.
    """

    def __init__(self, model_name, num_classes=None, pretrained=True):
        super().__init__()
        if num_classes is None:
            num_classes = Cfg['classes']
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Swap the original head for one that matches the number of classes.
        num_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(num_features, num_classes)

    def forward(self,x):
        """Return the backbone's output (class logits) for the batch ``x``."""
        return self.model(x)

In [None]:
# Instantiate the network for the architecture named in Cfg['model_name'].
model = CNN(Cfg['model_name'])

In [None]:
# Bring stored samples to height x width x channel layout. A channel-first
# tensor has to be permuted: reshape(227, 227, 3) would keep the memory order
# and scramble the pixels. Samples that are already channel-last are left
# alone. Only samples held in memory keep the change, because the dataset
# builds a fresh dict for file-backed samples on every access.
for i in range(len(train)):
    sample = train[i]
    x = sample['x']
    if isinstance(x, torch.Tensor) and x.ndim == 3 and x.shape[0] == 3 and x.shape[-1] != 3:
        sample['x'] = x.permute(1, 2, 0).contiguous()

In [None]:
# Shuffled training batches; the trailing incomplete batch is dropped.
train_loader = DataLoader(
    train,
    batch_size=Cfg['batch_size'],
    num_workers=Cfg['num_workers'],
    shuffle=True,
    drop_last=True,
    pin_memory=True,
)

In [None]:
# train_loader = DataLoader(train, batch_size=Cfg['batch_size'], shuffle=True, 
#                           num_workers=Cfg['num_workers'],pin_memory=True, drop_last=True)
# valid_loader = DataLoader(val, batch_size=Cfg['batch_size'], shuffle=False, 
#                           num_workers=Cfg['num_workers'], pin_memory=True, drop_last=False)

In [None]:
# Cross-entropy loss, Adam with the learning rate and weight decay from Cfg, and
# a step schedule that multiplies the learning rate by 0.1 every 3 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=Cfg['lr'],
    weight_decay=Cfg['weight_decay'],
)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
    verbose=True,
)

In [None]:
# Mark every parameter of the classification head as trainable.
for head_param in model.model.classifier.parameters():
    head_param.requires_grad_(True)

In [None]:
# Collect the validation labels, in dataset order, into a float array.
y_valid_all = np.fromiter(
    (float(val[k]['y']) for k in range(len(val))),
    dtype=np.float64,
    count=len(val),
)

In [None]:
# Validation batches are served in a fixed order and none is dropped.
valid_loader = DataLoader(val, batch_size=Cfg['batch_size'], shuffle=False,
                          num_workers=Cfg['num_workers'], pin_memory=True, drop_last=False)


def _transform_batch(batch_images, transform):
    """Apply an albumentations pipeline to every image of a collated batch.

    ``batch_images`` is iterated along its first dimension, each image is handed
    to ``transform`` as a NumPy array, and the per-image results are stacked
    into a single batch tensor.
    """
    return torch.stack([transform(image=np.asarray(image))['image'] for image in batch_images])


best_score = -1
for i in range(Cfg['epochs']):
    # ---- training ----
    model.to(device).train()
    running_loss = 0.0
    count = 0  # number of samples seen, so the average below is per sample
    for trai in tqdm(train_loader):
        images = _transform_batch(trai['x'], train_transform).float().to(device)
        # One label per image: duplicating the labels would make the target
        # batch twice as long as the prediction batch.
        labels = trai['y'].to(device)
        y_preds =  (model(images))
        loss = criterion(y_preds, labels)
        running_loss += loss.item()*labels.shape[0]
        count += labels.shape[0]
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    avr_train_loss = running_loss/max(count, 1)
    print(f'Epoch {i+1} - average train loss : {avr_train_loss:.4f}')
    
    # ---- validation ----
    model.to(device).eval()
    preds = []
    running_loss = 0
    count = 0
    valid_labels = []
    for vali in tqdm(valid_loader):
        # Same scaling/normalisation as training, without random augmentation.
        images = _transform_batch(vali['x'], val_transform).float().to(device)
        labels = vali['y'].to(device)
        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds, labels)
        running_loss += loss.item()*labels.shape[0]
        count += labels.shape[0]
        preds.append(y_preds.softmax(1).to('cpu').numpy())
        valid_labels.append(labels.to('cpu').numpy())
    predictions = np.concatenate(preds)
    aver_val_loss = (running_loss/max(count, 1))
    print(f'Epoch {i+1} - average valid loss : {aver_val_loss:.4f}')
    scheduler.step()
    
    # Score against the labels collected from the very batches that were predicted.
    score = f1_score(np.concatenate(valid_labels), predictions.argmax(1),average = 'macro')
    print("score: ", score)
    if score > best_score:
        print('Score Improved')
        best_score = score
        print(f'Epoch {i+1} - Save Best Score: {best_score:.4f}')
        torch.save({'model': model.state_dict(), 
                    'preds': preds,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()},
                    './'+'efficientnet_b0_best.pth')

In [None]:
# Best macro-F1 score reached on the validation split during training.
best_score

In [None]:
# The checkpoint is written by this notebook and also stores NumPy prediction
# arrays, so it needs full unpickling (weights_only=False, available in
# torch >= 1.13). map_location lets it load on a CPU-only session as well.
check_point = torch.load('./'+'efficientnet_b0_best.pth', map_location=device, weights_only=False)

In [None]:
class TestDataset():
    """Test dataset serving the images listed in the module-level ``df2``.

    Each item is a dict with ``'x'`` (the image as a ``(227, 227, 3)`` NumPy
    array; the file is read as grayscale and expanded to three channels) and
    ``'path'`` (the path the image was read from).
    """

    def __init__(self,folder):
        # Directory that contains the image files named in df2['FileName'].
        self.folder = folder

    def __len__(self):
        # NOTE(review): the length is the number of directory entries while items
        # are looked up in df2; the two are assumed to match - confirm.
        return len(os.listdir(self.folder))

    def __getitem__(self,idx):
        """Load sample ``idx``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
            ValueError: (from ``reshape``) if the image is not 227x227.
        """
        path = self.folder + '/' + str(df2['FileName'][idx])
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f'Could not read image: {path}')
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        # reshape is a no-op for 227x227 inputs and fails loudly for any other size.
        img = img.reshape(227,227,3)
        return {'x' : img, 'path' : path}

In [None]:
# Test images are read from this folder; their file names come from df2.
test_dataset = TestDataset('/kaggle/input/ai-of-god-v20/test')

In [None]:
# for i in range(len(val)):
#     img = cv2.imread(test_dataset[i]['path'], cv2.IMREAD_GRAYSCALE)
#     img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
#     img = np.expand_dims(img, axis=0)
#     img = img.reshape(227,227,3)
#     img = val_transform(image=img)['image']
#     test_dataset[i]['x'] = img

In [None]:
# Number of entries found in the test image folder.
len(test_dataset)

In [None]:
def test_fn(test_loader, model, device):
    model.to(device).eval()
    
    preds= []
    for i, data in enumerate(tqdm(test_loader,total = len(test_loader))):
        images = data['x'].to(device)
        images = images.permute(0,3,1,2)
        with torch.no_grad():
            images = images.float()
            output = model(images)
            preds.append(output.softmax(1).to('cpu').numpy())
            
    predictions = np.concatenate(preds)
    predictions = predictions.argmax(1)
    
    return predictions   

In [None]:
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)
# Restore the best checkpoint into a fresh network and predict with that same
# network, rather than building model1 and then never using it.
model1 = CNN(Cfg['model_name'])
model1.load_state_dict(check_point['model'])
pred = test_fn(test_loader, model1, device)

In [None]:
# Number of batches in the test loader.
len(test_loader)

In [None]:
# Number of predictions returned by test_fn.
len(pred)

In [None]:
# Store the predicted class indices in the test table as the 'Class' column.
df2['Class'] = pred

In [None]:
# Display the test table, which now includes the 'Class' column.
df2

In [None]:
# Write the test table with its predictions to a CSV file, without the index column.
df2.to_csv('efficientnet_b0_upsampled(4).csv',index=False)