In [1]:
# !pip install timm
import numpy as np
import pandas as pd
import os
import timm
import torch
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F
import albumentations as A
from torch.utils.data import Dataset,DataLoader,WeightedRandomSampler,random_split
from PIL import Image
import cv2 as cv
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
import torchvision
import torchvision.transforms as transforms
from torchvision.ops import sigmoid_focal_loss
import torchmetrics
from torchmetrics.classification import MulticlassF1Score as F1Score
from sklearn.metrics import f1_score
torch.manual_seed(69)
np.random.seed(69)
import matplotlib.pyplot as plt



In [2]:
# Sorted names of the entries in the training image directory.
# NOTE(review): `fold` is not referenced by any later cell shown in this notebook.
fold=np.sort(os.listdir('/kaggle/input/ai-of-god-v20/train'))

In [3]:
# Use the GPU when one is visible, otherwise fall back to the CPU so that the
# later `.to(device)` calls do not fail on a CPU-only session.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Augmentation pipeline used by `dataset`: resize to 256x256, take a random
# 224x224 crop, normalise each channel with the mean/std below, flip randomly
# along both axes, then convert to a channel-first torch tensor.
transform = A.Compose([
    A.Resize(height=256, width=256),
    A.RandomCrop(height=224, width=224),
    # A.CenterCrop(224,224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    ToTensorV2(),
])

In [5]:
class dataset(Dataset):
    """Labelled training images.

    Args:
        folder: directory that holds the image files.
        labels: path of a CSV with (at least) the columns ``FileName`` and ``Class``.

    Each item is a dict ``{'x': float32 image tensor, 'y': long tensor of shape (1,)}``;
    both tensors are already moved to the module-level ``device``.
    """

    def __init__(self,folder=None,labels=None):
        self.transform=transform
        self.folder=folder
        # Read the CSV once and keep both the DataFrame and its ndarray view.
        self.plabels=pd.read_csv(labels)
        self.labels=np.array(self.plabels)
        # Filename -> class lookup, so __getitem__ does not scan the whole
        # DataFrame with a boolean mask for every sample.
        self._class_by_file={
            str(name):int(cls)
            for name,cls in zip(self.plabels['FileName'],self.plabels['Class'])
        }
        # Keep only files that have a label, in sorted order, so that every
        # index in range(len(self)) refers to a loadable, labelled image.
        self.lis=np.array(
            [name for name in np.sort(os.listdir(folder)) if str(name) in self._class_by_file]
        )

    def __len__(self):
        # Length follows the list that __getitem__ actually indexes.
        return len(self.lis)

    def __getitem__(self,idx):
        name=str(self.lis[idx])
        # Shape (1,) is kept on purpose: the training/validation cells flatten it.
        y=torch.tensor([self._class_by_file[name]],dtype=torch.long)
        # Close the file handle as soon as the pixels are copied out.
        with Image.open(os.path.join(self.folder,name)) as handle:
            img=np.array(handle)
        # Only single-channel images need expanding; COLOR_GRAY2RGB raises on
        # input that already has three channels.
        if img.ndim==2:
            img=cv.cvtColor(img,cv.COLOR_GRAY2RGB)
        x=self.transform(image=img)
        # Tensors are moved to `device` here because later cells pass batches to
        # the model without moving them again.
        # NOTE(review): this presumably prevents using DataLoader worker
        # processes with CUDA -- confirm before setting num_workers > 0.
        x['image']=(x['image'].type(torch.float32)).to(device)
        return{
            'x':x['image'],
            'y':y.to(device),
        }

    def labels_counts(self):
        """Return ``{class value: number of CSV rows}`` read from column 1 of the label table."""
        values,counts=np.unique(self.labels[:,1],return_counts=True)
        return dict(zip(values,counts))

In [6]:
# Build the labelled dataset from the training image folder and its label CSV.
data=dataset('/kaggle/input/ai-of-god-v20/train','/kaggle/input/ai-of-god-v20/train.csv')

In [7]:
len(data)

8201

In [8]:
# Random split: 7000 samples for training, the remainder for validation.
train,val=random_split(data,[7000,len(data)- 7000])
# Prints the whole first training sample (the dict with the image tensor and its label).
print(train[0])

{'x': tensor([[[ 0.0569,  0.0912,  0.1426,  ...,  0.1939,  0.2624,  0.3652],
         [ 0.0569,  0.1083,  0.1597,  ...,  0.1768,  0.2282,  0.3652],
         [ 0.0398,  0.1254,  0.1597,  ...,  0.1768,  0.2282,  0.3652],
         ...,
         [-0.2684, -0.3198, -0.3712,  ...,  0.2967,  0.3481,  0.3138],
         [-0.2513, -0.3541, -0.4568,  ...,  0.2967,  0.3138,  0.2796],
         [-0.1999, -0.3198, -0.4568,  ...,  0.2453,  0.2453,  0.2111]],

        [[ 0.1877,  0.2227,  0.2752,  ...,  0.3277,  0.3978,  0.5028],
         [ 0.1877,  0.2402,  0.2927,  ...,  0.3102,  0.3627,  0.5028],
         [ 0.1702,  0.2577,  0.2927,  ...,  0.3102,  0.3627,  0.5028],
         ...,
         [-0.1450, -0.1975, -0.2500,  ...,  0.4328,  0.4853,  0.4503],
         [-0.1275, -0.2325, -0.3375,  ...,  0.4328,  0.4503,  0.4153],
         [-0.0749, -0.1975, -0.3375,  ...,  0.3803,  0.3803,  0.3452]],

        [[ 0.4091,  0.4439,  0.4962,  ...,  0.5485,  0.6182,  0.7228],
         [ 0.4091,  0.4614,  0.5136,  .

In [9]:
print(len(train))
len(val)

7000


1201

In [10]:
# EfficientNet-B0 from timm, created with pretrained weights.
model_timm=timm.create_model("efficientnet_b0",pretrained=True)


Downloading model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

In [11]:
# idx = 1035
# img=cv.cvtColor(np.array(Image.open('/kaggle/input/ai-of-god-v20/train/'+str(idx)+'.jpg')),cv.COLOR_GRAY2RGB)
# x=transform(image=img)
# # x = img
# for k,v in x.items():
#     p
model_timm

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
      

In [12]:
class Identity(nn.Module):
    """Pass-through layer: ``forward`` returns its input unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x


# Freeze every pretrained weight; only the replacement head below is trained.
for param in model_timm.parameters():
    param.requires_grad = False

# Swap the classification head for an 8-class linear layer. The input width is
# read from the head being replaced instead of hard-coding 1280, so the cell
# keeps working if a different backbone is loaded above.
model_timm.classifier = nn.Linear(model_timm.classifier.in_features, 8)
# A new nn.Linear is trainable by default; requires_grad_() sets the flag on its
# parameters explicitly (assigning `.requires_grad` on the module itself would
# only create an unrelated attribute).
model_timm.classifier.requires_grad_(True)
model = model_timm
# model = nn.Sequential(model_timm,
#                       nn.BatchNorm1d(256),
#                       nn.ReLU(),
#                       nn.Dropout(0.5),
#                       nn.Linear(256,64),
#                       nn.BatchNorm1d(64),
#                       nn.ReLU(),
#                       nn.Dropout(0.5),
#                       nn.Linear(64,8),
# #                       nn.Softmax(dim = 1)
#                      )

In [13]:
# count=np.zeros((8))
# for i in range(len(train)):
#     count[train[i]['y']]=count[train[i]['y']]+1

In [14]:
# count
# model[-7]

In [15]:
# weights={}
# for i in range(len(count)):
#     weights[i]=len(train)/count[i]
# # weights[0]/=10
# # weights[1]/=5
# # weights[2]/=4
# # weights[4]/=4
# # weights[6]/=4
# lweights=[weights[i] for i in range(8)]

In [16]:
# lweights

In [17]:
train[0]['y']

tensor([0], device='cuda:0')

In [18]:
# weight_list=[]
# for i in range(len(train)):
#     weight_list.append(lweights[int(train[i]['y'])])
# t_weight=torch.tensor(weight_list)

In [19]:
# sampler=WeightedRandomSampler(t_weight.type(torch.DoubleTensor),len(weight_list))
batch_size = 32
# Shuffled mini-batches for training; small batches in fixed order for validation.
data_load = DataLoader(train, batch_size=batch_size, shuffle=True)
val_load = DataLoader(val, batch_size=8)
len(data_load)

219

In [20]:
# Peek at the first training sample to check the type stored under 'x'.
# NOTE(review): the loop variable rebinds the global name `data`, which until
# here referred to the full dataset object.
for data in train:
    print(type(data['x']))
    break

<class 'torch.Tensor'>


In [21]:
# Sorted entry names of the test image directory (string sort, so '10.jpg' precedes '2.jpg').
test_csv = np.sort(os.listdir('/kaggle/input/ai-of-god-v20/test'))
test_csv

array(['1.jpg', '10.jpg', '100.jpg', ..., '997.jpg', '998.jpg', '999.jpg'],
      dtype='<U8')

In [22]:
# Move the model's parameters and buffers to the selected device.
model=model.to(device)

In [23]:
# Same value as already assigned in the DataLoader cell; kept for the cells below.
batch_size = 32

In [None]:
# Train `model` with cross-entropy for `num_epochs` passes over `data_load`.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss()
epoch_losses = []            # summed batch loss of each epoch
n_total_steps = len(data_load)
num_epochs = 20
log_every = 50               # print the running loss every this many steps

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    correct = 0
    seen = 0
    for i, data in enumerate(data_load):
        x = data['x'].to(device)
        # The dataset yields labels of shape (batch, 1); CrossEntropyLoss wants (batch,).
        y = data['y'].to(device).flatten()
        outputs = model(x)
        loss = criterion(outputs, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        correct += (outputs.argmax(dim=1) == y).sum().item()
        seen += y.numel()
        if (i + 1) % log_every == 0 or (i + 1) == n_total_steps:
            print(f"epoch: {epoch+1}/{num_epochs}| step: {i+1}/{n_total_steps}, loss: {epoch_loss/(i+1):.3f}")
    # Accuracy is the fraction of correctly classified samples seen this epoch
    # (dividing the count by the batch size does not give a fraction).
    epoch_acc = correct / max(seen, 1)
    print(f"epoch: {epoch+1}/{num_epochs} ended with accuracy: {epoch_acc:.4f}")
    epoch_losses.append(epoch_loss)

In [None]:
epoch_losses

# del val_load
# del data_load

In [None]:
# Macro-F1 of the trained model on the held-out split.
# NOTE(review): `val` is served by the same dataset object as `train`, so these
# images still pass through the random crop/flip pipeline -- the score can vary
# between runs.
val_load = DataLoader(dataset=val, batch_size=8)
model.eval()

y_preds = []
y_true = []
# Predictions and labels are collected in the same pass so they stay aligned
# for any split size, and no_grad avoids building autograd graphs.
with torch.no_grad():
    for data in val_load:
        x = data['x'].to(device)
        predicted = model(x)
        y_preds.append(predicted.argmax(dim=1).cpu())
        y_true.append(data['y'].reshape(-1).cpu())

y_fin = torch.cat(y_preds)
y = torch.cat(y_true)
np_yfin = y_fin.numpy()
np_y = y.numpy()
print(np_y.shape)
print(np_yfin.shape)
# scikit-learn's signature is f1_score(y_true, y_pred).
print(f1_score(np_y, np_yfin, average = 'macro'))

In [None]:
# class tdataset(Dataset):
#     def __init__(self,folder=None):
#         self.model=model.eval()
#         self.transform=ttransform
#         self.folder=folder
#         self.lis=np.sort(os.listdir(folder))
#     def __len__(self):    
#         return self.lis.size
#     def __getitem__(self,url):
#         img=cv.cvtColor(np.array(Image.open(self.folder+'/'+url)),cv.COLOR_GRAY2RGB)
#         x=self.transform(image=img)
#         x['image']=((x['image'].reshape((1,*x['image'].shape))).type(torch.float32)).to(device)
#         return torch.argmax(self.model(x['image']),dim = 1).detach().cpu().numpy()

class tdataset(Dataset):
    """Unlabelled test images addressed by position.

    Item ``idx`` is the file ``<idx+1>.jpg`` inside ``folder``.

    Args:
        folder: directory that holds the test images.
        transform: optional callable invoked as ``transform(image=img)`` that
            returns a dict with an ``'image'`` entry. When ``None`` the RGB
            ndarray is returned as is.
    """

    def __init__(self,folder,transform=None):
        self.dir = folder
        self.transform = transform
        # Count the directory entries once instead of on every len() call.
        self._size = len(os.listdir(folder))

    def __len__(self):
        return self._size

    def __getitem__(self,idx):
        # os.path.join works whether or not `folder` ends with a path separator.
        path = os.path.join(self.dir, str(idx+1)+'.jpg')
        # Close the file handle as soon as the pixels are copied out.
        with Image.open(path) as handle:
            img = np.array(handle)
        # Only single-channel images need expanding; COLOR_GRAY2RGB raises on
        # input that already has three channels.
        if img.ndim == 2:
            img = cv.cvtColor(img, cv.COLOR_GRAY2RGB)
        if self.transform is None:
            return {'image': img}
        return {'image': self.transform(image=img)['image']}

In [None]:
# Deterministic preprocessing for inference: same resize and normalisation as
# training, but a centre crop and no random flips, so each test image gets the
# same prediction on every run.
test_transform = A.Compose([
    A.Resize(256, 256),
    A.CenterCrop(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])
test=tdataset('/kaggle/input/ai-of-god-v20/test/',test_transform)

In [None]:
# Fixed-order batches of 256 test images; shuffle stays off so predictions keep dataset order.
test_load = DataLoader(test, batch_size=256, shuffle=False)

In [None]:
test[0]


In [None]:
# Switch to inference mode in this cell as well, so the predictions do not
# depend on the validation cell having been run first.
model.eval()
predictions = []
with torch.no_grad():
    for data in test_load:
        images = data['image'].to(device)
        output = model(images)
        predictions.append(output.cpu().numpy())

# Stack the per-batch logits and keep the index of the largest logit per image.
predictions = np.concatenate(predictions)
predictions = predictions.argmax(1)

In [None]:
len(predictions)

In [None]:
# Table of test entries that the predicted classes are written into below.
subm=pd.read_csv("/kaggle/input/ai-of-god-v20/test.csv")

In [None]:
# subm = pd.read_csv('/kaggle/input/ai-of-god-v20/sample_submission.csv', index_col = "FileName")
# for i in range(len(subm.index)):
#     subm.iloc[i,0]=test[subm.index[i]]

In [None]:
# y = data['y'].detach().cpu()
# print("Predicted percentage of each class in Test:\n",torch.tensor(subm.value_counts()/subm.value_counts().sum()))
# f,a = plt.subplots(nrows = 1, ncols = 2)
# a[0].bar(np.arange(8),np.bincount(subm.iloc[:,0].to_numpy())/np.sum(subm.value_counts()))
# a[0].set_title('Test Distrib.')
# print("Percentage of each class in Train:\n",torch.bincount(y.reshape(-1))/torch.sum(torch.bincount(y.reshape(-1))))
# a[1].bar(np.arange(8),(torch.bincount(y.reshape(-1)))/torch.sum(torch.bincount(y.reshape(-1))))
# a[1].set_title('Train Distrib.')

# plt.show()

In [None]:
if len(subm) != len(predictions):
    raise ValueError(f"test.csv has {len(subm)} rows but there are {len(predictions)} predictions")

# tdataset serves `<i+1>.jpg` at position i, so predictions[i] belongs to the
# file numbered i+1. Align by file name when the CSV exposes it instead of
# trusting that its rows are in numeric order.
if 'FileName' in subm.columns:
    file_numbers = subm['FileName'].map(
        lambda name: int(os.path.splitext(os.path.basename(str(name)))[0])
    )
    subm['Class'] = predictions[file_numbers.to_numpy() - 1]
else:
    # NOTE(review): no 'FileName' column -- falls back to row order; confirm
    # that test.csv rows are ordered 1.jpg, 2.jpg, ...
    subm['Class'] = predictions

# Number of test images predicted for each class.
b=np.bincount(predictions)
print(b)
subm.to_csv('submission.csv',index=False)
subm