In [None]:
# openpyxl is the engine pandas needs for the .xlsx files read and written in this notebook.
!pip install -q openpyxl

# Links to the dataset and trained model

> Dataset: https://www.kaggle.com/datasets/himanshunayal/damage-detect

> Trained model: https://www.kaggle.com/datasets/himanshunayal/damage-detection-models

# Importing Libraries

In [None]:
import gc
import os
import sys
import cv2
import random
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score 
from sklearn.model_selection import StratifiedKFold,GroupKFold

import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
import warnings
warnings.filterwarnings('ignore')

# import torch_xla.core.xla_model as xm

In [None]:
class cfg:
    """Single place for data paths, cross-validation setup and training hyper-parameters."""

    # ---- data locations -------------------------------------------------
    train_dir = '../input/damage-detect/Train (1)/'
    test_dir = '../input/damage-detect/Test/'

    # ---- reproducibility / cross-validation -----------------------------
    seed = 69
    n_splits = 5

    # ---- output sizes of the two classifier heads -----------------------
    dt_classes = 6    # multi-label head (damage type)
    de_classes = 4    # multi-class head (extent of damage)

    # ---- optimisation ---------------------------------------------------
    epochs = 50
    batch_size = 8
    lr = 1e-4
    min_lr = 1e-6
    weight_decay = 1e-6

    # ---- ReduceLROnPlateau settings -------------------------------------
    factor = 0.2
    reduce_plat = 4   # handed to the scheduler as `patience`
    eps = 1e-6

    # ---- hardware -------------------------------------------------------
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # device = xm.xla_device()

def seed_torch(seed=42):
    """Seed every random number generator this notebook relies on.

    Covers Python's `random`, the hash seed environment variable, NumPy and
    PyTorch (CPU and all CUDA devices), and puts cuDNN into a deterministic
    configuration.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run; turn it
    # off so that `deterministic = True` actually yields repeatable results.
    torch.backends.cudnn.benchmark = False

# Apply the configured seed before any data split or model initialisation happens.
seed_torch(seed=cfg.seed)

# Preprocessing

In [None]:
# Load the label sheet; the 'subset' column is dropped right away.
df = pd.read_excel(cfg.train_dir+'training_label.xlsx').drop('subset',axis=1)
# 'class' is a comma-separated string of damage types -> list of raw tokens
df['class'] = df['class'].apply(lambda x: x.split(','))
# keep only the part after the first '/' of the stored image path
df['image'] = df['image'].apply(lambda x:x.split('/')[1])
df[['head_lamp','tail_lamp','glass_shatter','scratch','dent','unknown']] = 0

######## one hot encoding the labels
# Pair each index *label* with its class list so the write through `.at`
# (label-based) always targets the row the classes were read from. The
# original read with `.iloc` (positional) using labels, which is only correct
# for a default RangeIndex.
for row_label, class_list in zip(df.index, df['class']):
    for class_ in class_list:
        df.at[row_label,str(class_).strip()]=1

df.head(5)

In [None]:
######################################
#### Customizing Train Dataset #######
######################################
class TrainDataset(Dataset):
    """Labelled image dataset with on-the-fly augmentation.

    Each item is a tuple ``(image, damage_type, damage_extent)``:
      * image         - float32 tensor in CHW layout, pixel values scaled by 1/255
      * damage_type   - float32 tensor built from the six one-hot damage columns
      * damage_extent - float32 scalar tensor from the 'extent_of_damage' column

    Args:
        df:  DataFrame with an 'image' column (file names), the six damage-type
             columns and 'extent_of_damage'.
        cfg: Config object; only `cfg.train_dir` is read.
    """
    def __init__(self,df,cfg):
        self.df = df
        self.img_dir = cfg.train_dir+'Train/'
        self.image = df['image'].values
        self.labels_1 = df[['head_lamp','tail_lamp','glass_shatter','scratch','dent','unknown']].values
        self.labels_2 = df['extent_of_damage'].values
        # Use the public top-level albumentations names: the nested module
        # paths (e.g. `A.augmentations.transforms.AdvancedBlur`) move between
        # releases. Keyword arguments keep RandomSizedCrop unambiguous.
        # NOTE(review): RandomSizedCrop only fires with p=0.7, so batching
        # presumably relies on the source images already being 224x224 - confirm.
        self.transform = A.Compose([A.HorizontalFlip(p=0.5),
                                    A.VerticalFlip(p=0.5),
                                    A.ColorJitter(),
                                    A.Rotate(limit=90),
                                    A.ChannelShuffle(),
                                    A.AdvancedBlur(),
                                    A.RandomSizedCrop(min_max_height=(180,180),height=224,width=224,p=0.7)
                                  ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self,idx):
        """Load, augment and normalise the image at position `idx`.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_name = self.image[idx]
        label_1 = self.labels_1[idx]
        label_2 = self.labels_2[idx]
        image_path = self.img_dir+image_name
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
        image = self.transform(image=image)['image']
        image = image/255.0

        # HWC -> CHW for PyTorch
        return torch.tensor(image,dtype=torch.float32).permute(2,0,1) ,\
                torch.tensor(label_1,dtype=torch.float32),torch.tensor(label_2,dtype=torch.float32)
        

#######################################
#### Customizing Test Dataset #########
#######################################
class TestDataset(Dataset):
    """Unlabelled test images.

    Each item is ``(image, image_id)`` where `image` is a float32 CHW tensor
    scaled by 1/255 and `image_id` is the integer parsed from the file name
    (the text before the first '.').

    Args:
        cfg: Config object; only `cfg.test_dir` is read.
    """
    def __init__(self,cfg):
        self.img_dir = cfg.test_dir+'Test/'
        # os.listdir order is arbitrary; sort once so indexing is reproducible.
        self.image_name = sorted(os.listdir(self.img_dir))

    def __len__(self):
        # Use the cached listing so the length always agrees with __getitem__
        # (and the directory is not re-scanned on every call).
        return len(self.image_name)

    def __getitem__(self,idx):
        """Load and normalise the image at position `idx`.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_name = self.image_name[idx]
        image_path = self.img_dir+image_name
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
        image = image/255.0

        # HWC -> CHW for PyTorch
        return torch.tensor(image,dtype=torch.float32).permute(2,0,1) ,torch.tensor(int(image_name.split('.')[0]))
        

**Sample Train Image**

In [None]:
# Sanity check: build the dataset over the full label table and fetch one (augmented) sample.
trainset = TrainDataset(df,cfg)
img,lab_1,lab_2 = trainset[0]
# The dataset yields CHW tensors; imshow expects HWC.
plt.imshow(img.permute(1,2,0))
print(lab_1,lab_2)

**Sample Test Image**

In [None]:
# `testset` is reused further down when the test DataLoader is built.
testset = TestDataset(cfg)
img,image_name = testset[0]
# Second element is the numeric image id parsed from the file name.
print(image_name)
# The dataset yields CHW tensors; imshow expects HWC.
plt.imshow(img.permute(1,2,0))

In [None]:
# Stratify the folds on 'extent_of_damage' so every fold sees a similar class mix.
skf = StratifiedKFold(n_splits=cfg.n_splits,shuffle=True, random_state=cfg.seed)
df['fold'] = -1

for fold , (train_idx,val_idx) in enumerate(skf.split(df['image'],df['extent_of_damage'])):
    # `split` yields *positional* indices, so write through `.iloc`; `.loc`
    # would interpret them as index labels and mis-assign rows whenever the
    # frame does not carry a default 0..n-1 index.
    df.iloc[val_idx,df.columns.get_loc('fold')] = fold

# Device Loading

In [None]:
def to_device(data,device):
    """Recursively move a tensor, or a (nested) list/tuple of tensors, to `device`.

    Lists and tuples both come back as lists; anything else must provide a
    `.to(device, non_blocking=True)` method.
    """
    if not isinstance(data, (list,tuple)):
        return data.to(device,non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceLoader:
    """Thin wrapper around an iterable of batches that moves each batch to a device."""

    def __init__(self,dl,device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches reported by the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Lazily yield every batch of the wrapped loader after moving it to `self.device`."""
        yield from (to_device(batch,self.device) for batch in self.dl)

# Defining Loss and Accuracy

In [None]:
#######################################
#### customized accuracy metric #######
#######################################

def custom_metric(out,lab):
    """Score multi-label predictions row by row and return the mean as a percentage.

    Per-row scoring:
      * 1   - prediction equals the label in every position
      * 0.5 - not an exact match, but no position is predicted 1 where the label is 0
      * 0   - at least one position is predicted 1 where the label is 0

    Args:
        out: 2-D array of predictions (rows = samples).
        lab: 2-D array of ground-truth labels, same shape as `out`.
    """
    exact_match = (out == lab).all(axis=1)
    false_positive = ((out == 1) & (lab == 0)).any(axis=1)

    # exact rows first, then rows without any false positive, everything else 0
    row_scores = np.where(exact_match, 1.0, np.where(false_positive, 0.0, 0.5))
    return np.mean(row_scores)*100

#######################################
#### accuracy for 1st multilabel cls ##
#######################################

def accuracy_1(outputs, labels):
    """Threshold the multi-label outputs at 0.5 and score them with `custom_metric`."""
    scores = outputs.detach().cpu().numpy()
    targets = labels.detach().cpu().numpy()
    hard_preds = (scores > 0.5).astype(float)

    return custom_metric(hard_preds, targets)

#######################################
#### accuracy for 2nd (multiclass) cls #
#######################################

def accuracy_2(outputs,labels):
    """Top-1 accuracy (in percent) of the multi-class head.

    The previous implementation ignored `labels` and returned the mean of the
    highest per-row score, which measures confidence rather than accuracy.

    Args:
        outputs: Tensor of shape (batch, n_classes) with per-class scores.
        labels:  Tensor of shape (batch,) holding the class indices.

    Returns:
        Percentage of rows whose arg-max class equals the label, on the same
        0-100 scale that `accuracy_1` reports.
    """
    preds = np.argmax(outputs.cpu().detach().numpy(),axis=1)
    # labels may arrive as floats; compare as integer class ids
    targets = labels.cpu().detach().numpy().astype(np.int64)
    return np.mean(preds == targets)*100

###########################################
#### weights for multiclassifier ##########
###########################################

def weights():
    """Per-class weights for the damage-extent loss, as a FloatTensor on `cfg.device`.

    Class counts are taken from the global `df['extent_of_damage']`; each of
    the first four classes (in sorted order) gets the weight
    ``counts[1] / counts[class]``, so the second class always has weight 1.
    """
    counts = np.unique(df['extent_of_damage'],return_counts=True)[1]
    reference = counts[1]
    class_weights = [reference/counts[cls] for cls in range(4)]
    return to_device(torch.FloatTensor(class_weights),cfg.device)

#######################################
#### epoch loss and accuracy ##########
#######################################

class Loss_Acc(nn.Module):
    """Mixin providing the per-batch loss and metrics for the two-headed model.

    Subclasses must implement `forward(images)` returning ``(out_1, out_2)``
    where both outputs are already probabilities: the network in this notebook
    ends its heads with `Sigmoid` and `Softmax(dim=1)` respectively.
    """
    def epoch_loss(self, batch):
        """Return the summed loss of both heads for one batch.

        Args:
            batch: ``(images, label_1, label_2)`` - multi-hot damage-type
                targets and damage-extent class ids.
        """
        images, label_1,label_2 = batch
        out_1,out_2 = self.forward(images)
        loss_1 = F.binary_cross_entropy(out_1, label_1)
        # out_2 has already been through Softmax. F.cross_entropy expects raw
        # logits and would apply log-softmax a second time, flattening the
        # gradients. Take the log of the probabilities and use NLL instead,
        # which is the same weighted cross-entropy computed once.
        log_probs = torch.log(out_2.clamp_min(torch.finfo(out_2.dtype).tiny))
        loss_2 = F.nll_loss(log_probs,label_2.to(torch.int64),weight=weights())
        loss = loss_1 + loss_2
        return loss

    def epoch_score(self, batch):
        """Return ``[acc_1, acc_2]`` (damage type, damage extent) for one batch."""
        images, label_1,label_2 = batch
        out_1,out_2 = self.forward(images)
        acc_1 = accuracy_1(out_1, label_1)
        acc_2 = accuracy_2(out_2, label_2)
        return list([acc_1,acc_2])

# Model

In [None]:
class CustomResNext(Loss_Acc):
    """Two independent ResNeXt-50 (32x4d) networks sharing the same input.

    `model_1` ends in a Sigmoid head with `n_classes` outputs, `model_2` in a
    Softmax head with `n_classes_` outputs.
    """
    def __init__(self,n_classes,n_classes_):
        super().__init__()
        self.model_1 = self._make_branch(n_classes, nn.Sigmoid())
        self.model_2 = self._make_branch(n_classes_, nn.Softmax(dim=1))

    @staticmethod
    def _make_branch(out_features, activation):
        """Pretrained ResNeXt-50 whose `fc` is replaced by Dropout -> Linear -> `activation`."""
        net = models.resnext50_32x4d(pretrained=True)
        head = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=net.fc.in_features, out_features=out_features),
            activation)
        net.fc = head
        return net

    def forward(self, x):
        """Run both branches on `x` and return ``(out_1, out_2)``."""
        return self.model_1(x), self.model_2(x)
    

In [None]:
# Global model instance (6 damage-type outputs, 4 damage-extent outputs) placed on cfg.device.
model = to_device(CustomResNext(cfg.dt_classes,cfg.de_classes),cfg.device)

In [None]:
@torch.no_grad()
def evaluation(model,val_loader):
    """Average the per-batch metrics and loss of `model` over `val_loader`.

    The loader is traversed exactly once. The previous version iterated it
    three times, which tripled the cost, scored metrics and loss on different
    draws when the dataset is stochastic, and broke on one-shot iterables.

    Args:
        model: Object exposing `eval()`, `epoch_score(batch)` returning
            ``[acc_1, acc_2]``, and `epoch_loss(batch)` returning a scalar.
        val_loader: Iterable of batches.

    Returns:
        Tuple ``(epoch_acc_1, epoch_acc_2, epoch_loss)`` of Python floats.

    Raises:
        ValueError: if `val_loader` yields no batches.
    """
    model.eval()
    output_1, output_2, loss = [], [], []
    for batch in val_loader:
        acc_1, acc_2 = model.epoch_score(batch)
        output_1.append(float(acc_1))
        output_2.append(float(acc_2))
        # float() detaches the scalar, so nothing tensor-like is kept around
        loss.append(float(model.epoch_loss(batch)))

    if not loss:
        raise ValueError('val_loader yielded no batches')

    epoch_acc_1 = torch.tensor(output_1,dtype=torch.float32).mean().item()
    epoch_acc_2 = torch.tensor(output_2,dtype=torch.float32).mean().item()
    epoch_loss = torch.tensor(loss,dtype=torch.float32).mean().item()
    return epoch_acc_1, epoch_acc_2, epoch_loss

In [None]:
def fit(model,df):
    """Train `model` with k-fold cross-validation, saving one checkpoint per fold.

    For every fold the model is reset to the weights it had on entry, trained
    for `cfg.epochs` epochs on the remaining folds, validated after each epoch
    and finally written to ``./model_f{fold}.pt``.

    Args:
        model: Network exposing `epoch_loss(batch)` / `epoch_score(batch)`.
        df:    Label table containing a 'fold' column.

    Returns:
        The per-epoch history of the *last* fold: a list of
        ``[train_acc_1, train_acc_2, train_loss, val_acc_1, val_acc_2, val_loss]``
        rows (plain floats).
    """
    torch.cuda.empty_cache()

    # Snapshot the starting weights (on CPU) so every fold begins from the same
    # state. Without this, fold k keeps training a model that has already seen
    # fold k's validation images in earlier folds, making the scores meaningless.
    initial_state = {name: tensor.detach().cpu().clone()
                     for name, tensor in model.state_dict().items()}

    history=[]
    for fold in range(cfg.n_splits):
        model.load_state_dict(initial_state)

        train_df = df[df['fold']!=fold]
        val_df = df[df['fold']==fold]
        trainset = TrainDataset(train_df,cfg)
        validset = TrainDataset(val_df,cfg)
        # Validation must be deterministic: drop the random augmentations.
        validset.transform = A.Compose([A.NoOp()])
        # Shuffle the training data so batches differ from epoch to epoch.
        train_loader = DataLoader(trainset,batch_size=cfg.batch_size,shuffle=True,num_workers=4,pin_memory=True)
        valid_loader = DataLoader(validset,batch_size=cfg.batch_size,num_workers=4,pin_memory=True)
        train_dl = DeviceLoader(train_loader,cfg.device)
        valid_dl = DeviceLoader(valid_loader,cfg.device)

        optimizer=torch.optim.Adam(model.parameters(),lr=cfg.lr,weight_decay=cfg.weight_decay)
        lr_sched=torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=cfg.factor,
                                                            patience=cfg.reduce_plat, eps=cfg.eps)

        history=[]
        for epoch in tqdm(range(cfg.epochs)):
            model.train()
            train_losses = []
            train_acc_1 = [] ### for damage type
            train_acc_2 = [] ### for damage extent
            for batch in train_dl:
                loss = model.epoch_loss(batch)
                # Metrics need no gradients; skip building a second autograd graph.
                with torch.no_grad():
                    acc = model.epoch_score(batch)
                loss.backward()

                optimizer.step()
                optimizer.zero_grad()

                # Store plain floats so per-batch tensors are not kept alive.
                train_losses.append(loss.item())
                train_acc_1.append(float(acc[0]))
                train_acc_2.append(float(acc[1]))

            t_loss = float(np.mean(train_losses))
            t_acc_1 = float(np.mean(train_acc_1))
            t_acc_2 = float(np.mean(train_acc_2))
            val_acc_1, val_acc_2, val_loss = evaluation(model, valid_dl)
            # ReduceLROnPlateau expects one call per epoch with the monitored
            # metric. Stepping it per batch on the noisy training loss made
            # `patience` count batches and collapsed the learning rate early.
            lr_sched.step(val_loss)
            history.append([t_acc_1,t_acc_2,t_loss,val_acc_1,val_acc_2,val_loss])
            print(f'###############Epoch_{epoch+1} score###############')
            print(f"train_loss:{t_loss},train_acc(damage_type): {t_acc_1},train_acc(damage_extent):{t_acc_2},val_loss: {val_loss},Val_acc(damage_type): {val_acc_1},Val_acc(damage_extent): {val_acc_2}")

        torch.save(model.state_dict(), f'./model_f{fold}.pt')

    return history

**Uncomment the code below to run training**

In [None]:
# history = fit(model,df)

In [None]:
# Number of test images; the path is derived from cfg so it cannot drift from TestDataset.
len(os.listdir(cfg.test_dir+'Test/'))

In [None]:
# Batches of up to 300 test images, in dataset order (no shuffling requested).
test_loader = DataLoader(testset,batch_size=300,num_workers=4,pin_memory=True)
# Re-bind the name to a wrapper that moves every batch to cfg.device.
test_loader = DeviceLoader(test_loader,cfg.device)

In [None]:
def inference(n):
    """Predict the whole test set with the checkpoint of fold `n`.

    Loads ``model_f{n}.pt`` into the global `model` and runs it over the
    global `test_loader`.

    Args:
        n: Fold number of the checkpoint to load.

    Returns:
        Tuple of NumPy arrays ``(preds_1, preds_2, images)``: damage-type
        scores, damage-extent scores and image ids for *all* test batches,
        concatenated in loader order.
    """
    model.load_state_dict(torch.load(f'../input/damage-detection-models/model_f{n}.pt',map_location=cfg.device))
    # Switch Dropout / BatchNorm to inference behaviour. Without this the
    # predictions are stochastic and depend on the batch composition.
    model.eval()

    preds_1 = []
    preds_2 = []
    images = []
    with torch.no_grad():
        for batch in test_loader:
            pred_1,pred_2 = model(batch[0])
            preds_1.append(pred_1.cpu())
            preds_2.append(pred_2.cpu())
            images.append(batch[1].cpu())

    # Concatenate every batch. The old code returned only element [0], which
    # silently dropped everything beyond the first batch.
    return torch.cat(preds_1).numpy(), torch.cat(preds_2).numpy(), torch.cat(images).numpy()


# Postprocessing and Voting

In [None]:
# Empty frame; the prediction columns are filled in after voting.
submission_file = pd.DataFrame(columns=['image','extent_of_damage'])

In [None]:
# Collect hard predictions from every fold checkpoint for the voting step.
stacked_preds_1 = []
stacked_preds_2 = []
# One checkpoint per CV fold: take the count from cfg instead of a literal 5.
for i in tqdm(range(cfg.n_splits)):
    preds_1, preds_2, images = inference(n=i)
    preds_1 = np.where(preds_1>0.5,1,0)      # multi-label: threshold each score
    preds_2 = np.argmax(preds_2,axis=1)      # multi-class: most likely class
    stacked_preds_1.append(preds_1)
    stacked_preds_2.append(preds_2)

# `images` is whatever the last inference call returned.
image_labels = ['image/'+str(image)+'.jpeg' for image in images]
    

In [None]:
def damage_type(stacked_preds_1):
    """Majority vote over per-model multi-label predictions.

    Args:
        stacked_preds_1: Sequence of equally shaped 0/1 arrays, one per model.

    Returns:
        0/1 array with a 1 wherever strictly more than half of the models
        voted 1. For five models this equals the previous ``sum > 2`` rule;
        the threshold now adapts to any number of models.
    """
    votes = np.asarray(stacked_preds_1)
    n_models = votes.shape[0]
    # strict majority: 2 * votes > number of voters
    final_preds_1 = np.where(votes.sum(axis=0)*2>n_models,1,0)

    return final_preds_1
    
def damage_extent(stacked_preds_2):
    """Pick, for every sample, the class predicted by the most models.

    `stacked_preds_2` holds one array of class ids per model; after the
    transpose each row contains all votes for one sample. Ties resolve to the
    smallest class id (first maximum of the bin counts).
    """
    votes_per_sample = np.transpose(stacked_preds_2)
    return [np.bincount(sample_votes).argmax() for sample_votes in votes_per_sample]


In [None]:
# Combine the per-fold predictions into one final prediction per test image.
voting_preds_1 = damage_type(stacked_preds_1)
voting_preds_2 =damage_extent(stacked_preds_2)

In [None]:
##########################################
### mapping predictions into dataframe ###
##########################################
submission_file[['head_lamp','tail_lamp','glass_shatter','scratch','dent','unknown']] = voting_preds_1
submission_file['extent_of_damage'] = voting_preds_2
submission_file['image'] = image_labels

############################################################
### Removing unknown value if other damage type is there ###
############################################################

# The damage flags are numeric 0/1 at this point. The previous loop compared
# them with the strings 'unknown' / '' and therefore never changed anything.
# Clear 'unknown' wherever at least one specific damage type is flagged, using
# a single `.loc` assignment rather than chained indexing.
specific_damage_cols = ['head_lamp','tail_lamp','glass_shatter','scratch','dent']
has_specific_damage = submission_file[specific_damage_cols].eq(1).any(axis=1)
flagged_unknown = submission_file['unknown'].eq(1)
submission_file.loc[has_specific_damage & flagged_unknown,'unknown'] = 0
                
# ###########################
# ### fixing class column ###
# ###########################

# for i in submission_file.index:
#     list_ = []
#     for col in ['head_lamp','tail_lamp','glass_shatter','scratch','dent','unknown']:
#         if submission_file[col].loc[i] != '':
#             list_.append(submission_file[col].loc[i])
#     submission_file['class'].loc[i] = list_
    
# submission_file = submission_file.drop(['head_lamp','tail_lamp','glass_shatter','scratch','dent','unknown'],axis=1)

In [None]:
################################
### sorting w.r.t image_name ###
################################
# Order rows by the numeric part of 'image/<n>.jpeg' via a temporary helper column.
submission_file = (
    submission_file
    .assign(**{'image_no.': submission_file['image'].map(lambda label: int(label.split('/')[1].split('.')[0]))})
    .sort_values(by='image_no.')
    .drop('image_no.',axis=1)
    .reset_index(drop=True)
)

In [None]:
##################################################################
## reindexing columns according to the competition requirement ###
##################################################################
# `reindex` keeps exactly these columns, in this order.
column_titles = ['image','dent','glass_shatter','head_lamp','scratch','tail_lamp','unknown','extent_of_damage']
submission_file = submission_file.reindex(columns=column_titles)

In [None]:
# Display the final table for a visual check before it is written out.
submission_file

In [None]:
# Write the submission without the DataFrame index column.
submission_file.to_excel('submission.xlsx',index=False)