In [None]:
import pandas as pd
from pathlib import Path

In [None]:
!pip install -q timm


In [None]:
# Python library used for working with arrays.
import numpy as np

# Python library to interact with the file system.
import os

# Software library written for data manipulation and analysis. 
import pandas as pd

# fastai library for computer vision tasks
from fastai.vision.all import *
from fastai.metrics import *

# PyTorch: framework for building and training neural-network-based deep learning models.
import torch


In [None]:
# Root folder of the competition data; the CSV files and the train/test image
# paths used in the following cells are all built relative to it.
dataset_path = Path('../input/ranzcr-clip-catheter-line-classification')


In [None]:
# Read the training table (train.csv) from the dataset folder.
train_df = pd.read_csv(dataset_path.joinpath('train.csv'))


In [None]:
# Preview the first rows of the raw training table.
train_df.head()


In [None]:
# Swap the StudyInstanceUID column for a 'path' column holding the location of
# the matching JPEG under <dataset_path>/train ('path' is appended as the last
# column, exactly as before).
# The guard keeps the cell re-runnable: once the UID column has been dropped,
# running the cell again would otherwise raise a KeyError.
if 'StudyInstanceUID' in train_df.columns:
    train_img_dir = dataset_path/'train'
    train_df = (
        train_df
        .assign(path=train_df['StudyInstanceUID'].map(lambda uid: f'{train_img_dir/uid}.jpg'))
        .drop(columns=['StudyInstanceUID'])
    )
train_df.head(10)


In [None]:
# Side length (in pixels) shared by the per-item crop and the batch augmentations.
image_size = 384

# Per-image transform: a random square crop (ratio fixed at 1:1, min_scale=0.75)
# resized to image_size.
item_tfms = RandomResizedCrop(image_size, ratio=(1., 1.), min_scale=0.75)

# Per-batch transforms: fastai's augmentation set with warping switched off
# (max_warp=0), followed by normalisation with the ImageNet statistics.
batch_tfms = [
    *aug_transforms(max_warp=0, size=image_size),
    Normalize.from_stats(*imagenet_stats),
]


In [None]:
# The first 11 columns of train_df are used as the label (vocab) names.
label_names = train_df.columns[:11].tolist()


In [None]:
# Multi-label image classification pipeline.
# Columns are read by NAME rather than by position: the image path comes from
# the 'path' column and the already-encoded targets from the label columns.
# This removes the hidden requirement that 'path' sits at index 12 and the
# labels at indices 0-10, and avoids integer look-ups on label-indexed rows.
data = DataBlock(
    blocks=(ImageBlock, MultiCategoryBlock(encoded=True, vocab=label_names)),  # image in, multi-label target out
    splitter=RandomSplitter(seed=42),  # random train/validation split, seeded for repeatability
    get_x=ColReader('path'),           # image file path
    get_y=ColReader(label_names),      # one value per label column
    item_tfms=item_tfms,
    batch_tfms=batch_tfms,
)


In [None]:
# Build the DataLoaders from the training table.
batch_size = 16
dls = data.dataloaders(train_df, bs=batch_size)

# Visual sanity check: display a sample of one batch.
dls.show_batch()


In [None]:
# Load the 'deit_base_patch16_384' entry point from the main branch of the
# facebookresearch/deit repository through torch.hub, with pretrained=True.
model = torch.hub.load('facebookresearch/deit:main', 'deit_base_patch16_384', pretrained=True)


In [None]:
# Show the model's current head before it is replaced in the next cell.
model.head


In [None]:
# Replace the head with dropout followed by a linear layer that emits one
# output per label. The output width is derived from label_names so it cannot
# drift out of sync with the targets.
# NOTE(review): 768 is the input width of the new linear layer — presumably the
# feature size of the pretrained head shown by `model.head`; confirm if the
# backbone is changed.
model.head = nn.Sequential(
    nn.Dropout(0.25),
    nn.Linear(768, len(label_names)),
)

model.head


In [None]:
# Wrap the DataLoaders and the modified model in a Learner, tracking
# accuracy_multi as the metric. No loss_func is passed explicitly here.
learn = Learner(dls, model, metrics = [accuracy_multi])


In [None]:
# Run the learning-rate finder to help choose base_lr for training.
learn.lr_find()


In [None]:
# Train with fine_tune(1, ...).
# NOTE(review): base_lr looks like a value copied from an earlier lr_find run —
# re-check it whenever the data, model or batch size changes.
learn.fine_tune(1, base_lr=1.2022644114040304e-05)


In [None]:
# Read the submission template (sample_submission.csv) and preview it.
sample_df = pd.read_csv(dataset_path.joinpath('sample_submission.csv'))
sample_df.head()


In [None]:
# Derive the test-time item table from the submission template without
# modifying sample_df: append a placeholder 'PatientID' column and a 'path'
# column pointing at <dataset_path>/test/<uid>.jpg, then drop the UID column.
# The resulting column order matches train_df (labels, PatientID, path).
_sample_df = (
    sample_df
    .assign(
        PatientID='None',
        path=sample_df['StudyInstanceUID'].map(lambda uid: str(dataset_path/'test'/uid)+'.jpg'),
    )
    .drop(columns=['StudyInstanceUID'])
)

# Test DataLoader created from the training DataLoaders.
test_dl = dls.test_dl(_sample_df)


In [None]:
# Visual sanity check: display a sample of one test batch.
test_dl.show_batch()


In [None]:
# Predict on the test DataLoader with test-time augmentation (n=3).
# Only the predictions are kept; the second returned value is discarded.
# NOTE(review): confirm the returned preds are already probabilities (i.e. the
# activation has been applied) before they are written to the submission.
preds, _ = learn.tta(dl=test_dl,n=3)


In [None]:
# Build the submission from a COPY of the template so sample_df stays untouched
# (plain assignment only aliased it, so the template was overwritten in place).
submission_df = sample_df.copy()

# The label scores go into the columns that follow the first (ID) column, in
# the same positional order the original per-cell loop used.
target_cols = list(submission_df.columns[1:len(label_names) + 1])
pred_array = preds.numpy().astype(np.float32)
assert pred_array.shape == (len(submission_df), len(target_cols)), (
    f'predictions have shape {pred_array.shape}, '
    f'expected {(len(submission_df), len(target_cols))}'
)

# One vectorised assignment replaces the row-by-row / cell-by-cell .iloc loop;
# the columns are replaced wholesale, so they take the float32 dtype directly
# instead of floats being written one at a time into the template's columns.
submission_df[target_cols] = pred_array


In [None]:
# Preview the first 10 rows of the filled-in submission.
submission_df.head(10)


In [None]:
# Write the submission file without the DataFrame index.
# (The f-prefix was dropped: the file name contains no placeholders.)
submission_df.to_csv('submission.csv', index=False)
