In [1]:
import os
import gc
import cv2
import copy
import time
import torch
import random
import string
import joblib
import tifffile
import numpy as np 
import pandas as pd 
import torch.nn as nn
import seaborn as sns
from random import randint
from torchvision import models
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import warnings; warnings.filterwarnings("ignore")
from tqdm import tqdm

# Make sure automatic (cyclic) garbage collection is switched on; the cells
# below additionally call gc.collect() by hand after each large image/batch.
gc.enable()

In [2]:
# --- Configuration -----------------------------------------------------------
debug = False
generate_new = True
test_df = pd.read_csv("../input/mayo-clinic-strip-ai/test.csv")
# Index 0 is the train slide directory, index 1 the test slide directory.
dirs = ["../input/mayo-clinic-strip-ai/train/", "../input/mayo-clinic-strip-ai/test/"]

TEST_JPG_DIR = "../test/"    # destination of the downsized JPEG copies
THUMB_SIZE = (1024, 1024)    # (width, height) handed to cv2.resize
MAX_TIF_BYTES = 8e8          # .tif files bigger than this are never decoded

# --- Convert every test slide to a 1024x1024 JPEG ----------------------------
# Each row of test_df yields exactly one JPEG in TEST_JPG_DIR. Slides that are
# missing, too large to decode, or fail to decode are replaced by an all-black
# placeholder so that the downstream dataset always finds a file.
os.makedirs(TEST_JPG_DIR, exist_ok=True)

for i in tqdm(range(test_df.shape[0])):
    img_id = test_df.iloc[i].image_id
    tif_path = dirs[1] + img_id + ".tif"

    try:
        too_large = os.path.getsize(tif_path) > MAX_TIF_BYTES
    except OSError:
        # File missing or unreadable: treat it like an oversized slide.
        too_large = True

    img = None
    if not too_large:
        try:
            img = cv2.resize(tifffile.imread(tif_path), THUMB_SIZE)
        except Exception as err:
            # Best effort on purpose: one bad slide must not abort the run,
            # but the failure is reported instead of being silently hidden.
            tqdm.write(f"Could not convert {tif_path}: {err!r}; using a blank image")

    if img is None:
        img = np.zeros((THUMB_SIZE[1], THUMB_SIZE[0], 3), np.uint8)

    cv2.imwrite(f"{TEST_JPG_DIR}{img_id}.jpg", img)

    # Release the (potentially large) array before the next slide is read.
    del img
    gc.collect()
    
    
class ImgDataset(Dataset):
    """Dataset over pre-generated JPEG thumbnails, one item per row of ``df``.

    ``df`` must provide ``image_id`` and ``patient_id`` columns. When it also
    has a ``label`` column the dataset runs in training mode: images are read
    from ``../train/`` and the label is encoded as ``CE -> 0`` / ``LAA -> 1``.
    Otherwise images are read from ``../test/`` and the label is always ``0``.

    Each item is ``(image, label, patient_id)`` where ``image`` is a float32
    array of shape ``(3, 1024, 1024)`` (pixel values divided by 255). If the
    JPEG is missing, unreadable, or cannot be resized, an all-zero image of
    the same shape and dtype is returned instead of raising.
    """

    IMAGE_SIZE = 1024
    LABEL_TO_INDEX = {"CE": 0, "LAA": 1}

    def __init__(self, df):
        self.df = df
        # Training mode is inferred from the presence of a label column.
        self.train = 'label' in df.columns

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        row = self.df.iloc[index]
        size = self.IMAGE_SIZE
        image_dir = "../train/" if self.train else "../test/"

        # cv2.imread signals a missing/corrupt file by returning None rather
        # than raising, so that case has to be checked explicitly.
        raw = cv2.imread(image_dir + row.image_id + ".jpg")

        image = None
        if raw is not None:
            try:
                scaled = raw.astype(np.float32) / 255
                # HWC -> CHW after forcing the expected spatial size.
                image = cv2.resize(scaled, (size, size)).transpose(2, 0, 1)
            except Exception:
                # Deliberate best effort: fall back to the blank image below.
                image = None
        if image is None:
            # Same dtype as a real image so that batches collate consistently.
            image = np.zeros((3, size, size), dtype=np.float32)

        label = self.LABEL_TO_INDEX[row.label] if self.train else 0
        return image, label, row.patient_id

100%|██████████| 4/4 [00:34<00:00,  8.58s/it]


In [3]:
def predict(model, dataloader):
    """Run ``model`` over ``dataloader`` and return class probabilities.

    Parameters
    ----------
    model : callable torch module
        Called as ``model(images)``; only the first two output columns are
        used and turned into probabilities with a softmax over ``dim=1``.
    dataloader : iterable
        Yields batches indexable as ``item[0]`` (image tensor) and ``item[2]``
        (the ids belonging to that batch, one per sample).

    Returns
    -------
    (numpy.ndarray, list)
        An ``(n_samples, 2)`` array of probabilities and the matching list of
        ids, in iteration order. An empty loader yields a ``(0, 2)`` array and
        an empty list.

    Notes
    -----
    Every sample of every batch is kept, so any batch size works. Inference
    runs on the GPU when one is available and on the CPU otherwise.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    softmax = nn.Softmax(dim=1)
    outputs = []
    ids = []

    # no_grad: inference only, so do not record autograd graphs.
    with torch.no_grad():
        for item in tqdm(dataloader, leave=False):
            images = item[0].to(device).float()
            logits = model(images)
            outputs.append(softmax(logits.cpu()[:, :2]).numpy())
            ids.extend(item[2])

            # Free per-batch memory eagerly; inputs can be large.
            del images, logits
            if device.type == "cuda":
                torch.cuda.empty_cache()
            gc.collect()

    if not outputs:
        return np.empty((0, 2), dtype=np.float32), ids
    return np.concatenate(outputs, axis=0), ids

# Checkpoints that take part in the ensemble (loaded with torch.jit.load).
paths = ['../input/mayo-model/efficientnet_model_acc_0.6386_b0.pth']

# NOTE(review): from here on `models` is this list and no longer the
# `torchvision.models` module imported at the top of the notebook.
models = []
for path in paths:
    scripted = torch.jit.load(path)
    models.append(scripted)

# One image per batch, in file order, so predictions line up with test_df.
batch_size = 1
loader_options = dict(batch_size=batch_size, shuffle=False, num_workers=1)
test_loader = DataLoader(ImgDataset(test_df), **loader_options)

# Submission template with both probability columns reset to zero; the
# per-model predictions are added on top of these afterwards.
sub_df = pd.read_csv('../input/mayo-clinic-strip-ai/sample_submission.csv').assign(CE=0, LAA=0)

# Accumulate the ensemble *mean*: every model adds 1/len(models) of its
# per-patient probabilities, so the totals remain valid probabilities no
# matter how many checkpoints are listed.
n_models = len(models)
for model in models:
    anss, ids = predict(model, test_loader)

    # Collapse all predictions that share a patient id into their mean.
    # groupby sorts the result by id.
    prob = (
        pd.DataFrame({"CE": anss[:, 0], "LAA": anss[:, 1], "id": ids})
        .groupby("id")
        .mean()
    )

    # Line the rows up with the submission template by patient id instead of
    # trusting that the template happens to be in sorted-id order. Patients
    # without any prediction become NaN.
    if "patient_id" in sub_df.columns:
        prob = prob.reindex(sub_df["patient_id"])

    sub_df['CE'] = sub_df['CE'].values + prob['CE'].values / n_models
    sub_df['LAA'] = sub_df['LAA'].values + prob['LAA'].values / n_models

# Hedge the final predictions: rather than submitting raw probabilities, each
# row is mapped to one of three fixed pairs depending on which class (if any)
# scored above the confidence threshold. CE is checked first, exactly as in an
# if / elif chain; rows with NaN scores fall through to the neutral pair.
CONFIDENCE_THRESHOLD = 0.6
FAVOURED_PROB = 0.53
OTHER_PROB = 0.47
NEUTRAL_PROB = 0.5

ce_scores = sub_df['CE'].to_numpy()
laa_scores = sub_df['LAA'].to_numpy()
favours_ce = ce_scores > CONFIDENCE_THRESHOLD
favours_laa = ~favours_ce & (laa_scores > CONFIDENCE_THRESHOLD)

# Whole-column assignment instead of sub_df['CE'][i] = ...: chained indexing
# writes to a temporary and is not guaranteed to modify sub_df at all.
sub_df['CE'] = np.select(
    [favours_ce, favours_laa], [FAVOURED_PROB, OTHER_PROB], default=NEUTRAL_PROB
)
sub_df['LAA'] = np.select(
    [favours_ce, favours_laa], [OTHER_PROB, FAVOURED_PROB], default=NEUTRAL_PROB
)

sub_df.to_csv("submission.csv", index = False)

                                             