In [None]:
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn
import torchvision.transforms as transforms

import pandas as pd

import os

from PIL import Image

import numpy as np

from timm.models import create_model


In [1]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Image folders. They are prefixed onto each row's `file_path` by plain string
# concatenation, so the trailing slash is required.
TRAIN_DATA_DIR = "/dir/to/traindata/"
TEST_DATA_DIR = "/dir/to/testdata/"

# Metadata CSVs (despite the *_DIR suffix these are file paths, not folders).
TRAIN_LABELS_DIR = os.path.abspath(
    "/dir/to/trainmetadata/SnakeCLEF2022-TrainMetadata.csv"
)
TEST_LABELS_DIR = os.path.abspath(
    "/dir/to/testmetadata/SnakeCLEF2022-TestMetadata.csv"
)

# Folder the checkpoint is loaded from; the prediction CSVs are written here too.
# The checkpoint path is built by string concatenation, so keep the trailing slash.
MODEL_DIR = os.path.join("/dir/where/models/were/saved/")

# Per-country tables that are merged onto the test observations and multiplied
# into the predicted probabilities.
COUNTRY_DIST_FILE = "/dir/to/CountrySnakeDistributionTrainObservationLevel.csv"
COUNTRY_DIST_FILE_BIN = "/dir/to/CountrySnakeDistributionTrainObservationLevelBin.csv"

# Side length, in pixels, of the square images fed to the network.
IMAGE_SIZE = 380

In [2]:
# Training metadata. `image_path` is the image folder prefixed onto the
# relative `file_path` column of each row.
trainingDataset = pd.read_csv(TRAIN_LABELS_DIR)
trainingDataset["image_path"] = TRAIN_DATA_DIR + trainingDataset.file_path

# Test metadata, handled the same way. The CSV is read exactly once; a second,
# identical read would only repeat the I/O and discard the first result.
validationDataset = pd.read_csv(TEST_LABELS_DIR)
validationDataset["image_path"] = TEST_DATA_DIR + validationDataset.file_path

# Row counts of the two metadata tables.
NUM_TRAINING_SAMPLES = trainingDataset.shape[0]
NUM_VALIDATION_SAMPLES = validationDataset.shape[0]

In [3]:
class SnakeTrainDataset(Dataset):
    """Image dataset backed by a metadata DataFrame.

    Every row of ``data`` must provide ``image_path`` (the file that is
    opened), ``observation_id`` and ``file_path``. The last two are returned
    unchanged next to the image so a prediction can be traced to its source.
    """

    def __init__(self, data, transform=None):
        """Store the metadata table and the optional per-image transform.

        Args:
            data: DataFrame with one row per image.
            transform: Optional callable applied to the RGB PIL image before
                it is returned; ``None`` returns the PIL image itself.
        """
        self.data = data
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """Load the image of row ``index``.

        Args:
            index: Positional row index into ``data``.

        Returns:
            A tuple ``(img, observation_id, file_path)`` where ``img`` is the
            RGB image after ``transform`` (if one was given).
        """
        img_obj = self.data.iloc[index]
        # `convert` returns an independent, fully loaded image, so the handle
        # of the source file can be released as soon as the conversion is done
        # instead of lingering until garbage collection.
        with Image.open(img_obj.image_path) as source:
            img = source.convert("RGB")
        observationid = img_obj.observation_id
        file_path = img_obj.file_path
        if self.transform is not None:
            img = self.transform(img)

        return (img, observationid, file_path)

In [4]:
# Number of images per inference batch.
BATCH_SIZE_VALID = 20

# Deterministic preprocessing: resize to a square of IMAGE_SIZE pixels, convert
# to a tensor, then apply (x - 0.5) / 0.5 on each of the three channels.
transformValid = transforms.Compose([
    transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# The test metadata wrapped as a dataset; every item also carries its
# observation id and file path.
datasetValid = SnakeTrainDataset(validationDataset, transform=transformValid)

# No shuffling: batches follow the row order of the metadata table.
validation_loader = DataLoader(
    dataset=datasetValid,
    batch_size=BATCH_SIZE_VALID,
    shuffle=False,
)

In [5]:
# Run settings. NOTE(review): none of the cells visible in this part of the
# notebook reads these values - confirm whether they are still needed.
num_epochs, learning_rate = 10, 0.001
train_CNN = False
shuffle, pin_memory = True, True
num_workers = 1

In [6]:
# Build the network without pretrained weights; the classifier gets one output
# per distinct class id found in the training metadata.
modelTest = create_model(
    "tf_efficientnet_b4",
    pretrained=False,
    num_classes=trainingDataset.class_id.unique().shape[0],
)

# Which checkpoint to evaluate: the file `model_<epoch>.pth` inside MODEL_DIR.
epoch = 30

# `map_location` makes a checkpoint that was saved from a GPU run loadable on a
# CPU-only machine, in line with the CPU fallback used when choosing `device`.
# NOTE(review): torch.load unpickles the file - only load checkpoints that come
# from a trusted source.
modelTest.load_state_dict(
    torch.load(MODEL_DIR + "model_" + str(epoch) + ".pth", map_location=device)
)

# Switch to evaluation mode before predicting, then move the weights to the
# selected device (left as the last expression so the cell displays the model).
modelTest.eval()
modelTest.to(device)

EfficientNet(
  (conv_stem): Conv2dSame(3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn1): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
      )
     

In [7]:
# Width of one result row: one probability per class, followed by the
# observation id and the file path.
num_result_columns = trainingDataset.class_id.unique().shape[0] + 2

# The table starts with a placeholder row of NaNs. The post-processing cell
# discards the first row, so the placeholder is kept to leave the layout of
# the table unchanged. Batches are collected in a list and joined once at the
# end instead of re-copying the whole table on every iteration.
batch_tables = [np.full((1, num_result_columns), np.nan)]

# Mixed precision is only switched on when the model really runs on CUDA.
use_amp = device == "cuda"

# Inference only: no autograd graph is needed, which saves memory.
with torch.no_grad():
    for inputs, observationid, file_path in validation_loader:
        # Follow `device` rather than hard-coding CUDA so the CPU fallback works.
        inputs = inputs.to(device)
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = modelTest(inputs)
            outputs = nn.functional.softmax(outputs, dim=1)
        outputs = outputs.cpu().detach().numpy()

        # One row per image: class probabilities plus the two bookkeeping columns.
        df1 = pd.DataFrame(outputs)
        df1["observationid"] = observationid.detach().numpy()
        df1["file_path"] = file_path
        batch_tables.append(df1.to_numpy())

resultTableWithoutPostProcessing = np.concatenate(batch_tables, axis=0)

In [8]:
# Wrap the raw result table and discard its first row, the all-NaN placeholder
# that the table is seeded with before the inference loop.
df = pd.DataFrame(resultTableWithoutPostProcessing).iloc[1:, :]

# Column labels: the distinct species names in order of first appearance in the
# training metadata, then the two bookkeeping columns added during inference.
# NOTE(review): labels are attached by position; they describe the model's
# outputs correctly only if that order equals the class-index order - confirm.
colnamesTest = trainingDataset.binomial_name.unique().tolist()
colnamesTest.extend(["observationid", "file_path"])
df.columns = colnamesTest

# Drop the trailing `file_path` column so every remaining column can be cast
# to float64.
df = df.iloc[:, :-1].astype("float64")

# Average the per-image probabilities of all images sharing an observation id.
dfgrouped = df.groupby("observationid").mean()

In [9]:
# Load the two per-country tables. Both are later joined to the test
# observations through their `Unnamed: 0` column and multiplied into the
# predicted probabilities.
countryDistributionObsLevel, countryDistributionObsLevelBin = (
    pd.read_csv(csv_path)
    for csv_path in (COUNTRY_DIST_FILE, COUNTRY_DIST_FILE_BIN)
)

In [11]:
def _country_prior_table(observations, distribution, expected_ids, class_columns):
    """Build the per-observation country weights for one distribution table.

    Args:
        observations: Frame with the columns ``observation_id`` and ``country``.
        distribution: Country table whose ``Unnamed: 0`` column holds the
            country key; its remaining columns are taken as per-class weights.
        expected_ids: 1-D float array with the observation ids of the
            prediction rows, in prediction row order.
        class_columns: Column labels to put on the returned table.

    Returns:
        DataFrame with one row per observation, ordered by ``observation_id``.

    Raises:
        ValueError: If the merged rows do not match ``expected_ids`` one-to-one.
    """
    merged = pd.merge(
        observations, distribution, left_on="country", right_on="Unnamed: 0"
    ).sort_values("observation_id", ignore_index=True)

    # The weights are multiplied into the predictions row by row, so both sides
    # must list exactly the same observations in the same order. The merge is
    # an inner join and can drop or duplicate rows; fail loudly if it did.
    merged_ids = merged["observation_id"].to_numpy(dtype="float64")
    if not np.array_equal(merged_ids, expected_ids):
        raise ValueError(
            "country table rows do not line up with the prediction rows "
            "(observation ids differ after the merge)"
        )

    table = merged.drop(["observation_id", "country", "Unnamed: 0"], axis=1)
    table.columns = class_columns
    return table


# One row per distinct (observation, country) pair of the test metadata.
validationDatasetCountry = validationDataset.drop(
    ["endemic", "code", "file_path", "image_path"], axis=1
).drop_duplicates()

# Countries missing from the distribution table are relabelled "unknown".
# The column is addressed by name, so this does not depend on column order.
# NOTE(review): only the first table is consulted here; presumably the `Bin`
# table lists the same countries - confirm.
known_country = validationDatasetCountry["country"].isin(
    countryDistributionObsLevel["Unnamed: 0"]
)
validationDatasetCountry.loc[~known_country, "country"] = "unknown"

# Observation ids of the prediction rows (the index of the grouped table).
prediction_ids = dfgrouped.index.to_numpy(dtype="float64")

datasetTestCountryDistObsLev = _country_prior_table(
    validationDatasetCountry, countryDistributionObsLevel,
    prediction_ids, dfgrouped.columns,
)
datasetTestCountryDistObsLevBin = _country_prior_table(
    validationDatasetCountry, countryDistributionObsLevelBin,
    prediction_ids, dfgrouped.columns,
)

pred = dfgrouped.to_numpy()
country = datasetTestCountryDistObsLev.to_numpy()
countryBin = datasetTestCountryDistObsLevBin.to_numpy()

# Element-wise re-weighting of the averaged probabilities by each country table.
results = np.multiply(pred, country)
resultsBin = np.multiply(pred, countryBin)

In [12]:
# Prediction per observation using the `Bin` country table: the column
# position of the largest re-weighted score in each row.
d = dict(
    ObservationId=dfgrouped.index.tolist(),
    class_id=np.argmax(resultsBin, axis=1).tolist(),
)
predictionsWithCountryBin = pd.DataFrame(d)

In [13]:
# Prediction per observation using the first country table: the column
# position of the largest re-weighted score in each row.
d = dict(
    ObservationId=dfgrouped.index.tolist(),
    class_id=np.argmax(results, axis=1).tolist(),
)
predictionsWithCountry = pd.DataFrame(d)

In [14]:
# Prediction per observation from the averaged probabilities alone, with no
# country re-weighting: the column position of the largest value in each row.
d = dict(
    ObservationId=dfgrouped.index.tolist(),
    class_id=np.argmax(dfgrouped.to_numpy(), axis=1).tolist(),
)
predictionsWithoutCountry = pd.DataFrame(d)

In [15]:
# Save the averaged per-observation probability matrix as bare numbers:
# neither a header row nor an index column is written.
predDF = pd.DataFrame(data=pred)
predDF.to_csv(
    MODEL_DIR + "/predictions.csv",
    index=False,
    header=False,
)


In [21]:
# The observation ids come from a float64 group key, so cast every column of
# the three prediction tables to integers before they are written out.
predictionsWithCountry, predictionsWithCountryBin, predictionsWithoutCountry = [
    frame.astype("int64")
    for frame in (
        predictionsWithCountry,
        predictionsWithCountryBin,
        predictionsWithoutCountry,
    )
]

In [22]:
# Write the predictions re-weighted by the first country table (no index column).
predictionsWithCountry.to_csv(
    MODEL_DIR + "/WithCountry.csv",
    index=False,
)

In [23]:
# Write the predictions re-weighted by the `Bin` country table (no index column).
predictionsWithCountryBin.to_csv(
    MODEL_DIR + "/WithCountryBin.csv",
    index=False,
)

In [24]:
# Write the predictions made without any country re-weighting (no index column).
predictionsWithoutCountry.to_csv(
    MODEL_DIR + "/WithoutCountry.csv",
    index=False,
)