In [11]:
!pip install timm
!pip install --upgrade wandb



In [12]:
import os
import gc
import cv2
import math
import copy
import time 
import random 
import pickle
## データ操作
import numpy as np
import pandas as pd

## pytorch
import torch
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp 

## Utils
import joblib
from tqdm import tqdm 
from collections import defaultdict

# Sklearn 
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

## for colored terminal text 
from colorama import Fore, Back, Style
b_=Fore.BLUE
sr_=Style.RESET_ALL

import warnings 
warnings.filterwarnings("ignore")

# Fore descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING']="1"

In [13]:
torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

device(type='cuda', index=0)

In [14]:
import wandb
try:
    from kaggle_secrets import UserSecrretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("wandb_api")
    wandb.login(key=api_key)
    anony = None
except:
    anony="must"
    print("If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token . Use the Label name as wandb_api . ")
    print("Get your W&B access token from here: https//:wandb.ai/authorize")

If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token . Use the Label name as wandb_api . 
Get your W&B access token from here: https//:wandb.ai/authorize


In [15]:
CONFIG={
    "seed":2022,
    "epochs": 4,
    "img_size":448,
    "model_name":"tf_efficientnet_b0_ns",
    "num_classes":15587,
    "embedding_size":512,
    "train_batch_size":32,
    "valid_batch_size":64,
    "learning_rate":1e-4,
    "scheduler":"CosineAnnealingLR",
    "min_lr":1e-6,
    "T_max":500,
    "weight_decay":1e-6,
    "n_fold":5,
    "n_accumulate":1,
    "device":torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    # ArcFace HyperParameter
    "s":30.0,
    "m":0.50,
    "ls_eps":0.0,
    "easy_margin":False
}

In [16]:
def set_seed(seed=0):
    '''Sets the seed of the entire notebook so results are same every time we run.This is for Reproducibility'''
    '''notebook 全体の結果が同じになるようにseedをセットする。再現性を確認するため'''
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # CuDNN backendを走らせるのに二つのオプションをセットする必要がある
    torch.backends.cudnn.determinisitc=True
    torch.backends.cudnn.benchmark=False
    # hash seed を修正してセットする
    os.environ['PYTHONHASHSEED']=str(seed)
set_seed(CONFIG["seed"])

In [17]:
ROOT_DIR= "../input/happy-whale-and-dolphin"
TRAIN_DIR="../input/happy-whale-and-dolphin/train_images"
TEST_DIR= "../input/happy-whale-and-dolphin/test_images"

In [18]:
def get_train_file_path(id):
    return f"{TRAIN_DIR}/{id}"

# データの読み込み

In [19]:
df=pd.read_csv(f"{ROOT_DIR}/train.csv")
df["file_path"]=df["image"].apply(get_train_file_path)
df.head()

Unnamed: 0,image,species,individual_id,file_path
0,00021adfb725ed.jpg,melon_headed_whale,cadddb1636b9,../input/happy-whale-and-dolphin/train_images/...
1,000562241d384d.jpg,humpback_whale,1a71fbb72250,../input/happy-whale-and-dolphin/train_images/...
2,0007c33415ce37.jpg,false_killer_whale,60008f293a2b,../input/happy-whale-and-dolphin/train_images/...
3,0007d9bca26a99.jpg,bottlenose_dolphin,4b00fe572063,../input/happy-whale-and-dolphin/train_images/...
4,00087baf5cef7a.jpg,humpback_whale,8e5253662392,../input/happy-whale-and-dolphin/train_images/...


In [20]:
encoder=LabelEncoder()

encoder.fit(df["individual_id"])
print(df["individual_id"][0])
print(encoder.transform(["cadddb1636b9"]))
df['individual_id']=encoder.transform(df["individual_id"])
print(encoder.classes_)
with open("le.pkl","wb") as fp:
    pickle.dump(encoder,fp)
df.head()

cadddb1636b9
[12348]
['0013f1f5f2f0' '001618e0a31e' '0018a0f40586' ... 'fffb11ff4575'
 'fffe15363b92' 'ffff6255f559']


Unnamed: 0,image,species,individual_id,file_path
0,00021adfb725ed.jpg,melon_headed_whale,12348,../input/happy-whale-and-dolphin/train_images/...
1,000562241d384d.jpg,humpback_whale,1636,../input/happy-whale-and-dolphin/train_images/...
2,0007c33415ce37.jpg,false_killer_whale,5842,../input/happy-whale-and-dolphin/train_images/...
3,0007d9bca26a99.jpg,bottlenose_dolphin,4551,../input/happy-whale-and-dolphin/train_images/...
4,00087baf5cef7a.jpg,humpback_whale,8721,../input/happy-whale-and-dolphin/train_images/...


# Create Folds

In [21]:
skf = StratifiedKFold(n_splits=CONFIG["n_fold"])

for fold , (_, val_) in enumerate(skf.split(X=df, y=df.individual_id)):
    df.loc[val_, "kfold"]=fold

# Dataset Class

In [22]:
class HappyWhaleDataset(Dataset):
    def __init__(self,df,transforms=None):
        self.df=df
        self.file_names=df["file_path"].values
        self.labels=df["individual_id"].values
        self.transforms=transforms
    def __len__(self):
        return len(self.df)
    def __getitem__(self,index):
        img_path=self.file_names[index]
        #print(img_path)
        img=cv2.imread(img_path)
        img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        label=self.labels[index]
        if self.transforms:
            img=self.transforms(image=img)["image"]
        return {
            "image":img,
            "label": torch.tensor(label,dtype=torch.long)
        }

# Augmentations

In [23]:
data_transforms={
    "train": A.Compose([
        A.Resize(CONFIG["img_size"],CONFIG["img_size"]),
        A.ShiftScaleRotate(shift_limit=0.1,
                           scale_limit=0.15,
                           rotate_limit=60,
                           p=0.5),
        A.HueSaturationValue(
            hue_shift_limit=0.2,
            sat_shift_limit=0.2,
            val_shift_limit=0.2,
            p=0.5
        ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1,0.1),
            contrast_limit=(-0.1,0.1),
            p=0.5
        ),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0
        ),
        
    ToTensorV2()],p=1.),
    
    "valid": A.Compose([
        A.Resize(CONFIG["img_size"],CONFIG["img_size"]),
        A.Normalize(
            mean=[0.485,0.456,0.406],
            std=[0.229,0.224,0.225],
            max_pixel_value=255.0,
            p=1.0
        ),
        
    ToTensorV2()],p=1.)
}

# GeM Pooling

In [24]:
class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM,self).__init__()
        self.p=nn.Parameter(torch.ones(1)*p)
        self.eps=eps
    def forward(self,x):
        return self.gem(x, p=self.p, eps=self.eps)
    def gem(self,x,p=3,eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p),(x.size(-2),x.size(-1))).pow(1./p)
    def __repr__(self):
        return self.__class__.__name__+ \
                "("+"p="+ '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ','+'eps='+str(self.eps)+")"
        

# ArcFace

In [25]:
class ArcMarginProduct(nn.Module):
    r"""Implement of large margin arc distance: :
        Args:
            in_features: size of each input sample
            out_features: size of each output sample
            s: norm of input feature 
            m: margin
            cos(theta +m)
        """
    def __init__(self, in_features,out_features,s=30.0,m=0.50,easy_margin=False, ls_eps=0.0):
        super(ArcMarginProduct,self).__init__()
        self.in_features=in_features
        self.out_features = out_features
        self.s=s
        self.m=m
        self.ls_eps=ls_eps
        self.weight=nn.Parameter(torch.FloatTensor(out_features,in_features))
        nn.init.xavier_uniform_(self.weight)
        
        self.easy_margin=easy_margin
        self.cos_m=math.cos(m)
        self.sin_m=math.sin(m)
        self.th=math.cos(math.pi-m)
        self.mm=math.sin(math.pi-m)*m
    
    def forward(self,input,label):
        ## --------------------cos(theta) and phi(theta)--------------
        cosine=F.linear(F.normalize(input),F.normalize(self.weight))
        sine=torch.sqrt(1.0-torch.pow(cosine,2))
        phi=cosine*self.cos_m-sine*self.sin_m
        if self.easy_margin:
            phi=torch.where(cosine>0,phi,cosine)
        else:
            phi=torch.where(cosine>self.th,phi,cosine-self.mm)
            
        # ----------convert label to one-hot
        # one_hot=torch.zeros(cosine.size(),requires_grad=True,device='cuda')
        one_hot=torch.zeros(cosine.size(),device=CONFIG["device"])
        one_hot.scatter_(1,label.view(-1,1).long(),1)
        if self.ls_eps>0:
            one_hot=(1-self.ls_eps)*one_hot+self.ls_eps/self.out_features
        # ----------torch.where(out_i={x_i if condition_i else y_i})-----------------
        output=(one_hot*phi)+((1.0-one_hot)*cosine)
        
        return output
            

# Create Model

In [26]:
class HappyWhaleModel(nn.Module):
    def __init__(self,model_name,embedding_size,pretrained=True):
        super(HappyWhaleModel,self).__init__()
        self.model=timm.create_model(model_name,pretrained=pretrained)
        in_features=self.model.classifier.in_features
        self.model.classifier=nn.Identity()
        self.model.global_pool=nn.Identity()
        self.pooling=GeM()
        self.embedding=nn.Linear(in_features,embedding_size)
        self.fc=ArcMarginProduct(embedding_size,CONFIG["num_classes"],s=CONFIG["s"],m=CONFIG["m"],easy_margin=CONFIG["easy_margin"],ls_eps=CONFIG["ls_eps"])
    def forward(self,images,labels):
        features=self.model(images)
        pooled_features=self.pooling(features).flatten(1)
        embedding=self.embedding(pooled_features)
        output = self.fc(embedding,labels)
        return output
    def extract(self,images):
        features=self.model(images)
        poooled_features=self.pooling(features).flatten(1)
        embedding=self.embedding(pooled_features)
        return embedding
model=HappyWhaleModel(CONFIG["model_name"],CONFIG["embedding_size"])
model.to(CONFIG["device"])



Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ns-c0e6a31c.pth" to /root/.cache/torch/hub/checkpoints/tf_efficientnet_b0_ns-c0e6a31c.pth


HappyWhaleModel(
  (model): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (act1): SiLU(inplace=True)
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act1): SiLU(inplace=True)
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_

# Loss Function

In [27]:
def criterion(outputs,labels):
    return nn.CrossEntropyLoss()(outputs,labels)

# Training Function

In [28]:
def train_one_epoch(model,optimizer,scheduler,dataloader,device,epoch):
    model.train()
    dataset_size=0
    running_loss=0.0
    bar=tqdm(enumerate(dataloader),total=len(dataloader))
    for step, data in bar:
        images=data["image"].to(device,dtype=torch.float)
        labels=data["label"].to(device,dtype=torch.long)
        
        batch_size=images.size(0)
        outputs=model(images,labels)
        loss=criterion(outputs,labels)
        loss=loss/CONFIG["n_accumulate"]
        
        loss.backward()
        
        if(step+1)%CONFIG["n_accumulate"]==0:
            optimizer.step()
            
            # zero the parameter gradients
            optimizer.zero_grad()
            if scheduler is not None:
                scheduler.step()
        running_loss+=(loss.item()*batch_size)
        dataset_size+=batch_size
        
        epoch_loss=running_loss/dataset_size
        
        bar.set_postfix(Epoch=epoch,Train_Loss=epoch_loss,LR=optimizer.param_groups[0]["lr"])
        del images
        del labels
        torch.cuda.empty_cache()
    gc.collect()
    return epoch_loss


# Validation Function

In [29]:
@torch.inference_mode()
def valid_one_epoch(model,dataloader,device,epoch):
    with torch.no_grad():
        
        model.eval()
        dataset_size=0
        running_loss=0.0

        bar=tqdm(enumerate(dataloader),total=len(dataloader))
        for step,data in bar:
            images=data["image"].to(device,dtype=torch.float)
            labels=data["label"].to(device,dtype=torch.long)

            batch_size=images.size(0)

            outputs=model(images,labels)
            loss=criterion(outputs,labels)

            running_loss+=(loss.item()*batch_size)
            dataset_size+=batch_size
            epoch_loss=running_loss/dataset_size

            bar.set_postfix(Epoch=epoch,Valid_Loss=epoch_loss,LR=optimizer.param_groups[0]["lr"])
        gc.collect()
        return epoch_loss

# Run Training

In [30]:
def run_training(model,optimizer,scheduler,device,num_epochs):
    wandb.watch(model,log_freq=100)
    if torch.cuda.is_available():
        print("[INFO] Using Gpu {}\n".format(torch.cuda.get_device_name()))
    
    start=time.time()
    best_model_wts=copy.deepcopy(model.state_dict())
    best_epoch_loss=np.inf
    
    history=defaultdict(list)
    
    for epoch in range(1,num_epochs+1):
        gc.collect()
        train_epoch_loss = train_one_epoch(model,optimizer,scheduler,dataloader=train_loader,device=CONFIG["device"],epoch=epoch)
        val_epoch_loss   = valid_one_epoch(model,valid_loader,device=CONFIG["device"],epoch=epoch)
        history["Train Loss"].append(train_epoch_loss)
        history["Valid Loss"].append(val_epoch_loss)
        
        # Log the metrics
        wandb.log({"Train Loss":train_epoch_loss})
        wandb.log({"Valid Loss":val_epoch_loss})
        
        # deep copy the model
        if val_epoch_loss<=best_epoch_loss:
            print(f"{b_}Validation Loss Improved({best_epoch_loss}   ------>  {val_epoch_loss})")
            best_epoch_loss=val_epoch_loss
            run.summary["Best Loss"]=best_epoch_loss
            best_model_wts=copy.deepcopy(model.state_dict())
            PATH="Loss{:.4f}_epoch{:.0f}.bin".format(best_epoch_loss,epoch)
            torch.save(model.state_dict(),PATH)
            # save a model file from the current dictory
            print(f"Model Saved{sr_}")
        print()
    end=time.time()
    time_elapsed=end-start
    print("Training complete in {:.0f}h {:.0f}m {:.0f}s".format(time_elapsed//3600, (time_elapsed%3600)//60,(time_elapsed%3600)%60))
    print("Best Loss: {:.4f}".format(best_epoch_loss))
    
    model.load_state_dict(best_model_wts)
    
    return model, history



In [31]:
def fetch_scheduler(optimizer):
    if CONFIG["scheduler"]=="CosineAnnealingLR":
        scheduler =lr_scheduler.CosineAnnealingLR(optimizer,T_max=CONFIG["T_max"],eta_min=CONFIG["min_lr"])
    
    elif CONFIG["scheduler"]=="CosineAnnealingWarmRestarts":
        scheduler =lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_0=CONFIG["T_0"],eta_min=CONFIG["min_lr"])
    elif CONFIG["scheduler"]==None:
        return None
    return scheduler

In [32]:
def prepare_loaders(df,fold):
    df_train=df[df.kfold !=fold].reset_index(drop=True)
    df_valid=df[df.kfold ==fold].reset_index(drop=True)
    
    train_dataset= HappyWhaleDataset(df_train, transforms=data_transforms["train"])
    valid_dataset= HappyWhaleDataset(df_valid, transforms=data_transforms["valid"])
    
    train_loader=DataLoader(train_dataset,batch_size=CONFIG["train_batch_size"], num_workers=2,shuffle=True,pin_memory=True,drop_last=True)
    valid_loader=DataLoader(valid_dataset,batch_size=CONFIG["valid_batch_size"], num_workers=2,shuffle=False,pin_memory=True)
    return train_loader,valid_loader

# Prepare DataLoaders

In [33]:
train_loader, valid_loader=prepare_loaders(df,fold=0)

# Define Optimzier and Scheduler

In [34]:
optimizer=optim.Adam(model.parameters(),lr=CONFIG["learning_rate"], weight_decay=CONFIG["weight_decay"])
scheduler=fetch_scheduler(optimizer)

# Start Training

In [35]:
run=wandb.init(project="HappyWhale",config=CONFIG,job_type="Train",tags=["arcface","gem-pooling","effnet-b0-ns","448"],anonymous="must")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [36]:
model,history=run_training(model,optimizer,scheduler,device=CONFIG["device"],num_epochs=CONFIG["epochs"])

[INFO] Using Gpu Tesla P100-PCIE-16GB



100%|██████████| 1275/1275 [51:08<00:00,  2.41s/it, Epoch=1, LR=4.28e-5, Train_Loss=9.64]
100%|██████████| 160/160 [10:40<00:00,  4.00s/it, Epoch=1, LR=4.28e-5, Valid_Loss=9.43]


[34mValidation Loss Improved(inf   ------>  9.434924489874629)
Model Saved[0m



100%|██████████| 1275/1275 [50:55<00:00,  2.40s/it, Epoch=2, LR=3.42e-6, Train_Loss=9.37]
100%|██████████| 160/160 [10:34<00:00,  3.97s/it, Epoch=2, LR=3.42e-6, Valid_Loss=9.31]


[34mValidation Loss Improved(9.434924489874629   ------>  9.310702243093717)
Model Saved[0m



100%|██████████| 1275/1275 [50:56<00:00,  2.40s/it, Epoch=3, LR=7.3e-5, Train_Loss=9.27] 
100%|██████████| 160/160 [10:35<00:00,  3.97s/it, Epoch=3, LR=7.3e-5, Valid_Loss=9.25]


[34mValidation Loss Improved(9.310702243093717   ------>  9.245342336392863)
Model Saved[0m



100%|██████████| 1275/1275 [51:11<00:00,  2.41s/it, Epoch=4, LR=9.05e-5, Train_Loss=9.22]
100%|██████████| 160/160 [10:35<00:00,  3.97s/it, Epoch=4, LR=9.05e-5, Valid_Loss=9.21]


[34mValidation Loss Improved(9.245342336392863   ------>  9.20961541934241)
Model Saved[0m

Training complete in 4h 6m 41s
Best Loss: 9.2096


In [37]:
run.finish()




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train Loss,█▃▂▁
Valid Loss,█▄▂▁

0,1
Best Loss,9.20962
Train Loss,9.22
Valid Loss,9.20962


In [38]:
import pickle
model_file="trained_model.pkl"
pickle.dump(model,open(model_file,"wb"))
path="/kaggle/working/"
fileList=os.listdir(path)
for f in fileList:
    print(f)

wandb
le.pkl
Loss9.4349_epoch1.bin
Loss9.2453_epoch3.bin
trained_model.pkl
Loss9.2096_epoch4.bin
Loss9.3107_epoch2.bin
__notebook_source__.ipynb


# Submission

In [39]:
def get_test_image_path(i):
    return TEST_DIR+"/"+i

with open("le.pkl","rb") as web:
    encoder=pickle.load(web)
test_df=pd.read_csv(ROOT_DIR+"/sample_submission.csv")
print(encoder.inverse_transform([12348]))
test_df["file_path"]=test_df["image"].apply(get_test_image_path)
test_df["individual_id"]="cadddb1636b9"
test_df['individual_id']=encoder.transform(test_df["individual_id"])
#test_df=test_df.head()
test_df.head()

['cadddb1636b9']


Unnamed: 0,image,predictions,file_path,individual_id
0,000110707af0ba.jpg,37c7aba965a5 114207cab555 a6e325d8e924 19fbb96...,../input/happy-whale-and-dolphin/test_images/0...,12348
1,0006287ec424cb.jpg,37c7aba965a5 114207cab555 a6e325d8e924 19fbb96...,../input/happy-whale-and-dolphin/test_images/0...,12348
2,000809ecb2ccad.jpg,37c7aba965a5 114207cab555 a6e325d8e924 19fbb96...,../input/happy-whale-and-dolphin/test_images/0...,12348
3,00098d1376dab2.jpg,37c7aba965a5 114207cab555 a6e325d8e924 19fbb96...,../input/happy-whale-and-dolphin/test_images/0...,12348
4,000b8d89c738bd.jpg,37c7aba965a5 114207cab555 a6e325d8e924 19fbb96...,../input/happy-whale-and-dolphin/test_images/0...,12348


In [40]:
df.head()

Unnamed: 0,image,species,individual_id,file_path,kfold
0,00021adfb725ed.jpg,melon_headed_whale,12348,../input/happy-whale-and-dolphin/train_images/...,0.0
1,000562241d384d.jpg,humpback_whale,1636,../input/happy-whale-and-dolphin/train_images/...,1.0
2,0007c33415ce37.jpg,false_killer_whale,5842,../input/happy-whale-and-dolphin/train_images/...,0.0
3,0007d9bca26a99.jpg,bottlenose_dolphin,4551,../input/happy-whale-and-dolphin/train_images/...,0.0
4,00087baf5cef7a.jpg,humpback_whale,8721,../input/happy-whale-and-dolphin/train_images/...,0.0


In [41]:
# del train_loader
# del valid_loader


In [42]:

gc.collect()
torch.cuda.empty_cache()

#torch.cuda.empty_cache()
!  nvidia-smi

Mon Apr 18 19:09:10 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.01    Driver Version: 470.82.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P0    38W / 250W |   1449MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [43]:
#model=pickle.load(open(model_file,"rb"))
#!  nvidia-smi

In [44]:
run=wandb.init(project="HappyWhale",config=CONFIG,job_type="evaluate",tags=["arcface","gem-pooling","effnet-b0-ns","448"],anonymous="must")
#wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33manony-moose-238729[0m (use `wandb login --relogin` to force relogin)


In [45]:
#print(encoder.classes_)
#print(encoder.inverse_transform(12348))

In [46]:
print(len(test_df["image"]))
with torch.no_grad():
    model.eval()
    j=0
    if True:
    #for j in range(len(test_df["image"])):
        test_dataset= HappyWhaleDataset(test_df, transforms=data_transforms["valid"])

        test_loader=DataLoader(test_dataset,batch_size=CONFIG["valid_batch_size"], num_workers=2,shuffle=False,pin_memory=True)
        bar=tqdm(enumerate(test_loader),total=len(test_loader))
        print(j)
        for step,data in bar:

            images=data["image"].to(CONFIG["device"],dtype=torch.float)
            #print(images.shape)
            labels=data["label"].to(CONFIG["device"],dtype=torch.long)
            batch_size=images.size(0)
            outputs=model(images,labels)
            prediction = outputs.to('cpu').detach().numpy().copy()
            #print(prediction.shape)
            for k in range(len(prediction)):
                
                for i in range(4):
                    if i==0:
                        test_df["predictions"][j]=""
                        test_df["predictions"][j]=encoder.inverse_transform([np.argsort(prediction[k])[::-1][i]])[0]
                    else:    
                        test_df["predictions"][j]=test_df["predictions"][j]+" "+encoder.inverse_transform([np.argsort(prediction[k])[::-1][i]])[0]
                    #print (encoder.inverse_transform([np.argsort(prediction)[0][::-1][i]]),np.sort(prediction)[0][::-1][i])
                test_df["predictions"][j]=test_df["predictions"][j]+" new_individual"
                #print(str(k)+" "+str(j)+" "+test_df["predictions"][j])
                j+=1
            del images
            del labels
            del outputs
            
            torch.cuda.empty_cache()
test_df.head()

27956


  0%|          | 0/437 [00:00<?, ?it/s]

0


100%|██████████| 437/437 [31:58<00:00,  4.39s/it]


Unnamed: 0,image,predictions,file_path,individual_id
0,000110707af0ba.jpg,4b8534134eb8 445270d9ad52 1ae5c5a035b5 0c7efa8...,../input/happy-whale-and-dolphin/test_images/0...,12348
1,0006287ec424cb.jpg,91ed5caeb0d3 bbeac4b2964e 180c0ab04dcd 938b7e9...,../input/happy-whale-and-dolphin/test_images/0...,12348
2,000809ecb2ccad.jpg,ff26e042cd52 31f748b822f4 51081e431bca 322a187...,../input/happy-whale-and-dolphin/test_images/0...,12348
3,00098d1376dab2.jpg,938b7e931166 c93996835aa8 0b180ad0afa2 02da0e6...,../input/happy-whale-and-dolphin/test_images/0...,12348
4,000b8d89c738bd.jpg,52a2c490b25a e33a507393ca 77410a623426 ca88903...,../input/happy-whale-and-dolphin/test_images/0...,12348


In [47]:
run.finish()




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [48]:
submission_df=pd.read_csv('../input/happy-whale-and-dolphin/sample_submission.csv')
submission_df["predictions"]=test_df["predictions"]
submission_df.head()

Unnamed: 0,image,predictions
0,000110707af0ba.jpg,4b8534134eb8 445270d9ad52 1ae5c5a035b5 0c7efa8...
1,0006287ec424cb.jpg,91ed5caeb0d3 bbeac4b2964e 180c0ab04dcd 938b7e9...
2,000809ecb2ccad.jpg,ff26e042cd52 31f748b822f4 51081e431bca 322a187...
3,00098d1376dab2.jpg,938b7e931166 c93996835aa8 0b180ad0afa2 02da0e6...
4,000b8d89c738bd.jpg,52a2c490b25a e33a507393ca 77410a623426 ca88903...


In [49]:
submission_df.to_csv("submission.csv",index=False)

In [50]:
len(submission_df["predictions"])

27956