### Regression notebook for Wadhwani AI competition

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import os
import gc
import random
from glob import glob
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold
import warnings
import pickle
import json
import re
import time
import sys
from requests import get
import multiprocessing
import joblib
import torch
from torch.utils.data import Dataset, DataLoader
import transformers
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast
# from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import timm
from sklearn.preprocessing import minmax_scale
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2,torchvision
from ipyexperiments.ipyexperiments import IPyExperimentsPytorch
from timm.optim.optim_factory import create_optimizer_v2
from timm import utils
from fastprogress.fastprogress import format_time
from fastai.vision.all import *
from sklearn.multioutput import MultiOutputRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error
class CFG:
    seed = 46
    n_splits = 5
    SZ = 1280
    debug = False
    BS = 16
    EP = 10
    MODEL = 'tf_efficientnet_b0_ns'
    LR = 5e-03
    WD = 1e-08

random.seed(CFG.seed)
os.environ["PYTHONHASHSEED"] = str(CFG.seed)
np.random.seed(CFG.seed)
plt.rcParams["font.size"] = 13
warnings.filterwarnings('ignore')

In [2]:
set_seed(CFG.seed)

In [3]:
DIR = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/'
IMG_PATH = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/images'
submit = pd.read_csv(os.path.join(DIR,'SampleSubmission.csv'))
train = pd.read_csv(os.path.join(DIR,'Train.csv'))
test_df = pd.read_csv(os.path.join(DIR,'Test.csv'))
labels_dir = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Mixed/fold0_infer_1280_CONFTHRESH_45/labels'

VERSION = "NB_EXP_V0_001_Mixed"
MODEL_FOLDER = Path(f"///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Mixed/{VERSION}/")
os.makedirs(MODEL_FOLDER,exist_ok=True)
KERNEL_TYPE = f"{CFG.MODEL}_{CFG.SZ}_bs{CFG.BS}_ep{CFG.EP}_lr{str(CFG.LR).replace('-','')}_wd{str(CFG.WD).replace('-','')}"

print(MODEL_FOLDER)
print(KERNEL_TYPE)

/mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Mixed/NB_EXP_V0_001_Mixed
tf_efficientnet_b0_ns_1280_bs16_ep10_lr0.005_wd1e08


### OOF For Yolo

In [4]:
yolo_oof = pd.read_csv('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/yolov5l6_exp003/OOF/OOF.csv')
yolo_oof.drop('Unnamed: 0',1,inplace=True)
yolo_oof.rename(columns = {'abw':'abw_yolo','pbw':'pbw_yolo'},inplace=True)
yolo_oof

Unnamed: 0,image_id_worm,abw_yolo,pbw_yolo
0,id_0002ea6f15c7fa6f4c221783,0,41
1,id_0005ef295aafe6acc63587db,0,8
2,id_00093f2c76f6488737325859,0,7
3,id_000b2e6c437c643f25d4a6c3,0,86
4,id_000c2040da4b05816cefbb96,0,0
...,...,...,...
7074,id_ffd6fc216afef2eb42f8e985,0,78
7075,id_ffe6e193dd78b7258d864f6b,5,0
7076,id_ffe98feb1e592e887b81c3f0,0,102
7077,id_ffef90ecf47baa38434f84fb,0,30


### Yolo Test Preds

In [5]:
yolo_preds = pd.read_csv('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/yolov5l6_exp003/Ensemble/Infer_2048_CONFTHRESH_45_5_Folds/test_preds.csv')
yolo_preds.drop('Unnamed: 0',1,inplace=True)
yolo_preds.rename(columns = {'abw':'abw_yolo','pbw':'pbw_yolo'},inplace=True)
yolo_preds['image_id_worm'] = test_df['image_id_worm']
yolo_preds['image_id_worm'] = yolo_preds['image_id_worm'].apply(lambda x:x[:-4])
yolo_preds.head()

Unnamed: 0,image_id_worm,abw_yolo,pbw_yolo
0,id_00332970f80fa9a47a39516d,4,0
1,id_0035981bc3ae42eb5b57a317,0,21
2,id_005102f664b820f778291dee,9,0
3,id_0066456f5fb2cd858c69ab39,5,0
4,id_007159c1fa015ba6f394deeb,0,0


### Reg OOF

In [6]:
reg_oof = np.load('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Regression/NB_EXP_V0_007_Regression/oof_preds.npy')
reg_oof_ids = np.load('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Regression/NB_EXP_V0_007_Regression/oof_ids.npy')

In [7]:
reg_oof = pd.DataFrame(reg_oof)
reg_oof.rename(columns = {0:'abw_reg',1:'pbw_reg'},inplace=True)
reg_oof_ids = pd.DataFrame(reg_oof_ids)
reg_oof_ids.rename(columns = {0:'image_id_worm'},inplace=True)
reg_oof = pd.concat([reg_oof_ids,reg_oof],1)
reg_oof['image_id_worm'] = reg_oof['image_id_worm'].apply(lambda x:x[:-4])
reg_oof.head()

Unnamed: 0,image_id_worm,abw_reg,pbw_reg
0,id_0005ef295aafe6acc63587db,0.0,7.042969
1,id_000b2e6c437c643f25d4a6c3,0.0,78.6875
2,id_000c2040da4b05816cefbb96,0.0,0.0
3,id_00b6b77332b132dbb58a7dc4,0.0,7.261719
4,id_00d5771ea3336b8bec3efadc,0.0,3.134766


In [8]:
reg_oof.isna().sum()

image_id_worm    0
abw_reg          0
pbw_reg          0
dtype: int64

### Reg Test Preds

In [9]:
reg_preds = np.load('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Regression/NB_EXP_V0_007_Regression/test_preds.npy')
reg_preds = pd.DataFrame(reg_preds)
reg_preds.rename(columns = {0:'abw_reg',1:'pbw_reg'},inplace=True)
reg_preds['image_id_worm'] = test_df['image_id_worm']
reg_preds['image_id_worm'] = reg_preds['image_id_worm'].apply(lambda x:x[:-4])
reg_preds.head()

Unnamed: 0,abw_reg,pbw_reg,image_id_worm
0,3.873047,0.0,id_00332970f80fa9a47a39516d
1,0.0,19.75,id_0035981bc3ae42eb5b57a317
2,9.164062,0.0,id_005102f664b820f778291dee
3,4.582031,0.0,id_0066456f5fb2cd858c69ab39
4,0.0,0.0,id_007159c1fa015ba6f394deeb


In [10]:
[i for i in test_df['image_id_worm'].apply(lambda x:x[:-4]) if i not in reg_preds['image_id_worm']]

['id_00332970f80fa9a47a39516d',
 'id_0035981bc3ae42eb5b57a317',
 'id_005102f664b820f778291dee',
 'id_0066456f5fb2cd858c69ab39',
 'id_007159c1fa015ba6f394deeb',
 'id_0087ad7987b3f82bc5b5c7d0',
 'id_00887bebda26184c36e18e00',
 'id_00ba116c0f45a71a7e0e652c',
 'id_00c11f7689e2351305cb12e3',
 'id_00c1c4a5f0e0354f8e2aa416',
 'id_00dcb00a08a4aeafff1359da',
 'id_00e16adc89f71646eafaca69',
 'id_00eb9a777926488215ce62ed',
 'id_00f9ce0575a49398f290186b',
 'id_011bb96d55f9a0823a2caae6',
 'id_012a8855adcc9ca2a522d771',
 'id_012f50e13a438474cc63b7b7',
 'id_016185ddae947c538eac2c33',
 'id_016965ccadb213b2f74fc1e6',
 'id_01790f69486d24f4d22f8bf9',
 'id_017f0b03f1c86cf4df9f9811',
 'id_0192241cfd3684ed17185acf',
 'id_01a90d01ea45b5f4a25a8579',
 'id_01c70107aed2aa2cded0f3c3',
 'id_01d1cc351e167df77cf8ac8b',
 'id_01dbd7ce330c62693ec4d7d7',
 'id_01f378ca4c1c3dc689421bff',
 'id_01f8de1e976cb8bd8783253a',
 'id_0212b1bd6a91a586716175ca',
 'id_025aa15fb1c4f64b7bad6aef',
 'id_025fe02ee66bfa2670e8326b',
 'id_02b

In [11]:
reg_preds.isna().sum()

abw_reg          0
pbw_reg          0
image_id_worm    0
dtype: int64

### Classification OOF

In [12]:
cls_oof = np.load('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Classification/NB_EXP_V0_001_Classification/oof_preds.npy')
cls_oof_ids = np.load('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Classification/NB_EXP_V0_001_Classification/oof_ids.npy')

cls_oof = pd.DataFrame(cls_oof)
cls_oof.rename(columns = {0:'abw_cls',1:'pbw_cls'},inplace=True)
cls_oof_ids = pd.DataFrame(cls_oof_ids)
cls_oof_ids.rename(columns = {0:'image_id_worm'},inplace=True)
cls_oof = pd.concat([cls_oof_ids,cls_oof],1)
cls_oof['image_id_worm'] = cls_oof['image_id_worm'].apply(lambda x:x[:-4])
cls_oof.head()

Unnamed: 0,image_id_worm,abw_cls,pbw_cls
0,id_0005ef295aafe6acc63587db,0.000538,1.0
1,id_000b2e6c437c643f25d4a6c3,0.00387,0.964844
2,id_000c2040da4b05816cefbb96,0.018341,0.013275
3,id_00b6b77332b132dbb58a7dc4,0.000507,0.998047
4,id_00d5771ea3336b8bec3efadc,8.5e-05,1.0


In [13]:
cls_oof.isna().sum()

image_id_worm    0
abw_cls          0
pbw_cls          0
dtype: int64

### Cls Test Preds

In [14]:
cls_preds = np.load('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Classification/NB_EXP_V0_001_Classification/test_preds.npy')
cls_preds = pd.DataFrame(cls_preds)
cls_preds.rename(columns = {0:'abw_cls',1:'pbw_cls'},inplace=True)
cls_preds['image_id_worm'] = test_df['image_id_worm']
cls_preds['image_id_worm'] = cls_preds['image_id_worm'].apply(lambda x:x[:-4])
cls_preds.head()

Unnamed: 0,abw_cls,pbw_cls,image_id_worm
0,1.0,0.000136,id_00332970f80fa9a47a39516d
1,0.000656,0.999512,id_0035981bc3ae42eb5b57a317
2,1.0,1.7e-05,id_005102f664b820f778291dee
3,0.963379,0.040863,id_0066456f5fb2cd858c69ab39
4,0.005249,0.048584,id_007159c1fa015ba6f394deeb


In [15]:
cls_preds.isna().sum()

abw_cls          0
pbw_cls          0
image_id_worm    0
dtype: int64

### Get kfolds

In [16]:
def make_train_dataset():
    train_new_pbw = pd.DataFrame()
    train_new_pbw['image_id_worm']= train['image_id_worm'].unique()
    train_new_pbw = pd.merge(train_new_pbw,train[train['worm_type']=='pbw'].reset_index(drop=True),on='image_id_worm',how='left')
    train_new_pbw['worm_type'] = 'pbw'
    train_new_pbw.fillna(0,inplace=True)
    
    train_new_abw = pd.DataFrame()
    train_new_abw['image_id_worm']= train['image_id_worm'].unique()
    train_new_abw = pd.merge(train_new_abw,train[train['worm_type']=='abw'].reset_index(drop=True),on='image_id_worm',how='left')
    train_new_abw['worm_type'] = 'abw'
    train_new_abw.fillna(0,inplace=True)
    
    train_out = pd.concat([train_new_pbw,train_new_abw],0).reset_index(drop=True)
    
    assert len(train_out) == train['image_id_worm'].nunique()*2
    train_out = pd.pivot(train_out,'image_id_worm','worm_type','number_of_worms').reset_index()
    train_out[['abw','pbw']] = train_out[['abw','pbw']].astype(int)
    
    labels = [f'{i}' for i in range(10)]
    train_out['abw_bins'] = pd.cut(train_out['abw'],10,labels=labels)
    train_out['pbw_bins'] = pd.cut(train_out['pbw'],10,labels=labels)
    train_out['consol_bins'] = train_out['abw_bins'].astype(str)+'_'+train_out['pbw_bins'].astype(str)
    
#     train_out = train_out[['image_id_worm','abw','pbw','abw_bins','pbw_bins','consol_bins']]
    return train_out

train_new = make_train_dataset()

In [17]:
train_files = []
val_files = []
fold = []

for folds in [0,1,2,3,4]:
    files = list(pd.read_csv(f'///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/splits/fold{folds}.txt',header=None)[0].apply(lambda x:x.split("/")[-1]))
    fold.append([folds]*len(files))
    train_files.append(files)

train_files= ([item for sublist in train_files for item in sublist])
fold= ([item for sublist in fold for item in sublist])
fold_dict = dict(zip(train_files,fold))
train_new['fold'] = train_new['image_id_worm'].map(fold_dict)

In [18]:
train_new.head()

worm_type,image_id_worm,abw,pbw,abw_bins,pbw_bins,consol_bins,fold
0,id_0002ea6f15c7fa6f4c221783.jpg,0,51,0,0,0_0,1.0
1,id_0005ef295aafe6acc63587db.jpg,0,8,0,0,0_0,0.0
2,id_00084298dd030a500033ff78.jpg,0,0,0,0,0_0,
3,id_00093f2c76f6488737325859.jpg,0,12,0,0,0_0,2.0
4,id_000b2e6c437c643f25d4a6c3.jpg,0,87,0,1,0_1,0.0


In [19]:
train_new = train_new[~train_new['fold'].isna()].reset_index(drop=True)
train_new['fold'] = train_new['fold'].astype(int)
train_new['image_id_worm'] = train_new['image_id_worm'].apply(lambda x:x[:-4])
print(train_new['fold'].unique())
train_new.shape

[1 0 2 4 3]


(7079, 7)

In [20]:
train_new.isna().sum()

worm_type
image_id_worm    0
abw              0
pbw              0
abw_bins         0
pbw_bins         0
consol_bins      0
fold             0
dtype: int64

In [21]:
train_new.head()

worm_type,image_id_worm,abw,pbw,abw_bins,pbw_bins,consol_bins,fold
0,id_0002ea6f15c7fa6f4c221783,0,51,0,0,0_0,1
1,id_0005ef295aafe6acc63587db,0,8,0,0,0_0,0
2,id_00093f2c76f6488737325859,0,12,0,0,0_0,2
3,id_000b2e6c437c643f25d4a6c3,0,87,0,1,0_1,0
4,id_000c2040da4b05816cefbb96,0,0,0,0,0_0,0


In [22]:
train_new.shape

(7079, 7)

### Merge OOFs

In [23]:
oof_df = pd.merge(yolo_oof,reg_oof,on='image_id_worm',how='left')
oof_df = pd.merge(oof_df, cls_oof,on='image_id_worm',how='left')
oof_df = pd.merge(oof_df,train_new[['image_id_worm','fold']],on='image_id_worm',how='left')
oof_df = oof_df[~oof_df['fold'].isna()].reset_index(drop=True) 
oof_df = pd.merge(oof_df,train_new[['image_id_worm','abw','pbw']],on='image_id_worm',how='left')
oof_df['fold'] = oof_df['fold'].astype(int)
oof_df['fold'].value_counts()



0    1417
3    1417
1    1416
4    1416
2    1411
Name: fold, dtype: int64

In [24]:
oof_df.isna().sum()

image_id_worm    0
abw_yolo         0
pbw_yolo         0
abw_reg          0
pbw_reg          0
abw_cls          0
pbw_cls          0
fold             0
abw              0
pbw              0
dtype: int64

In [25]:
cls_preds

Unnamed: 0,abw_cls,pbw_cls,image_id_worm
0,1.000000,0.000136,id_00332970f80fa9a47a39516d
1,0.000656,0.999512,id_0035981bc3ae42eb5b57a317
2,1.000000,0.000017,id_005102f664b820f778291dee
3,0.963379,0.040863,id_0066456f5fb2cd858c69ab39
4,0.005249,0.048584,id_007159c1fa015ba6f394deeb
...,...,...,...
2798,0.000222,0.999512,id_ffad8f3773a4222f8fe5ba1a
2799,0.001793,0.998535,id_ffb65e6de900c49d8f2ef95a
2800,0.000815,0.001739,id_ffbcb27fa549278f47505515
2801,0.003998,0.025574,id_ffc0e41e10b0c964d4a02811


In [26]:
pred_df = pd.merge(reg_preds,yolo_preds,on='image_id_worm',how='left')
pred_df = pd.merge(pred_df, cls_preds,on='image_id_worm',how='left')
pred_df.head()

Unnamed: 0,abw_reg,pbw_reg,image_id_worm,abw_yolo,pbw_yolo,abw_cls,pbw_cls
0,3.873047,0.0,id_00332970f80fa9a47a39516d,4,0,1.0,0.000136
1,0.0,19.75,id_0035981bc3ae42eb5b57a317,0,21,0.000656,0.999512
2,9.164062,0.0,id_005102f664b820f778291dee,9,0,1.0,1.7e-05
3,4.582031,0.0,id_0066456f5fb2cd858c69ab39,5,0,0.963379,0.040863
4,0.0,0.0,id_007159c1fa015ba6f394deeb,0,0,0.005249,0.048584


In [27]:
pred_df[pred_df.isna().any(axis=1)]

Unnamed: 0,abw_reg,pbw_reg,image_id_worm,abw_yolo,pbw_yolo,abw_cls,pbw_cls


In [28]:
test_df[test_df['image_id_worm']=='id_9e6ce14967f173f31dbf00cc.jpg']

Unnamed: 0,image_id_worm


In [29]:
test_df.head()

Unnamed: 0,image_id_worm
0,id_00332970f80fa9a47a39516d.jpg
1,id_0035981bc3ae42eb5b57a317.jpg
2,id_005102f664b820f778291dee.jpg
3,id_0066456f5fb2cd858c69ab39.jpg
4,id_007159c1fa015ba6f394deeb.jpg


### Run!

In [30]:
def training_loop(fold):
    out = pd.DataFrame()
    pred_ = pd.DataFrame()
    tr = oof_df[oof_df['fold']!=fold].reset_index(drop=True)
    val = oof_df[oof_df['fold']==fold].reset_index(drop=True)

    val_ix = oof_df[oof_df['fold']==fold].index
    
    X_tr = np.round(tr[['abw_yolo','pbw_yolo','abw_reg','pbw_reg','abw_cls','pbw_cls']])
    Y_tr = tr[['abw','pbw']]

    X_val = np.round(val[['abw_yolo','pbw_yolo','abw_reg','pbw_reg','abw_cls','pbw_cls']])
    Y_val = val[['abw','pbw']]

    X_test = np.round(pred_df[['abw_yolo','pbw_yolo','abw_reg','pbw_reg','abw_cls','pbw_cls']])
        
    for i in range(2):
        model = LGBMRegressor(n_estimators=220,
                              n_jobs=-1,
                              learning_rate=0.02,
                              random_state=42,
#                               num_leaves=101,
                              metric='mae',
                             subsample=0.2,)
        model.fit(X_tr, Y_tr.iloc[:,i], 
                  eval_set=[(X_tr, Y_tr.iloc[:,i]),(X_val,Y_val.iloc[:,i])],
                 eval_names = ['train','val'])    
        p = np.round(model.predict(X_val).clip(0,600))
        pred = np.round(model.predict(X_test).clip(0,600))
        out = pd.concat([out,pd.DataFrame(pred)],1)
        pred_ = pd.concat([pred_,pd.DataFrame(p)],1)
    
    pred_.columns = ['abw','pbw']
    
    print(f'Fold:{fold} mean_absolute_error:{mean_absolute_error(Y_val.values.reshape(-1,1),pred_.values.reshape(-1,1))}')
    return model,out,pred_,val_ix,mean_absolute_error(Y_val.values.reshape(-1,1),pred_.values.reshape(-1,1))

In [31]:
OOF_CONSOL = pd.DataFrame()
OOF_CONSOL['abw'] = OOF_CONSOL['pbw'] = np.zeros(len(train_new))
mae_ = 0
for fld in range(5):
    model,pred,oof,val_ix, mae = training_loop(fld)
    OOF_CONSOL.iloc[val_ix,:] = oof
    mae_ += mae/5
    
print('Avg MAE:',mae_)

[1]	train's l1: 1.98982	val's l1: 1.93219
[2]	train's l1: 1.95074	val's l1: 1.89491
[3]	train's l1: 1.91245	val's l1: 1.85838
[4]	train's l1: 1.87492	val's l1: 1.82259
[5]	train's l1: 1.83814	val's l1: 1.78751
[6]	train's l1: 1.80209	val's l1: 1.75313
[7]	train's l1: 1.76677	val's l1: 1.71944
[8]	train's l1: 1.73215	val's l1: 1.68642
[9]	train's l1: 1.69823	val's l1: 1.65406
[10]	train's l1: 1.66498	val's l1: 1.62235
[11]	train's l1: 1.6324	val's l1: 1.59127
[12]	train's l1: 1.60047	val's l1: 1.56081
[13]	train's l1: 1.56918	val's l1: 1.53097
[14]	train's l1: 1.53852	val's l1: 1.50173
[15]	train's l1: 1.50866	val's l1: 1.4733
[16]	train's l1: 1.47945	val's l1: 1.44555
[17]	train's l1: 1.45083	val's l1: 1.41835
[18]	train's l1: 1.42279	val's l1: 1.3917
[19]	train's l1: 1.39533	val's l1: 1.3656
[20]	train's l1: 1.36844	val's l1: 1.34002
[21]	train's l1: 1.34209	val's l1: 1.31494
[22]	train's l1: 1.3163	val's l1: 1.29047
[23]	train's l1: 1.29102	val's l1: 1.2664
[24]	train's l1: 1.26626	v

Fold:0 mean_absolute_error:1.8828510938602683
[1]	train's l1: 1.91986	val's l1: 2.03555
[2]	train's l1: 1.88226	val's l1: 1.99531
[3]	train's l1: 1.84541	val's l1: 1.95588
[4]	train's l1: 1.80929	val's l1: 1.91724
[5]	train's l1: 1.7739	val's l1: 1.87938
[6]	train's l1: 1.73921	val's l1: 1.84226
[7]	train's l1: 1.70522	val's l1: 1.80589
[8]	train's l1: 1.67191	val's l1: 1.77027
[9]	train's l1: 1.63927	val's l1: 1.73535
[10]	train's l1: 1.60727	val's l1: 1.70115
[11]	train's l1: 1.57592	val's l1: 1.66762
[12]	train's l1: 1.54522	val's l1: 1.6348
[13]	train's l1: 1.51534	val's l1: 1.60291
[14]	train's l1: 1.48607	val's l1: 1.57168
[15]	train's l1: 1.45738	val's l1: 1.54107
[16]	train's l1: 1.42929	val's l1: 1.51108
[17]	train's l1: 1.40182	val's l1: 1.4817
[18]	train's l1: 1.37493	val's l1: 1.45298
[19]	train's l1: 1.34858	val's l1: 1.42483
[20]	train's l1: 1.32277	val's l1: 1.39725
[21]	train's l1: 1.29749	val's l1: 1.37019
[22]	train's l1: 1.27272	val's l1: 1.34371
[23]	train's l1: 1.2

Fold:1 mean_absolute_error:1.5921610169491525
[1]	train's l1: 1.97759	val's l1: 1.94022
[2]	train's l1: 1.93884	val's l1: 1.90204
[3]	train's l1: 1.90087	val's l1: 1.86463
[4]	train's l1: 1.86365	val's l1: 1.82796
[5]	train's l1: 1.82718	val's l1: 1.79203
[6]	train's l1: 1.79143	val's l1: 1.75682
[7]	train's l1: 1.75641	val's l1: 1.72229
[8]	train's l1: 1.72208	val's l1: 1.68845
[9]	train's l1: 1.68844	val's l1: 1.6553
[10]	train's l1: 1.65547	val's l1: 1.6228
[11]	train's l1: 1.62316	val's l1: 1.59094
[12]	train's l1: 1.5915	val's l1: 1.55975
[13]	train's l1: 1.56047	val's l1: 1.52916
[14]	train's l1: 1.53013	val's l1: 1.4993
[15]	train's l1: 1.50054	val's l1: 1.47031
[16]	train's l1: 1.47155	val's l1: 1.44186
[17]	train's l1: 1.44313	val's l1: 1.41399
[18]	train's l1: 1.41536	val's l1: 1.38678
[19]	train's l1: 1.38817	val's l1: 1.36012
[20]	train's l1: 1.36155	val's l1: 1.33402
[21]	train's l1: 1.33547	val's l1: 1.30844
[22]	train's l1: 1.30991	val's l1: 1.28336
[23]	train's l1: 1.28

Fold:2 mean_absolute_error:1.7062367115520907
[1]	train's l1: 1.95995	val's l1: 1.95683
[2]	train's l1: 1.92143	val's l1: 1.91836
[3]	train's l1: 1.88368	val's l1: 1.88066
[4]	train's l1: 1.84669	val's l1: 1.84372
[5]	train's l1: 1.81044	val's l1: 1.80744
[6]	train's l1: 1.77491	val's l1: 1.77195
[7]	train's l1: 1.74009	val's l1: 1.73711
[8]	train's l1: 1.70597	val's l1: 1.70304
[9]	train's l1: 1.67254	val's l1: 1.66957
[10]	train's l1: 1.63977	val's l1: 1.63681
[11]	train's l1: 1.60765	val's l1: 1.60473
[12]	train's l1: 1.57618	val's l1: 1.57326
[13]	train's l1: 1.54535	val's l1: 1.5424
[14]	train's l1: 1.51537	val's l1: 1.51239
[15]	train's l1: 1.48602	val's l1: 1.48304
[16]	train's l1: 1.45727	val's l1: 1.45437
[17]	train's l1: 1.42913	val's l1: 1.4263
[18]	train's l1: 1.40161	val's l1: 1.39896
[19]	train's l1: 1.37465	val's l1: 1.37212
[20]	train's l1: 1.34822	val's l1: 1.34584
[21]	train's l1: 1.32233	val's l1: 1.32009
[22]	train's l1: 1.29695	val's l1: 1.29485
[23]	train's l1: 1.

[1]	train's l1: 1.97056	val's l1: 1.95671
[2]	train's l1: 1.93185	val's l1: 1.91831
[3]	train's l1: 1.89391	val's l1: 1.88055
[4]	train's l1: 1.85673	val's l1: 1.84354
[5]	train's l1: 1.8203	val's l1: 1.80727
[6]	train's l1: 1.78459	val's l1: 1.77172
[7]	train's l1: 1.7496	val's l1: 1.73689
[8]	train's l1: 1.7153	val's l1: 1.70275
[9]	train's l1: 1.6817	val's l1: 1.66929
[10]	train's l1: 1.64876	val's l1: 1.63651
[11]	train's l1: 1.61648	val's l1: 1.60438
[12]	train's l1: 1.58487	val's l1: 1.57285
[13]	train's l1: 1.55388	val's l1: 1.54198
[14]	train's l1: 1.52365	val's l1: 1.51179
[15]	train's l1: 1.49417	val's l1: 1.48223
[16]	train's l1: 1.46526	val's l1: 1.45333
[17]	train's l1: 1.43695	val's l1: 1.42497
[18]	train's l1: 1.40921	val's l1: 1.39718
[19]	train's l1: 1.38203	val's l1: 1.36999
[20]	train's l1: 1.35542	val's l1: 1.34327
[21]	train's l1: 1.32936	val's l1: 1.31715
[22]	train's l1: 1.30389	val's l1: 1.2916
[23]	train's l1: 1.27896	val's l1: 1.2665
[24]	train's l1: 1.25454	v

In [32]:
np.round(oof_df[['abw_yolo','pbw_yolo']].values+oof_df[['abw_reg','pbw_reg']].values)

array([[  0.,  87.],
       [  0.,  15.],
       [  0.,  14.],
       ...,
       [  0., 204.],
       [  0.,  60.],
       [  0.,  65.]])

In [33]:
np.round(oof_df[['abw_cls','pbw_cls']])

Unnamed: 0,abw_cls,pbw_cls
0,0.0,1.0
1,0.0,1.0
2,0.0,1.0
3,0.0,1.0
4,0.0,0.0
...,...,...
7072,0.0,1.0
7073,1.0,0.0
7074,0.0,1.0
7075,0.0,1.0


In [34]:
oof_df['pbw'].value_counts()

0      2131
1       544
2       385
3       380
4       334
       ... 
229       1
527       1
135       1
166       1
241       1
Name: pbw, Length: 244, dtype: int64

In [154]:
_ = (0.75*oof_df[['abw_yolo','pbw_yolo']].values+0.25*oof_df[['abw_reg','pbw_reg']].values)*(np.round(oof_df[['abw_cls','pbw_cls']]))

In [155]:
mean_absolute_error(oof_df[['abw','pbw']].iloc[:,],_.iloc[:,])

1.5755057055279413

In [145]:
mean_absolute_error(oof_df[['abw','pbw']].iloc[:,1],oof_df[['abw_yolo','pbw_reg']].iloc[:,1])

3.8152787425650434

In [39]:
temp = pd.DataFrame(np.round(0.75*pred_df[['abw_yolo','pbw_yolo']].values+0.25*pred_df[['abw_reg','pbw_reg']].values))
temp

Unnamed: 0,0,1
0,4.0,0.0
1,0.0,21.0
2,9.0,0.0
3,5.0,0.0
4,0.0,0.0
...,...,...
2798,0.0,8.0
2799,0.0,123.0
2800,0.0,0.0
2801,0.0,0.0


In [43]:
oof_df[['abw_cls','pbw_cls']].head(1)

Unnamed: 0,abw_cls,pbw_cls
0,0.00054,0.999512


In [90]:
pbw = []
abw = []
for i in oof_df.itertuples():
    if i.pbw_cls < 0.2:
        if ((i.pbw_yolo < 3) and (i.pbw_reg < 3)):
            pbw.append(0)
        else:
            pbw.append(min(i.pbw_yolo,i.pbw_reg))
    elif i.pbw_cls > 0.5:
        if ((i.pbw_yolo == 0) and (i.pbw_reg == 0)):
            pbw.append(1)
        elif ((i.pbw_yolo == 0) or (i.pbw_reg == 0)):
            pbw.append(max(i.pbw_yolo,i.pbw_reg))
        else :
            pbw.append(round(0.75*i.pbw_yolo+0.25*i.pbw_reg))
    else:
        pbw.append(min(1,round(0.75*i.pbw_yolo+0.25*i.pbw_reg)))

    if i.abw_cls < 0.2:
        if ((i.abw_yolo == 0) and (i.abw_reg == 0)):
            abw.append(0)
        else:
            abw.append(min(i.abw_yolo,i.abw_reg))
    elif i.abw_cls > 0.5:
        if ((i.abw_yolo == 0) and (i.abw_reg == 0)):
            abw.append(1)
        elif ((i.abw_yolo == 0) or (i.abw_reg == 0)):
            abw.append(max(i.abw_yolo,i.abw_reg))
        else :
            abw.append(round(0.75*i.abw_yolo+0.25*i.abw_reg))
    else:
        abw.append(min(1,round(0.75*i.abw_yolo+0.25*i.abw_reg)))


oof_consol = pd.concat([pd.DataFrame(abw),pd.DataFrame(pbw)],1)
print(mean_absolute_error(oof_df[['abw','pbw']].iloc[:,],oof_consol.values))


1.5494496840212462


In [94]:
pbw = []
abw = []
for i in oof_df.itertuples():
    if i.pbw_cls < 0.5:
        if ((i.pbw_yolo < 2) and (i.pbw_reg < 2)):
            pbw.append(0)
        else:
            pbw.append(min(i.pbw_yolo,i.pbw_reg))
    elif i.pbw_cls >= 0.5:
        if ((i.pbw_yolo == 0) and (i.pbw_reg == 0)):
            pbw.append(1)
        elif ((i.pbw_yolo == 0) or (i.pbw_reg == 0)):
            pbw.append(max(i.pbw_yolo,i.pbw_reg))
        else :
            pbw.append(round(0.75*i.pbw_yolo+0.25*i.pbw_reg))

    if i.abw_cls < 0.5:
        if ((i.abw_yolo == 0) and (i.abw_reg == 0)):
            abw.append(0)
        else:
            abw.append(min(i.abw_yolo,i.abw_reg))
    elif i.abw_cls >= 0.5:
        if ((i.abw_yolo == 0) and (i.abw_reg == 0)):
            abw.append(1)
        elif ((i.abw_yolo == 0) or (i.abw_reg == 0)):
            abw.append(max(i.abw_yolo,i.abw_reg))
        else :
            abw.append(round(0.75*i.abw_yolo+0.25*i.abw_reg))


oof_consol = pd.concat([pd.DataFrame(abw),pd.DataFrame(pbw)],1)
print(mean_absolute_error(oof_df[['abw','pbw']].iloc[:,],oof_consol.values))


1.5420681601849522


In [73]:
# pred_consol = pd.DataFrame(np.round(0.75*pred_df[['abw_yolo','pbw_yolo']].values+0.25*pred_df[['abw_reg','pbw_reg']].values)*(np.round(pred_df[['abw_cls','pbw_cls']]).values))



1.5493790326152832

In [110]:
pbw = []
abw = []
for i in pred_df.itertuples():
    if i.pbw_cls < 0.5:
        if ((i.pbw_yolo < 2) and (i.pbw_reg < 2)):
            pbw.append(0)
        else:
            pbw.append(min(i.pbw_yolo,i.pbw_reg))
    elif i.pbw_cls >= 0.5:
        if ((i.pbw_yolo == 0) and (i.pbw_reg == 0)):
            pbw.append(1)
        elif ((i.pbw_yolo == 0) or (i.pbw_reg == 0)):
            pbw.append(max(i.pbw_yolo,i.pbw_reg))
        else :
            pbw.append(round(0.75*i.pbw_yolo+0.25*i.pbw_reg))

    if i.abw_cls < 0.5:
        if ((i.abw_yolo == 0) and (i.abw_reg == 0)):
            abw.append(0)
        else:
            abw.append(min(i.abw_yolo,i.abw_reg))
    elif i.abw_cls >= 0.5:
        if ((i.abw_yolo == 0) and (i.abw_reg == 0)):
            abw.append(1)
        elif ((i.abw_yolo == 0) or (i.abw_reg == 0)):
            abw.append(max(i.abw_yolo,i.abw_reg))
        else :
            abw.append(round(0.75*i.abw_yolo+0.25*i.abw_reg))


pred_consol = pd.concat([pd.DataFrame(abw),pd.DataFrame(pbw)],1)

pred_consol.shape

(2803, 2)

In [126]:
(pred_df[['abw_cls','pbw_cls']]<0.4).values*1

array([[0, 1],
       [1, 0],
       [0, 1],
       ...,
       [1, 1],
       [1, 1],
       [1, 0]])

In [131]:
pred_consol = pd.DataFrame((0.75*pred_df[['abw_yolo','pbw_yolo']].values+0.25*pred_df[['abw_reg','pbw_reg']].values)*(np.round(pred_df[['abw_cls','pbw_cls']]).values))
# pred_consol = pd.DataFrame((0.75*pred_df[['abw_yolo','pbw_yolo']].values+0.25*pred_df[['abw_reg','pbw_reg']].values)*((pred_df[['abw_cls','pbw_cls']].values<0.4)*1))
# pred_consol = pd.concat([pd.DataFrame(abw),pd.DataFrame(pbw)],1)
pred_consol

Unnamed: 0,0,1
0,3.968262,0.000000
1,0.000000,20.687500
2,9.041016,0.000000
3,4.895508,0.000000
4,0.000000,0.000000
...,...,...
2798,0.000000,7.608398
2799,0.000000,122.968750
2800,0.000000,0.000000
2801,0.000000,0.000000


In [100]:
pred_consol.shape

(2803, 2)

In [132]:
# pred_consol.rename(columns = {0:'abw',1:'pbw'},inplace=True)
pred_consol.columns = ['abw','pbw']
pred_consol.head()

Unnamed: 0,abw,pbw
0,3.968262,0.0
1,0.0,20.6875
2,9.041016,0.0
3,4.895508,0.0
4,0.0,0.0


In [133]:
pred_consol.isna().sum()

abw    0
pbw    0
dtype: int64

In [134]:
pred_consol.shape

(2803, 2)

In [135]:
submit = pd.read_csv(os.path.join(DIR,'SampleSubmission.csv'))
submit.shape

(5606, 2)

In [136]:
submit.head(1)

Unnamed: 0,image_id_worm,number_of_worms
0,id_00332970f80fa9a47a39516d_abw,0


In [137]:
submit.loc[::2,'number_of_worms'] = pred_consol.loc[:,'abw'].values
submit.loc[1::2,'number_of_worms'] = pred_consol.loc[:,'pbw'].values

In [138]:
submit.head(10)

Unnamed: 0,image_id_worm,number_of_worms
0,id_00332970f80fa9a47a39516d_abw,3.968262
1,id_00332970f80fa9a47a39516d_pbw,0.0
2,id_0035981bc3ae42eb5b57a317_abw,0.0
3,id_0035981bc3ae42eb5b57a317_pbw,20.6875
4,id_005102f664b820f778291dee_abw,9.041016
5,id_005102f664b820f778291dee_pbw,0.0
6,id_0066456f5fb2cd858c69ab39_abw,4.895508
7,id_0066456f5fb2cd858c69ab39_pbw,0.0
8,id_007159c1fa015ba6f394deeb_abw,0.0
9,id_007159c1fa015ba6f394deeb_pbw,0.0


In [139]:
submit.shape

(5606, 2)

In [140]:
submit.isna().sum()

image_id_worm      0
number_of_worms    0
dtype: int64

In [141]:
submit[submit['number_of_worms'].isna()]

Unnamed: 0,image_id_worm,number_of_worms


In [142]:
submit.to_csv('///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/runs/Mixed/new/NB_EXP_V0_003_mixed/reg_yolo_cls_simple_rule_based_noround.csv',index=False)

### Fin 