### Regression notebook for Wadhwani AI competition

In [1]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from pycocotools import mask as mutils
from pycocotools.coco import COCO
from mmcv import Config
from mmdet.apis import set_random_seed

import random, os
import numpy as np
import matplotlib.pyplot as plt
import warnings
from fastai.vision.all import *
import cv2
import matplotlib.patches as patches

from shapely.wkt import loads
from shapely.ops import clip_by_rect
from shapely.geometry import box
from tqdm import tqdm



In [2]:
class CFG:
    seed = 46
    n_splits = 5
    SZ = 1024
    debug = True

random.seed(CFG.seed)
os.environ["PYTHONHASHSEED"] = str(CFG.seed)
np.random.seed(CFG.seed)
plt.rcParams["font.size"] = 13
warnings.filterwarnings('ignore')

In [3]:
DIR = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/'
IMG_PATH = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/images'
submit = pd.read_csv(os.path.join(DIR,'SampleSubmission.csv'))
train = pd.read_csv(os.path.join(DIR,'Train.csv'))
test_df = pd.read_csv(os.path.join(DIR,'Test.csv'))
bbox_df = pd.read_csv(os.path.join(DIR,'images_bboxes.csv'))
bbox_df = bbox_df.groupby('image_id').agg(lambda x: list(x)).reset_index()
# bbox_df['worm_type'] = bbox_df['worm_type'].map({'pbw':0,'abw':1})
# bbox_df['worm_type'].fillna(2,inplace=True)
# bbox_df['worm_type'] = bbox_df['worm_type'].astype(int)

if CFG.debug:
    bbox_df = bbox_df.iloc[:1000,:]
    #     train = train.set_index('image_id_worm').loc[bbox_df['image_id']].reset_index()
    
VERSION = "NB_EXP_DETECTION_V0_001"
MODEL_FOLDER = Path(f"///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/models/{VERSION}/")
os.makedirs(MODEL_FOLDER,exist_ok=True)
KERNEL_TYPE = f"DETECTION_BASELINE"

print(MODEL_FOLDER)
print(KERNEL_TYPE)

/mnt/c/Personal/Competitions/Zindi/Wadhwani AI/models/NB_EXP_DETECTION_V0_001
DETECTION_BASELINE


In [4]:
train.head()

Unnamed: 0,image_id_worm,worm_type,number_of_worms
0,id_0002ea6f15c7fa6f4c221783.jpg,pbw,51
1,id_0005ef295aafe6acc63587db.jpg,pbw,8
2,id_00084298dd030a500033ff78.jpg,,0
3,id_00093f2c76f6488737325859.jpg,pbw,12
4,id_000b2e6c437c643f25d4a6c3.jpg,pbw,87


In [5]:
bbox_df.head()

Unnamed: 0,image_id,worm_type,geometry
0,id_0002ea6f15c7fa6f4c221783.jpg,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]","[POLYGON ((3195.39 1829.19, 3195.39 2014.47, 3101.91 2014.47, 3101.91 1829.19, 3195.39 1829.19)), POLYGON ((3302.5499999999997 1974.09, 3302.5499999999997 2120.21, 3145.74 2120.21, 3145.74 1974.09, 3302.5499999999997 1974.09)), POLYGON ((3552.86 2034.34, 3552.86 2183.47, 3432.23 2183.47, 3432.23 2034.34, 3552.86 2034.34)), POLYGON ((3234.4300000000003 1750.44, 3234.4300000000003 1845.69, 3095.59 1845.69, 3095.59 1750.44, 3234.4300000000003 1750.44)), POLYGON ((1939.21 605.89, 1939.21 697.27, 1744.84 697.27, 1744.84 605.89, 1939.21 605.89)), POLYGON ((2895.94 1724.52, 2895.94 1813.62, 2712...."
1,id_0005ef295aafe6acc63587db.jpg,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]","[POLYGON ((1685.22 2631.2, 1685.22 2979.6099999999997, 1587.45 2979.6099999999997, 1587.45 2631.2, 1685.22 2631.2)), POLYGON ((1451.04 1850.31, 1451.04 2063.9, 1296.7 2063.9, 1296.7 1850.31, 1451.04 1850.31)), POLYGON ((1653.55 2321.35, 1653.55 2508.64, 1547.51 2508.64, 1547.51 2321.35, 1653.55 2321.35)), POLYGON ((1579.03 1169.51, 1579.03 1288.13, 1363.07 1288.13, 1363.07 1169.51, 1579.03 1169.51)), POLYGON ((1770.61 2292.43, 1770.61 2516.8999999999996, 1639.78 2516.8999999999996, 1639.78 2292.43, 1770.61 2292.43)), POLYGON ((900.5400000000001 1055.61, 900.5400000000001 1181.4299999999998..."
2,id_00084298dd030a500033ff78.jpg,[nan],[nan]
3,id_00093f2c76f6488737325859.jpg,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]","[POLYGON ((2131.8199999999997 606.34, 2131.8199999999997 692.9300000000001, 2011.86 692.9300000000001, 2011.86 606.34, 2131.8199999999997 606.34)), POLYGON ((2111.86 1853.32, 2111.86 1902.03, 2014.47 1902.03, 2014.47 1853.32, 2111.86 1853.32)), POLYGON ((2183.7 1938.75, 2183.7 2019.39, 2099.08 2019.39, 2099.08 1938.75, 2183.7 1938.75)), POLYGON ((1366.5900000000001 1076.31, 1366.5900000000001 1147.83, 1317.91 1147.83, 1317.91 1076.31, 1366.5900000000001 1076.31)), POLYGON ((2064.63 505.73, 2064.63 565.9300000000001, 2034.95 565.9300000000001, 2034.95 505.73, 2064.63 505.73)), POLYGON ((196..."
4,id_000b2e6c437c643f25d4a6c3.jpg,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]","[POLYGON ((1706.9618768284392 2570.031476936281, 1706.9618768284392 2655.0424331919194, 1521.7366004179107 2655.0424331919194, 1521.7366004179107 2570.031476936281, 1706.9618768284392 2570.031476936281)), POLYGON ((1949.4141192236411 2731.831792413831, 1949.4141192236411 2836.879654075499, 1806.2078136292744 2836.879654075499, 1806.2078136292744 2731.831792413831, 1949.4141192236411 2731.831792413831)), POLYGON ((1659.8141824179822 1838.9019674895733, 1659.8141824179822 2013.1839481008215, 1577.165495344846 2013.1839481008215, 1577.165495344846 1838.9019674895733, 1659.8141824179822 1838.9..."


In [6]:
def getBBOX_new(df,ix):
    img = df.loc[ix,'image_id']
    geom = df.loc[ix,'geometry']
    label = df.loc[ix,'worm_type']
    img = cv2.imread(f'///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/images/{img}')
    H,W,_ = img.shape
    labels = []
    
    bounds_out = []
    for i,g in enumerate(geom):
        bounding_box = loads(str(g))
        image_frame = box(0, 0, W, H)
        valid_box = clip_by_rect(bounding_box, *image_frame.bounds)
        bounds = valid_box.bounds
        bounds_out.append(bounds)
        labels.append(label[i])
        
    return bounds_out, H,W,labels

In [7]:
a,b,c,d = getBBOX_new(bbox_df,0)

In [9]:
def getBBOX(df,ix):
    img = df.loc[ix,'image_id']
    geom = df.loc[ix,'geometry']
    labels = df.loc[ix,'worm_type']
    img = cv2.imread(f'///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/images/{img}')
    H,W,_ = img.shape
    bounding_box = loads(str(geom))
    image_frame = box(0, 0, W, H)
    valid_box = clip_by_rect(bounding_box, *image_frame.bounds)
    bounds = valid_box.bounds
        
## QC
# Create figure and axes
#     xmaxtoxmin = bounds[2]-bounds[0]
#     ymaxtoymin = bounds[3]-bounds[1]
#     fig, ax = plt.subplots()
#     # Display the image
#     ax.imshow(img)
#     # Create a Rectangle patch
#     rect = patches.Rectangle(((bounds[0]), (bounds[1])), (xmaxtoxmin), (ymaxtoymin),
#                              linewidth=1, edgecolor='r', facecolor='none')
#     # Add the patch to the Axes
#     ax.add_patch(rect)

    return bounds, H,W

In [11]:
bbox_df.head(1)

Unnamed: 0,image_id,worm_type,geometry
0,id_0002ea6f15c7fa6f4c221783.jpg,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]","[POLYGON ((3195.39 1829.19, 3195.39 2014.47, 3101.91 2014.47, 3101.91 1829.19, 3195.39 1829.19)), POLYGON ((3302.5499999999997 1974.09, 3302.5499999999997 2120.21, 3145.74 2120.21, 3145.74 1974.09, 3302.5499999999997 1974.09)), POLYGON ((3552.86 2034.34, 3552.86 2183.47, 3432.23 2183.47, 3432.23 2034.34, 3552.86 2034.34)), POLYGON ((3234.4300000000003 1750.44, 3234.4300000000003 1845.69, 3095.59 1845.69, 3095.59 1750.44, 3234.4300000000003 1750.44)), POLYGON ((1939.21 605.89, 1939.21 697.27, 1744.84 697.27, 1744.84 605.89, 1939.21 605.89)), POLYGON ((2895.94 1724.52, 2895.94 1813.62, 2712...."


In [12]:
bbox_copy = bbox_df.copy()
new_bbox = []
Ht = []
Wd = []
ids = []
labels = []
for index in np.arange(len(bbox_copy)):
    try:
        id = bbox_copy.loc[index,'image_id']
        bounds,H,W,label = getBBOX_new(bbox_copy,index)
    except:
        continue
    
    ids.append(id)
    new_bbox.append(bounds)
    Ht.append(int(H))
    Wd.append(int(W))
    labels.append(label)

ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encountered number: 'nan'
ParseException: Expected word but encoun

In [13]:
new_df = pd.concat([pd.Series(ids),pd.Series(new_bbox),pd.Series(Ht),pd.Series(Wd),pd.Series(labels)],1)
new_df.rename(columns={0:'image_id',1:'bbox',2:'Height',3:'Width',4:'label'},inplace=True)
new_df.head(1)
# len(ids),len(new_bbox),len(Ht),len(Wd)

Unnamed: 0,image_id,bbox,Height,Width,label
0,id_0002ea6f15c7fa6f4c221783.jpg,"[(3101.91, 1829.19, 3195.39, 2014.47), (3145.74, 1974.09, 3302.5499999999997, 2120.21), (3432.23, 2034.34, 3552.86, 2183.47), (3095.59, 1750.44, 3234.4300000000003, 1845.69), (1744.84, 605.89, 1939.21, 697.27), (2712.27, 1724.52, 2895.94, 1813.62), (2094.23, 933.59, 2252.55, 1091.76), (3783.55, 1596.0, 3846.88, 1808.3899999999999), (3717.2, 1937.94, 3863.46, 2043.38), (2788.644600441517, 1971.828466519495, 2952.304600441517, 2111.848466519495), (1607.16, 911.92, 1794.13, 1085.1499999999999), (2786.78, 1788.52, 2922.48, 1871.37), (3644.83, 1864.13, 3702.13, 2005.73), (3673.48, 1746.63, 3760...",3472,4624,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]"


In [14]:
new_df.head()

Unnamed: 0,image_id,bbox,Height,Width,label
0,id_0002ea6f15c7fa6f4c221783.jpg,"[(3101.91, 1829.19, 3195.39, 2014.47), (3145.74, 1974.09, 3302.5499999999997, 2120.21), (3432.23, 2034.34, 3552.86, 2183.47), (3095.59, 1750.44, 3234.4300000000003, 1845.69), (1744.84, 605.89, 1939.21, 697.27), (2712.27, 1724.52, 2895.94, 1813.62), (2094.23, 933.59, 2252.55, 1091.76), (3783.55, 1596.0, 3846.88, 1808.3899999999999), (3717.2, 1937.94, 3863.46, 2043.38), (2788.644600441517, 1971.828466519495, 2952.304600441517, 2111.848466519495), (1607.16, 911.92, 1794.13, 1085.1499999999999), (2786.78, 1788.52, 2922.48, 1871.37), (3644.83, 1864.13, 3702.13, 2005.73), (3673.48, 1746.63, 3760...",3472,4624,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]"
1,id_0005ef295aafe6acc63587db.jpg,"[(1587.45, 2631.2, 1685.22, 2979.6099999999997), (1296.7, 1850.31, 1451.04, 2063.9), (1547.51, 2321.35, 1653.55, 2508.64), (1363.07, 1169.51, 1579.03, 1288.13), (1639.78, 2292.43, 1770.61, 2516.8999999999996), (709.83, 1055.61, 900.5400000000001, 1181.4299999999998), (654.54, 2289.89, 902.5799999999999, 2405.64), (701.36, 940.51, 910.12, 1089.47)]",4160,3120,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]"
2,id_00093f2c76f6488737325859.jpg,"[(2011.86, 606.34, 2131.8199999999997, 692.9300000000001), (2014.47, 1853.32, 2111.86, 1902.03), (2099.08, 1938.75, 2183.7, 2019.39), (1317.91, 1076.31, 1366.5900000000001, 1147.83), (2034.95, 505.73, 2064.63, 565.9300000000001), (1860.4, 2171.9, 1967.3700000000001, 2318.82), (1956.99, 1938.75, 2091.1, 2017.0), (2067.1, 515.63, 2118.22, 571.71), (2139.81, 1583.18, 2198.34, 1675.5800000000002), (1308.0700000000002, 1139.0, 1346.530362519626, 1218.9200000000003), (2278.03, 1478.79, 2419.7700000000004, 1565.55), (2413.67, 1943.41, 2517.4900000000002, 2014.92)]",3000,4000,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]"
3,id_000b2e6c437c643f25d4a6c3.jpg,"[(1521.7366004179107, 2570.031476936281, 1706.9618768284392, 2655.0424331919194), (1806.2078136292744, 2731.831792413831, 1949.4141192236411, 2836.879654075499), (1577.165495344846, 1838.9019674895733, 1659.8141824179822, 2013.1839481008215), (1201.8957810668226, 2229.9192316814765, 1342.6219239210814, 2363.982293690129), (1036.5984069205501, 1859.0114267908712, 1123.7145905922341, 2035.5277917689305), (2176.018095059821, 1477.398852528859, 2270.0921995362996, 1660.3728099779305), (2513.1169694338696, 1848.5745947826902, 2677.746652267707, 1911.3085230509432), (2782.2734350193496, 1231.690...",3008,4000,"[pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw, pbw]"
4,id_001149c0de05ea4941d966e0.jpg,"[(651.0, 1091.0, 1101.0, 1720.0), (2344.0, 1448.0, 2946.0, 1932.0), (1416.0, 1112.0, 1796.0, 1683.0), (1492.0, 1801.0, 2138.0, 2516.0), (1862.0, 180.0, 2347.0, 828.0)]",3120,4160,"[abw, abw, abw, abw, abw]"


In [None]:
np.savetxt(r'c:\data\np.txt', df.values, fmt='%d')

In [None]:
# bbox_copy[bbox_copy['image_id']=='id_efa2d99456fa83bfcf0a6d2b.jpg']['new_bbox']==()

In [None]:
# !ls '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/'

In [None]:
# import mmcv
# ann_file = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/ann_files/train_ann.txt'
# ann_list = mmcv.list_from_file(ann_file)
# ann_list

In [None]:
# data_infos = []

# ann_list = mmcv.list_from_file(ann_file)
# CLASSES = ('abw','pbw')
# cat2label = {k: i for i, k in enumerate(CLASSES)}
# for i, ann_line in enumerate(ann_list):
#     if ann_line != '#':
#         continue

#     height = int(ann_list[i+2])
#     width = int(ann_list[i+3])    
#     bbox_number = int(ann_list[i+4])

#     labels = []
#     bboxes = []

#     for anns in ann_list[i + 5:i + 5 + bbox_number]:
#         anns = (anns.split(" "))
#         print(anns,ann_list[i + 1])
#         bboxes.append([float(ann) for ann in anns[1:]])
#         labels.append(cat2label[(anns[0])])

#     data_infos.append(
#         dict(
#             filename=ann_list[i + 1],
#             width=width,
#             height=height,
#             ann=dict(
#                 bboxes=np.array(bboxes).astype(np.float32),
#                 labels=np.array(labels).astype(np.int64))))


In [None]:
import copy
import os.path as osp
import mmcv
import numpy as np
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

@DATASETS.register_module()
class WadhwaniDetectionDataset(CustomDataset):

    CLASSES = ('abw','pbw')

    def load_annotations(self, ann_file):
        cat2label = {k: i for i, k in enumerate(self.CLASSES)}
        data_infos = []

        ann_list = mmcv.list_from_file(ann_file)
        
        for i, ann_line in enumerate(ann_list):
            if ann_line != '#':
                continue
    
            height = int(ann_list[i+2])
            width = int(ann_list[i+3])    
            bbox_number = int(ann_list[i+4])
                
            labels = []
            bboxes = []
            
            for anns in ann_list[i + 5:i + 5 + bbox_number]:
                anns = (anns.split(" "))
                bboxes.append([float(ann) for ann in anns[1:]])
                labels.append(cat2label[(anns[0])])

            data_infos.append(
                dict(
                    filename=ann_list[i + 1],
                    width=width,
                    height=height,
                    ann=dict(
                        bboxes=np.array(bboxes).astype(np.float32),
                        labels=np.array(labels).astype(np.int64))))

        return data_infos

    def get_ann_info(self, idx):
        return self.data_infos[idx]['ann']

In [None]:
# import copy
# import os.path as osp

# import mmcv
# import numpy as np

# from mmdet.datasets.builder import DATASETS
# from mmdet.datasets.custom import CustomDataset

# @DATASETS.register_module()
# class WadhwaniDetectionDataset(CustomDataset):

#     CLASSES = ('abw','pbw')

#     def load_annotations(self, ann_file):
#         cat2label = {k: i for i, k in enumerate(self.CLASSES)}
#         data_infos = []
#         for image_id in self.ann_file['image_id'].unique():
#             filename = osp.join("///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/images", image_id)

#             image = mmcv.imread(filename)
#             height, width = image.shape[:2]

#             data_info = dict(filename=f'{image_id}', width=width, height=height)

#             lines = self.ann_file[self.ann_file['image_id']==image_id].reset_index(drop=True)

#             content = lines['new_bbox'].values
#             content_label = lines['worm_type'].values
#             bboxes = [[float(info) for info in x] for x in content]
#             labels = [x for x in content_label]

#             gt_bboxes = []
#             gt_labels = []
#             gt_bboxes_ignore = []
#             gt_labels_ignore = []

#             # filter 'DontCare'
#             for bbox_name, bbox in zip(labels, bboxes):
#                 if bbox_name in cat2label:
#                     gt_labels.append(cat2label[bbox_name])
#                     gt_bboxes.append(bbox)
#                 else:
#                     gt_labels_ignore.append(-1)
#                     gt_bboxes_ignore.append(bbox)

#             data_anno = dict(
#                 bboxes=np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
#                 labels=np.array(gt_labels, dtype=np.long),
#                 bboxes_ignore=np.array(gt_bboxes_ignore,
#                                        dtype=np.float32).reshape(-1, 4),
#                 labels_ignore=np.array(gt_labels_ignore, dtype=np.long))

#             data_info.update(ann=data_anno)
#             data_infos.append(data_info)

#         return data_infos
    
#     def get_ann_info(self, idx):
#         return self.data_infos[idx]['ann']

### Get kfolds

In [None]:
train.head()

In [None]:
def make_train_dataset():
    train_new_pbw = pd.DataFrame()
    train_new_pbw['image_id_worm']= train['image_id_worm'].unique()
    train_new_pbw = pd.merge(train_new_pbw,train[train['worm_type']=='pbw'].reset_index(drop=True),on='image_id_worm',how='left')
    train_new_pbw['worm_type'] = 'pbw'
    train_new_pbw.fillna(0,inplace=True)
    
    train_new_abw = pd.DataFrame()
    train_new_abw['image_id_worm']= train['image_id_worm'].unique()
    train_new_abw = pd.merge(train_new_abw,train[train['worm_type']=='abw'].reset_index(drop=True),on='image_id_worm',how='left')
    train_new_abw['worm_type'] = 'abw'
    train_new_abw.fillna(0,inplace=True)
    
    train_out = pd.concat([train_new_pbw,train_new_abw],0).reset_index(drop=True)
    
    assert len(train_out) == train['image_id_worm'].nunique()*2
    train_out = pd.pivot(train_out,'image_id_worm','worm_type','number_of_worms').reset_index()
    train_out[['abw','pbw']] = train_out[['abw','pbw']].astype(int)
    
    train_out['abw_cls'] = (train_out['abw']>0).astype(int)
    train_out['pbw_cls'] = (train_out['pbw']>0).astype(int)
    
    return train_out

train_new = make_train_dataset()

In [None]:
mskf = MultilabelStratifiedKFold(n_splits=CFG.n_splits, shuffle=True, random_state=42)
fold_ids = []
train_new['fold'] = 0

for train_index, test_index in mskf.split(train_new, train_new[['abw','pbw']]):
    fold_ids.append(test_index)
    
for fld in range(CFG.n_splits):
    valIx = fold_ids[fld]
    train_new.loc[valIx,'fold']=fld 

In [None]:
bbox_copy = pd.merge(bbox_copy,train_new[['image_id_worm','fold']],left_on='image_id',right_on='image_id_worm',how='left').drop('image_id_worm',1)
bbox_copy.head()

In [None]:
bbox_train = bbox_copy[bbox_copy['fold']!= 0].reset_index(drop=True)
bbox_val = bbox_copy[bbox_copy['fold']== 0].reset_index(drop=True)

In [None]:
i = 0
with open(f'///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/ann_files/train_ann.txt', 'w') as f:
    for img in bbox_train['image_id'].unique():
        df = bbox_train[bbox_train['image_id']==img].reset_index(drop=True)
        f.write('#')
        f.write('\n')        
        f.write(img)
        f.write('\n')
        f.write(str(df['H'][0]))
        f.write('\n')        
        f.write(str(df['W'][0]))
        f.write('\n')
        f.write(str(len(df)))
        f.write('\n')        
        for line in range(len(df)):
            f.write(df.iloc[line,1])
            f.write(" ")
            f.write(' '.join(map(str, df.iloc[line,3])))
            f.write('\n')

In [None]:
i = 0
with open(f'///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/ann_files/val_ann.txt', 'w') as f:
    for img in bbox_val['image_id'].unique():
        df = bbox_val[bbox_val['image_id']==img].reset_index(drop=True)
        f.write('#')
        f.write('\n')        
        f.write(img)
        f.write('\n')
        f.write(str(df['H'][0]))
        f.write('\n')        
        f.write(str(df['W'][0]))
        f.write('\n')
        f.write(str(len(df)))
        f.write('\n')        
        for line in range(len(df)):
            f.write(df.iloc[line,1])
            f.write(" ")
            f.write(' '.join(map(str, df.iloc[line,3])))
            f.write('\n')

In [None]:
# ls '///home/rajneesh/mmdetection/'

In [None]:
from mmcv import Config
cfg = Config.fromfile('///home/rajneesh/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py')

In [None]:
# train_ann = bbox_copy[bbox_copy['fold']!=0].reset_index(drop=True)
# val_ann = bbox_copy[bbox_copy['fold']==0].reset_index(drop=True)

In [None]:
# Modify dataset type and path
cfg.dataset_type = 'WadhwaniDetectionDataset'
cfg.data_root = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/'

cfg.data.test.type = 'WadhwaniDetectionDataset'
cfg.data.test.data_root = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/'
cfg.data.test.ann_file = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/ann_files/train_ann.txt'
cfg.data.test.img_prefix = 'images/'

cfg.data.train.type = 'WadhwaniDetectionDataset'
cfg.data.train.data_root = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/'
cfg.data.train.ann_file = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/ann_files/train_ann.txt'
cfg.data.train.img_prefix = 'images/'

cfg.data.val.type = 'WadhwaniDetectionDataset'
cfg.data.val.data_root = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/'
cfg.data.val.ann_file = '///mnt/c/Personal/Competitions/Zindi/Wadhwani AI/data/ann_files/val_ann.txt'
cfg.data.val.img_prefix = 'images/'

# modify num classes of the model in box head
cfg.model.roi_head.bbox_head.num_classes = 2
# We can still use the pre-trained Mask RCNN model though we do not need to
# use the mask branch
# cfg.load_from = 'checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'

# Set up working dir to save files and logs.
cfg.work_dir = f'{MODEL_FOLDER}'

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
cfg.optimizer.lr = 0.02
cfg.lr_config.warmup = None
cfg.log_config.interval = 10

# Change the evaluation metric since we use customized dataset.
cfg.evaluation.metric = 'mAP'
# We can set the evaluation interval to reduce the evaluation times
cfg.evaluation.interval = 12
# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 12

# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
cfg.device='cuda'

# We can initialize the logger for training and have a look
# at the final config used for training
# print(f'Config:\n{cfg.pretty_text}')

In [None]:
cfg

In [None]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Build dataset
datasets = [build_dataset(cfg.data.train)]

# Build the detector
model = build_detector(
    cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_detector(model, datasets, cfg, distributed=False, validate=True)

In [None]:
cfg.data.train

### Fin 