# Importing necessary libraries

In [None]:
!pip install pycocotools

In [None]:
!pip install gdown

In [None]:
# importing libraries
import numpy as np
import pandas as pd
import json
from pycocotools.coco import COCO
import matplotlib.pyplot as plt
import os
import tarfile
import cv2
from tqdm import tqdm
from fastai.vision.all import *

# Setting up workspace

In [None]:
def extract(tar_file, path):
    """Extract a tar archive into a directory.

    Args:
        tar_file: Path to the .tar file.
        path: Directory the archive contents are extracted into.

    Prints a message and extracts nothing when ``tar_file`` is not a
    readable tar archive.
    """
    # Validate before opening: tarfile.open() raises ReadError on a non-tar
    # file, so checking afterwards would never reach the message below.
    if not tarfile.is_tarfile(tar_file):
        print("The tar file you entered is not a tar file")
        return

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(tar_file) as opened_tar:
        # NOTE(review): extractall trusts member paths; consider
        # filter="data" (Python 3.12+) if archives may be untrusted.
        opened_tar.extractall(path)

In [None]:
# Environment check: non-empty only when KAGGLE_KERNEL_RUN_TYPE is set,
# which the following cells use to choose the working directory
isKaggle = os.getenv('KAGGLE_KERNEL_RUN_TYPE', '')

In [None]:
# Work in Kaggle's writable folder when on Kaggle, otherwise in the current directory
if isKaggle:
    workingDir = '/kaggle/working'
else:
    workingDir = os.getcwd()
print(f'workingDir: {workingDir}')

In [None]:
# Local copy of the shared Google Drive folder; later cells read the dataset
# archives (Dataset/) and the saved model (Model/) from it
gdriveFolder = workingDir+'/Dat255_Course_Project'

# Download only once; later runs reuse the existing folder
if not os.path.exists(gdriveFolder):
    # https://drive.google.com/drive/u/1/folders/1ETw2awPpZROOLfea_t0QOAkF5TQaipfi
    # NOTE(review): gdown presumably writes into the current directory, which
    # must match workingDir for the check above to find it — confirm
    !gdown 1ETw2awPpZROOLfea_t0QOAkF5TQaipfi --folder
else:
    print('Google drive folder already downloaded')

In [None]:
# Root folder for the extracted dataset splits
dataDir = f'{workingDir}/data'

if os.path.exists(dataDir):
    print('Dataset already extracted')
else:
    # (archive path inside the Drive folder, target folder inside dataDir)
    for archive_rel, target_rel in (
        ('/Dataset/public_test_release_2.1.tar', '/test'),
        ('/Dataset/public_training_set_release_2.1.tar', '/train'),
        ('/Dataset/public_validation_set_release_2.1.tar', '/validate'),
    ):
        extract(gdriveFolder + archive_rel, dataDir + target_rel)

In [None]:
# Training split: image folder, annotation file and the COCO helper built from it
train_img_path = f'{dataDir}/train/images/'
train_annotations_path = f'{dataDir}/train/annotations.json'
train_coco = COCO(train_annotations_path)

# Validation split, same layout
validation_img_path = f'{dataDir}/validate/images/'
validation_annotations_path = f'{dataDir}/validate/annotations.json'
validation_coco = COCO(validation_annotations_path)

# Test split only has images
test_img_path = f'{dataDir}/test/images/'

# Folders the generated segmentation masks are written to
train_mask_path = f'{dataDir}/train/mask/'
val_mask_path = f'{dataDir}/validate/mask/'

# Saved model weights inside the downloaded Drive folder
model_path = f'{gdriveFolder}/Model/model_1'

In [None]:
# Annotation dictionaries for both splits.
# The COCO helpers already parsed these JSON files and keep the result in
# their `dataset` attribute, so reuse it instead of decoding the (large)
# files a second time and holding two copies in memory.
train_annotations = train_coco.dataset
validation_annotations = validation_coco.dataset

# Exploring the data

In [None]:
# Top-level sections of the annotation file
train_annotations.keys()

In [None]:
# Look at a single category entry (index 42 is presumably just an arbitrary sample)
train_annotations["categories"][42]

In [None]:
# Show the "info" section as-is; slicing it with [:] raises when it is a
# dict, which is how the COCO format stores this section.
train_annotations["info"]

In [None]:
# First five image records
train_annotations["images"][:5]

In [None]:
# First annotation record
train_annotations["annotations"][0]

In [None]:
# Ids of all categories in the training annotations, and how many there are
cat_ids = train_coco.getCatIds()
print(len(cat_ids))

In [None]:
# Category records for every id, reduced to their display names
cats = train_coco.loadCats(cat_ids)
cat_names = [entry['name'] for entry in cats]
print(cat_names)

In [None]:
# Count the images of every category, then sort with the most frequent first
imgs_per_cat = {
    name: len(train_coco.getImgIds(catIds=cat_id))
    for cat_id, name in zip(cat_ids, cat_names)
}

# After sorting this is a list of (category name, image count) pairs
imgs_per_cat = sorted(imgs_per_cat.items(), key=lambda pair: pair[1], reverse=True)

print(imgs_per_cat)

In [None]:
%matplotlib inline

# Plot the per-category image counts as consecutive bar charts so the
# category labels stay readable. Each figure shows the slice
# [start, stop) of the sorted (name, count) pairs.
chunk_starts = (0, 80, 150, 230)
chunk_stops = chunk_starts[1:] + (len(imgs_per_cat),)

for start, stop in zip(chunk_starts, chunk_stops):
    chunk = imgs_per_cat[start:stop]
    if not chunk:
        # Fewer categories than this chunk's start: nothing left to draw
        break

    # Derive positions from the actual chunk length so bar positions and
    # heights always match, even when the last chunk is shorter than planned
    positions = range(start, start + len(chunk))

    plt.figure(figsize=(30, 5))
    plt.bar(positions, [count for _, count in chunk])
    plt.xticks(positions, [name for name, _ in chunk], rotation=90)

plt.show()

# Visualization

In [None]:
# Annotations attached to the training image at index 3
sample_img = train_annotations['images'][3]
ann_ids = train_coco.getAnnIds(imgIds=sample_img['id'])
anns = train_coco.loadAnns(ann_ids)

In [None]:
# Load and display the image at index 3 without axes
sample_file = train_img_path + train_annotations['images'][3]['file_name']
plt.imshow(plt.imread(sample_file))
plt.axis(False)

# Draw its annotations on top
train_coco.showAnns(anns)

In [None]:
# Same lookup as the earlier visualization cell (image at index 3);
# presumably repeated so the mask cell below can be run on its own
ann_ids = train_coco.getAnnIds(imgIds=train_annotations['images'][3]['id'])
anns = train_coco.loadAnns(ann_ids)

In [None]:
# Start from the first annotation's binary mask, then add every annotation's
# mask multiplied by its index in the list
mask = train_coco.annToMask(anns[0])
for idx, ann in enumerate(anns):
    mask += train_coco.annToMask(ann) * idx

plt.imshow(mask)
plt.axis(False)
plt.show()

In [None]:
# Show the combined annotation masks of the first 30 training images
# in a 3x10 grid
fig = plt.figure(figsize=(30, 10))

# Slicing (instead of indexing 0..29) also works with fewer than 30 images
for i, img in enumerate(train_annotations['images'][:30]):
    ann_ids = train_coco.getAnnIds(imgIds=img['id'])
    anns = train_coco.loadAnns(ann_ids)

    ax = fig.add_subplot(3, 10, i + 1)
    ax.axis('off')

    if not anns:
        # Image without annotations: leave its cell empty instead of
        # failing on anns[0]
        continue

    # First annotation's binary mask plus annotation j's mask scaled by j;
    # j starts at 1 because the j == 0 term only adds zeros
    mask = train_coco.annToMask(anns[0])
    for j, ann in enumerate(anns[1:], start=1):
        mask += train_coco.annToMask(ann) * j

    ax.imshow(mask)

# Preparing the data

In [None]:
def create_masks(maskpath, coco_anns):
    """Write one mask image per image of a COCO dataset into ``maskpath``.

    Args:
        maskpath: Folder to put the masks in. If it already exists the
            function prints a message and writes nothing.
        coco_anns: COCO annotations helper; must provide ``getImgIds``,
            ``loadImgs``, ``getAnnIds``, ``loadAnns`` and ``annToMask``.

    Each mask is the first annotation's binary mask plus the mask of
    annotation ``j`` multiplied by ``j``. Images without annotations get an
    all-zero mask. Masks are saved under the image's own ``file_name``.
    """
    if os.path.exists(maskpath):
        print('Path exists')
        return

    # makedirs also creates missing parent folders (os.mkdir would fail)
    os.makedirs(maskpath)

    for img in tqdm(coco_anns.loadImgs(coco_anns.getImgIds())):
        anns = coco_anns.loadAnns(coco_anns.getAnnIds(imgIds=img['id']))

        if anns:
            mask = coco_anns.annToMask(anns[0])
            # j starts at 1 because the j == 0 term only adds zeros
            for j, ann in enumerate(anns[1:], start=1):
                mask += coco_anns.annToMask(ann) * j
        else:
            # No annotations: write an empty mask instead of failing on
            # anns[0]. Uses the image record's height/width, the same
            # fields annToMask relies on for annotated images.
            mask = np.zeros((img['height'], img['width']), dtype=np.uint8)

        # NOTE(review): the mask keeps the image's file name; if that is a
        # .jpg the stored pixel values go through lossy compression — confirm
        # this is acceptable for label masks.
        # os.path.join works with or without a trailing '/' on maskpath.
        cv2.imwrite(os.path.join(maskpath, img['file_name']), mask)

In [None]:
# Generate the mask folders for the training and validation splits
for mask_dir, coco_helper in (
    (train_mask_path, train_coco),
    (val_mask_path, validation_coco),
):
    create_masks(mask_dir, coco_helper)

In [None]:
# Define a regular function for get_y
def get_y_func(x):
    """Return the mask path for image path ``x``: ``train_mask_path`` + ``<stem>.jpg``."""
    mask_name = f'{x.stem}.jpg'
    return train_mask_path + mask_name

In [None]:
# Segmentation data block: one image input, one mask target looked up via
# get_y_func, every item squished to 256
foodcoco = DataBlock(
    blocks=(ImageBlock, MaskBlock()),
    n_inp=1,
    get_items=get_image_files,
    get_y=get_y_func,
    item_tfms=[Resize(256, method='squish')],
)

In [None]:
# Pass the training image folder without its trailing '/' and load batches of 8
train_img_dir = train_img_path[:-1]
dls = foodcoco.dataloaders(train_img_dir, bs=8)
dls.show_batch()

# Training and fine tuning the model

In [None]:
# U-Net learner on top of a resnet18 backbone
# NOTE(review): n_out=323 is hard-coded — presumably the number of mask
# classes the model predicts; confirm it matches the labels in the masks
learn = unet_learner(dls, resnet18, n_out=323)

In [None]:
#learn.fine_tune(1) # Large dataset, only doing 1 epoch to save time

# Evaluating the model

In [None]:
# Saving the model
#learn.save('model_1')
#learn.export('model_1_ex.pkl')

In [None]:
# Reloading the trained model
# model_path points into the Model/ folder of the downloaded Drive folder
learn.load(model_path)

In [None]:
# Display up to five results from the learner
learn.show_results(max_n=5, figsize=(12,6))

In [None]:
# Running out of memory, when predicting on the entire test set

# Predicting on the test data
test_files = get_image_files(test_img_path)
# Build a test dataloader from the learner's own data pipeline
test_dl = learn.dls.test_dl(test_files[:5]) # Limiting to 5 files
# The result is indexed below as preds[0][i] to get the prediction for image i
preds = learn.get_preds(dl=test_dl)

In [None]:
# Show the test images that were fed to the model
test_dl.show_batch()

In [None]:
sample_idx = 0  # Change this index to view the mask of the corresponding image
pred_1 = preds[0][sample_idx]
# Keep, for every pixel, the index with the highest score along dim 0
pred_arx = pred_1.argmax(dim=0)
plt.imshow(pred_arx)
plt.axis(False)