## Preprocessing the dataset
* Converting the trimap masks into binary masks.
* Creating an annotations CSV file with only the species label.

In [13]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

In [5]:
# The data folder is a sibling of the directory the notebook runs from:
# <parent of cwd>/Data
parent_dir = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_dir, "Data")
data_path

'd:\\Study\\Conda Projects\\opdl\\Data'

In [6]:
# Pass each directory level as its own component so os.path.join picks the
# separator, instead of hard-coding a Windows backslash inside one component.
images_path = os.path.join(data_path, "images", "images")
images_path

'd:\\Study\\Conda Projects\\opdl\\Data\\images\\images'

In [35]:
# Folder holding the trimap PNGs. Components are passed separately so the
# path separator comes from os.path.join rather than a literal backslash.
annotations_path = os.path.join(data_path, "annotations", "annotations", "trimaps")
annotations_path

'd:\\Study\\Conda Projects\\opdl\\Data\\annotations\\annotations\\trimaps'

In [12]:
# Destination folder for the generated masks: <data_path>/masks
masks_dir_name = "masks"
output_path = os.path.join(data_path, masks_dir_name)
output_path

'd:\\Study\\Conda Projects\\opdl\\Data\\masks'

### Creation of the binary masks for segmentation
* Merged the border region with the object body (both become foreground).
* Standardized the pixel values to 0 (background) and 255 (foreground).

In [42]:
def convert_to_binary(path, img, out_dir=None):
    """Turn the trimap belonging to ``img`` into a two-level mask and save it.

    The trimap ``<stem of img>.png`` is read from ``path``. Pixels matching
    2/255 are set to 0, pixels matching 1/255 or 3/255 are set to 255, and
    the result is written with a gray colormap under the same file name as
    ``img``.

    Parameters
    ----------
    path : str
        Directory containing the trimap ``.png`` files.
    img : str
        Image file name (e.g. ``"name.jpg"``). Its stem selects the trimap
        and the full name is reused for the output file.
    out_dir : str, optional
        Directory the mask is written to. Defaults to the notebook-level
        ``output_path`` so existing two-argument calls keep working.
    """
    if out_dir is None:
        out_dir = output_path
    # imsave does not create missing folders; make sure the target exists.
    os.makedirs(out_dir, exist_ok=True)

    # splitext strips only the final extension, so names that contain extra
    # dots still map to the right trimap (split(".")[0] would truncate them).
    stem = os.path.splitext(img)[0]
    trimap_file = os.path.join(path, stem + ".png")

    # Work on a copy so the in-place writes never touch the array returned
    # by imread.
    img_trimap = mpimg.imread(trimap_file).copy()

    # Compute all three selections on the untouched values before writing.
    is_background = np.isclose(img_trimap, 0.00784314)  # 2/255
    is_object = np.isclose(img_trimap, 0.00392157)      # 1/255
    is_border = np.isclose(img_trimap, 0.01176471)      # 3/255

    img_trimap[is_background] = 0
    img_trimap[is_object | is_border] = 255  # border is merged into the object

    # NOTE(review): the output keeps ``img``'s file name, so for a ``.jpg``
    # input the mask is presumably JPEG-encoded (format inferred from the
    # extension). Lossy compression can introduce values other than 0/255;
    # consider writing ``stem + ".png"`` if downstream code allows it.
    mpimg.imsave(os.path.join(out_dir, img), img_trimap, cmap='gray')


In [46]:
# Build a mask for every JPEG in the image folder and keep a running tally.
count_of_masks_processed = 0
for image_name in os.listdir(images_path):
    if not image_name.endswith('.jpg'):
        continue  # skip anything that is not a JPEG image
    convert_to_binary(annotations_path, image_name)
    count_of_masks_processed += 1
print("Count of masks processed are: ",count_of_masks_processed)

Count of masks processed are:  7390


### Creation of the annotation file

In [47]:
# Path to the train/val annotation list. Components are passed separately so
# the separator is chosen by os.path.join instead of a hard-coded backslash.
annotations_trainval = os.path.join(data_path, "annotations", "annotations", "trainval.txt")

In [49]:
# The annotation list has no header row, so column names are supplied here.
column_headers = ["image","class_id","species","breed_id"]
# Raw string: '\s' in a normal string literal is an invalid escape sequence
# and triggers a warning on recent Python versions.
df_annotations = pd.read_csv(annotations_trainval, names=column_headers, sep=r'\s+')

In [50]:
# Peek at the first rows of the freshly loaded annotations.
df_annotations.head(n=5)

Unnamed: 0,image,class_id,species,breed_id
0,Abyssinian_100,1,1,1
1,Abyssinian_101,1,1,1
2,Abyssinian_102,1,1,1
3,Abyssinian_103,1,1,1
4,Abyssinian_104,1,1,1


In [51]:
# Keep only the image name and species label. Reassigning instead of using
# inplace=True, together with errors='ignore', lets this cell be re-run
# without raising KeyError once the columns are already gone.
df_annotations = df_annotations.drop(columns=['class_id','breed_id'], errors='ignore')

In [52]:
# Confirm that only the image and species columns remain.
df_annotations.head(n=5)

Unnamed: 0,image,species
0,Abyssinian_100,1
1,Abyssinian_101,1
2,Abyssinian_102,1
3,Abyssinian_103,1
4,Abyssinian_104,1


In [53]:
# Number of annotated images per species label.
df_annotations.groupby(by='species').count()

Unnamed: 0_level_0,image
species,Unnamed: 1_level_1
1,1188
2,2492


In [55]:
# Output location for the reduced annotation table: <data_path>/annotations.csv
csv_file_name = 'annotations.csv'
path_annotations_csv = os.path.join(data_path, csv_file_name)
path_annotations_csv

'd:\\Study\\Conda Projects\\opdl\\Data\\annotations.csv'

In [56]:
# Write the table without the positional index column.
df_annotations.to_csv(path_or_buf=path_annotations_csv, index=False)