In [2]:
import numpy as np
import pandas as pd
import cv2 as cv
import matplotlib.pyplot as plt
import os
from IPython.display import clear_output
import shutil
import random

%matplotlib inline 

In [31]:
# Preparing Directories
# Folder that holds the raw images referenced by the CSV files.
BEAR_DATA_PATH = '../bear_images/bear_images/'

# YOLO dataset layout: an images/ and a labels/ folder for each split.
train_img_path = "../yolo_data/train/images/"
train_label_path = "../yolo_data/train/labels/"

val_img_path = "../yolo_data/val/images/"
val_label_path = "../yolo_data/val/labels/"

PATH_LIST = [train_img_path, val_img_path, train_label_path, val_label_path]

# Create the whole tree up front; exist_ok=True keeps the cell safe to re-run.
for path in PATH_LIST:
    os.makedirs(path, exist_ok=True)

In [3]:
# Load the training annotations; the columns used below are
# file_name, x1, y1, x2, y2 and confidence.
data = pd.read_csv('../csv_files/train.csv')
data.shape

(340, 6)

In [33]:
# Split the annotations on the confidence flag:
# rows with confidence 1.0 are bear images, rows with 0.0 are images without bears.
has_bear = data['confidence'] == 1.0
has_no_bear = data['confidence'] == 0.0

bears_df = data[has_bear]
not_bears_df = data[has_no_bear]

In [34]:
bear_names = bears_df['file_name'].to_list()

# Seed inside the cell so the sampled validation set is the same on every run.
random.seed(42)

# Hold out a quarter of the bear images for validation.
k = int(len(bear_names)*0.25)
val_bears = random.sample(bear_names, k=k)

# Everything that was not sampled goes to train. Filtering the original list
# (de-duplicated with dict.fromkeys) instead of taking a set difference keeps
# the order identical between kernel sessions; set iteration order over
# strings changes with hash randomisation.
held_out = set(val_bears)
train_bears = [name for name in dict.fromkeys(bear_names) if name not in held_out]

# One copy loop for both splits; validation images are copied first, then train.
for names, img_dir in ((val_bears, val_img_path), (train_bears, train_img_path)):
    for file in names:
        src_file = os.path.join(BEAR_DATA_PATH, file)
        dst_file = os.path.join(img_dir, file)
        shutil.copy(src_file, dst_file)


In [35]:
# Getting Labels
# Preview the bear rows: each has a file name and box corners (x1, y1, x2, y2).
bears_df.head()

Unnamed: 0,file_name,x1,y1,x2,y2,confidence
1,image_102.jpeg,282,223,755,723,1.0
4,image_105.jpeg,189,328,402,728,1.0
6,image_107.jpeg,276,439,523,767,1.0
7,image_111.jpeg,129,381,346,653,1.0
9,image_113.png,873,88,996,248,1.0


In [36]:
# Collect the pixel height and width of every bear image; a later cell divides
# the bounding boxes by these values.
shapes = {'file_name': [],
          'file_height': [],
          'file_width': []}

bears_img_dir = list(BEAR_DATA_PATH + bears_df['file_name'])

for image in bears_img_dir:
    img = cv.imread(image)
    # cv.imread does not raise on a missing or unreadable file, it returns None.
    # Fail here with the offending path instead of an opaque AttributeError.
    if img is None:
        raise FileNotFoundError(f'Could not read image: {image}')

    # Only the first two dimensions are needed; slicing avoids depending on
    # the presence of a channel axis.
    height, width = img.shape[:2]

    shapes['file_name'].append(os.path.basename(image))
    shapes['file_width'].append(width)
    shapes['file_height'].append(height)


In [37]:
# Turn the collected lists into a table with one row per image.
shapes_df = pd.DataFrame.from_dict(shapes)
shapes_df.head()

Unnamed: 0,file_name,file_height,file_width
0,image_102.jpeg,768,768
1,image_105.jpeg,768,768
2,image_107.jpeg,768,768
3,image_111.jpeg,768,768
4,image_113.png,256,1024


In [38]:
# Attach each image's height and width to its bounding-box row, matching on file name.
# After this cell file_name is the index of bears_df, not a column.
boxes_by_name = bears_df.set_index('file_name')
shapes_by_name = shapes_df.set_index('file_name')

bears_df = boxes_by_name.join(shapes_by_name)
bears_df

Unnamed: 0_level_0,x1,y1,x2,y2,confidence,file_height,file_width
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
image_102.jpeg,282,223,755,723,1.0,768,768
image_105.jpeg,189,328,402,728,1.0,768,768
image_107.jpeg,276,439,523,767,1.0,768,768
image_111.jpeg,129,381,346,653,1.0,768,768
image_113.png,873,88,996,248,1.0,256,1024
...,...,...,...,...,...,...,...
image_479.jpeg,329,81,767,407,1.0,768,768
image_481.jpeg,118,279,524,612,1.0,768,768
image_483.jpeg,313,134,767,598,1.0,768,768
image_487.jpeg,204,370,595,666,1.0,768,768


In [39]:
# Convert corner boxes (x1, y1, x2, y2) to centre/size form, then divide by the
# image dimensions so every value is a fraction of the image width or height.
# Entries of a single assign() are evaluated in order, so the *_norm columns
# can read the columns created just above them.
bears_df = bears_df.assign(
    x_center=lambda d: (d['x1'] + d['x2']) / 2,
    y_center=lambda d: (d['y1'] + d['y2']) / 2,
    width=lambda d: d['x2'] - d['x1'],
    height=lambda d: d['y2'] - d['y1'],
    x_center_norm=lambda d: d['x_center'] / d['file_width'],
    y_center_norm=lambda d: d['y_center'] / d['file_height'],
    width_norm=lambda d: d['width'] / d['file_width'],
    height_norm=lambda d: d['height'] / d['file_height'],
    **{'class': 0},  # 'class' is a Python keyword, hence the dict; every row gets class id 0
)

clear_output()

In [40]:
# Check the derived centre/size columns and their normalised versions.
bears_df.head()

Unnamed: 0_level_0,x1,y1,x2,y2,confidence,file_height,file_width,x_center,y_center,width,height,x_center_norm,y_center_norm,width_norm,height_norm,class
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
image_102.jpeg,282,223,755,723,1.0,768,768,518.5,473.0,473,500,0.67513,0.615885,0.615885,0.651042,0
image_105.jpeg,189,328,402,728,1.0,768,768,295.5,528.0,213,400,0.384766,0.6875,0.277344,0.520833,0
image_107.jpeg,276,439,523,767,1.0,768,768,399.5,603.0,247,328,0.520182,0.785156,0.321615,0.427083,0
image_111.jpeg,129,381,346,653,1.0,768,768,237.5,517.0,217,272,0.309245,0.673177,0.282552,0.354167,0
image_113.png,873,88,996,248,1.0,256,1024,934.5,168.0,123,160,0.912598,0.65625,0.120117,0.625,0


In [41]:
# Keep only the five label fields, as strings, and join them with single spaces
# into one text line per image.
label_columns = ['class', 'x_center_norm', 'y_center_norm', 'width_norm', 'height_norm']

bears_df = bears_df[label_columns].astype(str)
bears_df['concat_columns'] = bears_df.apply(" ".join, axis=1)
clear_output()

In [42]:
# Inspect the stringified label fields and the combined label line.
bears_df.head()

Unnamed: 0_level_0,class,x_center_norm,y_center_norm,width_norm,height_norm,concat_columns
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
image_102.jpeg,0,0.6751302083333334,0.6158854166666666,0.6158854166666666,0.6510416666666666,0 0.6751302083333334 0.6158854166666666 0.6158...
image_105.jpeg,0,0.384765625,0.6875,0.27734375,0.5208333333333334,0 0.384765625 0.6875 0.27734375 0.520833333333...
image_107.jpeg,0,0.5201822916666666,0.78515625,0.3216145833333333,0.4270833333333333,0 0.5201822916666666 0.78515625 0.321614583333...
image_111.jpeg,0,0.3092447916666667,0.6731770833333334,0.2825520833333333,0.3541666666666667,0 0.3092447916666667 0.6731770833333334 0.2825...
image_113.png,0,0.91259765625,0.65625,0.1201171875,0.625,0 0.91259765625 0.65625 0.1201171875 0.625


In [43]:
# Move the file names out of the index into a regular column and fall back to
# a plain integer index.
# The guard makes the cell safe to re-run: without it a second run would
# overwrite file_name with the integers left behind by reset_index.
if 'file_name' not in bears_df.columns:
    bears_df['file_name'] = bears_df.index
    bears_df = bears_df.reset_index(drop=True)

In [44]:
# Only the file name and its finished label line are needed to write the label files.
to_export = bears_df.loc[:, ['file_name', 'concat_columns']]
to_export.head()

Unnamed: 0,file_name,concat_columns
0,image_102.jpeg,0 0.6751302083333334 0.6158854166666666 0.6158...
1,image_105.jpeg,0 0.384765625 0.6875 0.27734375 0.520833333333...
2,image_107.jpeg,0 0.5201822916666666 0.78515625 0.321614583333...
3,image_111.jpeg,0 0.3092447916666667 0.6731770833333334 0.2825...
4,image_113.png,0 0.91259765625 0.65625 0.1201171875 0.625


In [45]:
# Route each label row to the split its image was copied into.
in_train = to_export['file_name'].isin(train_bears)
in_val = to_export['file_name'].isin(val_bears)

labels_train = to_export[in_train]
labels_val = to_export[in_val]

In [46]:
# Report how many label rows ended up in each split (rows, columns).
print(f'Train labels shape: {labels_train.shape}')
print(f'Val labels shape: {labels_val.shape}')

Train labels shape: (61, 2)
Val labels shape: (20, 2)


In [47]:
# Convert to nested dicts of the form {column: {row index: value}}; the writer
# loops below look up 'file_name' and 'concat_columns' by the same row index.
labels_train_dict = labels_train.to_dict()
labels_val_dict = labels_val.to_dict()

In [48]:
# Write one .txt label file per training image, named after the image.
# The loop variables are deliberately not called k/v so the sample size `k`
# defined in an earlier cell is not overwritten.
for row_id, label_line in labels_train_dict['concat_columns'].items():

    # splitext removes only the final extension, so a name such as
    # 'img.v2.jpeg' becomes 'img.v2' rather than being cut at the first dot.
    f_name = os.path.splitext(labels_train_dict['file_name'][row_id])[0]
    with open(os.path.join(train_label_path, f'{f_name}.txt'), 'w') as f:
        f.write(label_line)

In [49]:
# Write one .txt label file per validation image, named after the image.
# The loop variables are deliberately not called k/v so the sample size `k`
# defined in an earlier cell is not overwritten.
for row_id, label_line in labels_val_dict['concat_columns'].items():

    # splitext removes only the final extension, so a name such as
    # 'img.v2.jpeg' becomes 'img.v2' rather than being cut at the first dot.
    f_name = os.path.splitext(labels_val_dict['file_name'][row_id])[0]
    with open(os.path.join(val_label_path, f'{f_name}.txt'), 'w') as f:
        f.write(label_line)

In [50]:
not_bear_names = not_bears_df['file_name'].to_list()

# Validation receives three bear-free images for every validation bear image.
k = int(len(bear_names)*0.25)
not_bear_k = k*3

# Sample with a dedicated, locally seeded generator. Relying on the global
# `random` state made the result depend on which cells had run before this
# one; re-running the cell then produced a different split and could leave
# the same image in both train and val.
rng = random.Random(42)
val_not_bears = rng.sample(not_bear_names, k=not_bear_k)

# Filter the original list (de-duplicated) rather than using a set difference
# so the train order is the same in every kernel session.
held_out = set(val_not_bears)
train_not_bears = [name for name in dict.fromkeys(not_bear_names) if name not in held_out]


print(len(train_not_bears))
print(len(val_not_bears))

# One copy loop for both splits; validation images are copied first, then train.
# No label files are written for these images in this cell.
for names, img_dir in ((val_not_bears, val_img_path), (train_not_bears, train_img_path)):
    for file in names:
        src_file = os.path.join(BEAR_DATA_PATH, file)
        dst_file = os.path.join(img_dir, file)
        shutil.copy(src_file, dst_file)

199
60


In [51]:
# Load the list of test images; the only column read from it below is file_name.
test_df = pd.read_csv('../csv_files/test.csv')
test_df.head()

Unnamed: 0,file_name
0,image_101.jpeg
1,image_108.jpeg
2,image_109.png
3,image_10.jpeg
4,image_110.jpeg


In [52]:
# Number of test rows and columns before the path column is added.
test_df.shape

(149, 1)

In [53]:
# Destination folder for the test images.
# NOTE(review): unlike the '../yolo_data/...' folders, this path is relative to
# the current working directory — confirm that is intended.
os.makedirs('test_images',exist_ok=True)

In [54]:
# Display the image source folder that is used to build the test paths.
# NOTE(review): the recorded output is 'bear_images/bear_images/', which does
# not match the '../bear_images/bear_images/' assigned in the directory-setup
# cell. The constant appears to have been edited after this cell ran; confirm
# which value is correct and re-run the notebook from a fresh kernel.
BEAR_DATA_PATH

'bear_images/bear_images/'

In [55]:
# Prefix every test file name with the image folder to get its source path.
test_df = test_df.assign(path=BEAR_DATA_PATH + test_df['file_name'])
test_df

Unnamed: 0,file_name,path
0,image_101.jpeg,bear_images/bear_images/image_101.jpeg
1,image_108.jpeg,bear_images/bear_images/image_108.jpeg
2,image_109.png,bear_images/bear_images/image_109.png
3,image_10.jpeg,bear_images/bear_images/image_10.jpeg
4,image_110.jpeg,bear_images/bear_images/image_110.jpeg
...,...,...
144,image_85.jpeg,bear_images/bear_images/image_85.jpeg
145,image_91.png,bear_images/bear_images/image_91.png
146,image_92.jpeg,bear_images/bear_images/image_92.jpeg
147,image_94.jpeg,bear_images/bear_images/image_94.jpeg


In [56]:
dst_dir = 'test_images'

# Make sure the destination is a real directory before copying. If it were
# missing, shutil.copy would treat 'test_images' as a file name and overwrite
# that single file on every iteration instead of collecting the images.
os.makedirs(dst_dir, exist_ok=True)

# Copy every test image into the destination folder, keeping its base name.
src_dir = test_df['path'].to_list()
for src_path in src_dir:
    shutil.copy(src_path, dst_dir)

In [57]:
# Same row count as before; the extra column is the path added above.
test_df.shape

(149, 2)