# Python Script to Generate Cropped Images for CNN Classification Training

#### Cell to Mount Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; force_remount refreshes an
# already-mounted drive so re-running this cell does not fail.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT, force_remount=True)

Mounted at /content/drive


#### Importing Libraries

In [None]:
import os
import cv2
from decimal import Decimal
from PIL import Image 
import PIL 
import numpy as np
from natsort import natsorted 
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow import keras
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

##### Reading File Data

In [None]:
# Directories holding the images and their per-image annotation .txt files.
# The order here is the order in which the datasets are scanned.
path_list = [
    "drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/",
    "drive/MyDrive/oxwash/dataset1/final_v2/",
]

In [None]:
# Collect annotation (.txt) and image files from every dataset directory.
#
# Files are natural-sorted *inside each directory* and the directories are
# kept in `path_list` order. Sorting the combined list instead would move
# every ".../dataset1/..." path ahead of ".../dataset2/...", which no longer
# matches the order in which the label CSVs are concatenated (dataset2 first,
# then dataset1) and would pair images with the wrong label rows, because
# files and label rows are matched purely by position later on.
txt_list = []
image_list = []

IMAGE_EXTENSIONS = (".jpeg", ".png", ".jpg")

for path in path_list:
  dir_txt = []
  dir_images = []
  for name in os.listdir(path):
    # Compare the real file extension (case-insensitively) rather than
    # testing for a substring anywhere in the file name.
    ext = os.path.splitext(name)[1].lower()
    if ext == ".txt":
      dir_txt.append(path + name)
    elif ext in IMAGE_EXTENSIONS:
      dir_images.append(path + name)

  txt_list.extend(natsorted(dir_txt))
  image_list.extend(natsorted(dir_images))

# Report counts only; printing the full lists floods the cell output.
print(f"Found {len(image_list)} images and {len(txt_list)} annotation files")

['drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/1.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/2.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/3.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/4.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/5.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/6.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/7.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/8.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/9.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/10.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/11.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/12.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/obj/13.txt', 'drive/MyDrive/oxwash/dataset2/data/individual_care_symbols

In [None]:
# Sanity check for dataset1 only: list its annotation and image files,
# natural-sort both lists and print one pair to confirm they line up.
dataset1_dir = "drive/MyDrive/oxwash/dataset1/final_v2/"
txt_list_1 = []
image_list_1 = []

for entry in os.listdir(dataset1_dir):
  full_path = dataset1_dir + entry
  if ".txt" in entry:
    txt_list_1.append(full_path)
    continue
  if ".jpg" in entry:
    image_list_1.append(full_path)

image_list_1 = natsorted(image_list_1)
txt_list_1 = natsorted(txt_list_1)

# Both lists should have the same length and the same stem at a given index.
print(len(image_list_1), len(txt_list_1))
print(txt_list_1[131])
print(image_list_1[131])

199 199
drive/MyDrive/oxwash/dataset1/final_v2/133.txt
drive/MyDrive/oxwash/dataset1/final_v2/133.jpg


In [None]:
# Class labels; one output sub-directory is created per label below.
categories = [
    "unspecified_material",
    # bleaching
    "bleaching_any",
    "bleaching_not",
    "bleaching_onlySome",
    # drying
    "drying_flat",
    "drying_flat_drip",
    "drying_flat_drip_shade",
    "drying_flat_shade",
    "drying_line",
    "drying_line_drip",
    "drying_line_drip_shade",
    "drying_line_shade",
    "drying_tumble_lower_60",
    "drying_tumble_normal_80",
    "drying_tumble_not",
    # ironing
    "ironing_not",
    "ironing_plate_110",
    "ironing_plate_150",
    "ironing_plate_200",
    # professional cleaning
    "professional_dry_mild_F",
    "professional_dry_mild_P",
    "professional_dry_normal_F",
    "professional_dry_normal_P",
    "professional_dry_not",
    "professional_wet_mild_W",
    "professional_wet_normal_W",
    "professional_wet_not",
    "professional_wet_veryMild_W",
    # washing
    "washing_hand_40",
    "washing_mild_30",
    "washing_mild_40",
    "washing_mild_60",
    "washing_normal_30",
    "washing_normal_40",
    "washing_normal_60",
    "washing_normal_95",
    "washing_not",
    "washing_veryMild_30",
    "washing_veryMild_40",
]

#### Creating Directory for Generated Crop Images

In [None]:
# Create the output root and one sub-directory per class label.
# os.makedirs(..., exist_ok=True) creates the parent directory as needed and
# keeps this cell re-runnable: os.mkdir raises FileExistsError as soon as the
# directory is already there.
os.makedirs("drive/MyDrive/oxwash/dataset_v5", exist_ok=True)
for label in categories:
  os.makedirs("drive/MyDrive/oxwash/dataset_v5/" + str(label), exist_ok=True)

#### Using Bounding Boxes in the image

Function for Calculating $(x,y,w,h)$ for Image Cropping

In [None]:
def coordinates(lines,img):
  """Crop every annotated bounding box out of an image.

  Args:
    lines: iterable of annotation strings. Each non-blank entry holds five
      whitespace-separated numbers ``<id> <x> <y> <w> <h>``; the first value
      is ignored, ``x``/``y`` are the box centre and ``w``/``h`` the box size,
      all expressed as fractions of the image width/height.
    img: image as a NumPy array whose first two axes are (height, width).
      Both 2-D (grayscale) and 3-D (multi-channel) arrays are accepted.

  Returns:
    ``(img, crop_list)`` where ``crop_list`` holds one cropped view of ``img``
    per non-blank annotation line, in the order the lines were given.

  Raises:
    ValueError: if a non-blank line does not contain exactly five numbers.
  """
  crop_list = []
  dh, dw = img.shape[:2]   # works for grayscale and colour arrays alike

  for line in lines:
    fields = line.split()   # tolerant of repeated spaces / trailing newline
    if not fields:
      continue              # skip blank lines (e.g. a trailing empty line)
    _, x, y, w, h = map(float, fields)

    # Convert centre/size fractions into pixel edges of the box.
    left = int((x - w / 2) * dw)
    right = int((x + w / 2) * dw)
    top = int((y - h / 2) * dh)
    bottom = int((y + h / 2) * dh)

    # Clamp to the image. Slice ends are exclusive, so the upper bounds are
    # dw / dh (not dw - 1 / dh - 1), otherwise a box touching the right or
    # bottom border would lose its last column or row.
    left = max(left, 0)
    top = max(top, 0)
    right = min(right, dw)
    bottom = min(bottom, dh)

    crop_list.append(img[top:bottom, left:right])

  return img , crop_list

In [None]:
# CSV files holding the class labels; each row lists the labels of one image.
label_path_lst = ["drive/MyDrive/oxwash/dataset2/data/individual_care_symbols/Annotated_Revised.csv", "drive/MyDrive/oxwash/dataset1/labeled_photos_revised_v2.csv"]

# Read every CSV once and concatenate in a single call. Growing a DataFrame
# by repeated concat onto an empty frame copies the data on every iteration
# and relies on empty-frame concat behaviour that newer pandas deprecates.
# ignore_index=True gives the combined frame a unique 0..n-1 index instead of
# repeating each file's own row numbers (rows are accessed by position below).
label_df = pd.concat(
    [pd.read_csv(label_path, header=None) for label_path in label_path_lst],
    ignore_index=True,
)

# for debugging
label_df.describe()  

Unnamed: 0,0,1,2,3,4,5
0,washing_hand_40,bleaching_not,ironing_plate_110,professional_dry_mild_P,drying_flat,
1,washing_mild_30,bleaching_not,ironing_plate_110,drying_tumble_not,professional_dry_normal_P,
2,washing_mild_30,bleaching_not,drying_tumble_not,ironing_plate_110,professional_dry_not,
3,washing_mild_30,bleaching_not,ironing_plate_110,drying_flat,professional_dry_normal_P,
4,washing_hand_40,bleaching_not,ironing_plate_110,professional_dry_normal_P,drying_flat,


In [None]:
# Preview the first rows of the combined label table.
label_df.head() 

#### Image Augmentation Module

The following augmentations are applied to each cropped image:
- rescaling
- random rotation
- random shear
- colour channel shift
- random brightness
- random colour inversion

In [None]:
def random_invert_img(x, p=0.5):
  """Randomly invert the colours of an image.

  Draws one scalar with ``tf.random.uniform([])``; when it is below ``p`` the
  image is replaced by ``255 - x``, otherwise ``x`` is returned unchanged.

  Args:
    x: image values; the inversion assumes a 0-255 value range.
    p: threshold compared against the random draw (default 0.5).

  Returns:
    Either ``255 - x`` or ``x`` itself.
  """
  should_invert = tf.random.uniform([]) < p
  return (255-x) if should_invert else x

In [None]:
# Augmentation pipeline used to produce the saved crops.
gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=45,               # rotation range setting
    shear_range=0.25,
    channel_shift_range=10,
    horizontal_flip=False,           # flipping is disabled
    brightness_range=[0.1, 3.0],
    fill_mode='reflect',             # how pixels outside the input are filled
    preprocessing_function=lambda img: random_invert_img(img, 0.5),
)

#### Main Cell of the Cropping Script

In [None]:
count = 0  # running counter used as a unique filename prefix for the saved images

In [None]:
# For every labelled image: crop each annotated box, then save a fixed number
# of augmented variants of the crop into the directory of its class label.
# Row i of label_df, txt_list[i] and image_list[i] are paired by position, and
# column j of a row is paired with the j-th bounding box of that image.
AUGMENTATIONS_PER_CROP = 12

# Fail before anything is written instead of hitting an IndexError halfway
# through the run when there are more label rows than files.
if label_df.shape[0] > min(len(txt_list), len(image_list)):
  raise ValueError(
      f"label_df has {label_df.shape[0]} rows but only {len(txt_list)} annotation "
      f"files and {len(image_list)} images were found")

for i in range(label_df.shape[0]):
  temp_row = label_df.iloc[i].tolist()
  with open(txt_list[i]) as f:
    lines = f.readlines()

  # Context manager closes the image file once it has been copied to an array.
  # NOTE(review): palette/grayscale files decode to 2-D arrays and RGBA files
  # to 4 channels; convert explicitly if the dataset contains such images.
  with Image.open(image_list[i]) as pil_img:
    img , crop_list = coordinates(lines, np.array(pil_img))

  for j, label in enumerate(temp_row):
    if str(label) == "nan":   # empty CSV cell -> no label in this column
      continue

    if j >= len(crop_list) or crop_list[j].size == 0:
      print(f"Skipping label '{label}' of {image_list[i]}: no usable bounding box at index {j}")
      continue

    # Pass the crop through unchanged (already in the 0-255 range that the
    # generator's rescale / inversion steps expect). Multiplying a uint8 array
    # by 255 wraps around modulo 256 and corrupts the pixel values.
    image = np.expand_dims(crop_list[j], 0)

    path = "drive/MyDrive/oxwash/dataset_v5/"+str(label) + "/"
    os.makedirs(path, exist_ok=True)   # also covers labels missing from `categories`
    aug_iter = gen.flow(image, save_to_dir=path, save_prefix=str(count),save_format='png')
    # Each step of the iterator writes one augmented PNG to `path`; the
    # returned batches themselves are not needed.
    for _ in range(AUGMENTATIONS_PER_CROP):
      next(aug_iter)
    print(path+str(count))
    count += 1

drive/MyDrive/oxwash/dataset_v5/washing_normal_30/0
drive/MyDrive/oxwash/dataset_v5/bleaching_not/1
drive/MyDrive/oxwash/dataset_v5/ironing_plate_150/2
drive/MyDrive/oxwash/dataset_v5/professional_dry_normal_P/3
drive/MyDrive/oxwash/dataset_v5/drying_flat/4
drive/MyDrive/oxwash/dataset_v5/washing_normal_40/5
drive/MyDrive/oxwash/dataset_v5/bleaching_not/6
drive/MyDrive/oxwash/dataset_v5/drying_tumble_lower_60/7
drive/MyDrive/oxwash/dataset_v5/ironing_plate_150/8
drive/MyDrive/oxwash/dataset_v5/professional_dry_not/9
drive/MyDrive/oxwash/dataset_v5/washing_normal_30/10
drive/MyDrive/oxwash/dataset_v5/bleaching_not/11
drive/MyDrive/oxwash/dataset_v5/ironing_plate_110/12
drive/MyDrive/oxwash/dataset_v5/professional_dry_normal_P/13
drive/MyDrive/oxwash/dataset_v5/drying_flat/14
drive/MyDrive/oxwash/dataset_v5/washing_normal_30/15
drive/MyDrive/oxwash/dataset_v5/bleaching_not/16
drive/MyDrive/oxwash/dataset_v5/ironing_plate_150/17
drive/MyDrive/oxwash/dataset_v5/professional_dry_normal_P/18

##### Cell to delete '../dataset_v5' directory

In [None]:
!rm -r drive/MyDrive/oxwash/dataset_v5 

rm: cannot remove 'drive/MyDrive/oxwash/dataset_v5': No such file or directory
