<a href="https://colab.research.google.com/github/MaciejAutuch/Projects_From_Jet_Brain/blob/master/AI_project_part_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AI & ML project - Face mask detection

# Part 1: Pipeline building and pre-processing 

## 1. Preparation

Install packages, mount Google Drive, unzip train and test data, extract labels

In [None]:
pip install mafaextractor

Collecting mafaextractor
  Downloading MAFAExtractor-0.1.1-py3-none-any.whl (5.4 kB)
Collecting scipy<2.0.0,>=1.5.2
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.2 MB/s 
Installing collected packages: scipy, mafaextractor
  Attempting uninstall: scipy
    Found existing installation: scipy 1.4.1
    Uninstalling scipy-1.4.1:
      Successfully uninstalled scipy-1.4.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.[0m
Successfully installed mafaextractor-0.1.1 scipy-1.7.3


In [None]:
# import packages

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import shutil 

from mafaextractor import extract_mafa

In [None]:
# mount google drive

from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# unzip train data and store in colab directory

!unzip /content/gdrive/MyDrive/AI_project/data/train-images.zip -d '/content/train_images'

In [None]:
# unzip test data and store in colab directory

!unzip /content/gdrive/MyDrive/AI_project/data/test-images.zip -d '/content/test_images'

In [None]:
# extract labels

train_label = extract_mafa('/content/gdrive/MyDrive/AI_project/data/LabelTrainAll.mat')
test_label = extract_mafa('/content/gdrive/MyDrive/AI_project/data/LabelTestAll.mat')

## 2. Pre-processing of labels

Check labels, transform relevant values to integers, drop entries that don't fulfil pic size conditions, reset index

In [None]:
# check train labels

pd.set_option('display.max_columns', None)
train_label.head(5)

Unnamed: 0,img_name,x_face_min,y_face_min,face_width,face_height,left_eye_x,left_eye_y,right_eye_x,right_eye_y,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max
0,train_00000001.jpg,95,160,91,91,113,177,158,172,82,89,1,3,1,1,3,-1,-1,186,251,102,186,177,249,-1,-1,-1,-1
1,train_00000002.jpg,107,82,66,66,129,95,156,96,65,56,2,3,1,1,3,-1,-1,173,148,112,99,172,138,-1,-1,-1,-1
2,train_00000003.jpg,56,170,185,185,140,198,196,208,147,182,1,3,1,1,4,-1,-1,241,355,97,226,203,352,-1,-1,-1,-1
3,train_00000004.jpg,162,208,71,71,182,229,203,238,46,70,2,3,1,1,2,-1,-1,233,279,169,234,208,278,-1,-1,-1,-1
4,train_00000005.jpg,86,69,99,99,134,93,165,98,76,97,1,1,2,1,4,-1,-1,185,168,92,139,162,166,-1,-1,-1,-1


In [None]:
# check what occlusion types exist

train_label['occ_type'].value_counts()

 1    14109
 2    13139
 3     2202
-1        2
Name: occ_type, dtype: int64

In [None]:
# check what occlusion degrees exist

train_label['occ_degree'].value_counts()

 3    25608
 2     2902
 1      940
-1        2
Name: occ_degree, dtype: int64

In [None]:
# check number of train images

train_label['img_name'].count()

29452

In [None]:
# smallest face-box origin and size values in the train labels (checked to avoid errors during resizing)

for column in ('x_face_min', 'y_face_min', 'face_width', 'face_height'):
  print(train_label[column].min())

1
1
18
18


In [None]:
# check test labels

pd.set_option('display.max_columns', None)
test_label.head(5)

Unnamed: 0,img_name,x_face_min,y_face_min,face_width,face_height,face_type,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max
0,test_00000001.jpg,2694.0,1211.0,353.0,353.0,1.0,144.0,337.0,2.0,3.0,2.0,2.0,1.0,-1.0,-1.0,3047.0,1564.0,2703.0,1316.0,2838.0,1548.0,-1.0,-1.0,-1.0,-1.0
1,test_00000001.jpg,1754.0,1449.0,68.0,68.0,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1822.0,1517.0,1753.0,1448.0,1753.0,1448.0,-1.0,-1.0,-1.0,-1.0
2,test_00000002.jpg,113.0,95.0,226.0,226.0,1.0,181.0,221.0,1.0,3.0,1.0,2.0,3.0,-1.0,-1.0,339.0,321.0,122.0,166.0,294.0,316.0,-1.0,-1.0,-1.0,-1.0
3,test_00000003.jpg,352.0,114.0,151.0,151.0,1.0,137.0,135.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,503.0,265.0,369.0,159.0,489.0,249.0,-1.0,-1.0,-1.0,-1.0
4,test_00000003.jpg,799.0,217.0,139.0,139.0,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,938.0,356.0,798.0,216.0,798.0,216.0,-1.0,-1.0,-1.0,-1.0


In [None]:
# cast the face-box columns of the test labels to integers (the train labels already hold integers)

face_box_columns = ['x_face_min', 'y_face_min', 'face_width', 'face_height']
for column in face_box_columns:
  test_label[column] = test_label[column].astype('int')
test_label.head(5)

Unnamed: 0,img_name,x_face_min,y_face_min,face_width,face_height,face_type,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max
0,test_00000001.jpg,2694,1211,353,353,1.0,144.0,337.0,2.0,3.0,2.0,2.0,1.0,-1.0,-1.0,3047.0,1564.0,2703.0,1316.0,2838.0,1548.0,-1.0,-1.0,-1.0,-1.0
1,test_00000001.jpg,1754,1449,68,68,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1822.0,1517.0,1753.0,1448.0,1753.0,1448.0,-1.0,-1.0,-1.0,-1.0
2,test_00000002.jpg,113,95,226,226,1.0,181.0,221.0,1.0,3.0,1.0,2.0,3.0,-1.0,-1.0,339.0,321.0,122.0,166.0,294.0,316.0,-1.0,-1.0,-1.0,-1.0
3,test_00000003.jpg,352,114,151,151,1.0,137.0,135.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,503.0,265.0,369.0,159.0,489.0,249.0,-1.0,-1.0,-1.0,-1.0
4,test_00000003.jpg,799,217,139,139,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,938.0,356.0,798.0,216.0,798.0,216.0,-1.0,-1.0,-1.0,-1.0


In [None]:
# check number of test images

test_label['img_name'].count()

10033

In [None]:
# smallest face-box origin and size values in the test labels (checked to avoid errors during resizing)

for column in ('x_face_min', 'y_face_min', 'face_width', 'face_height'):
  print(test_label[column].min())

0
-4
1
1


In [None]:
# remove train entries whose face box is narrower or shorter than 32, or starts at a negative x / y

too_small = (train_label['face_width'] < 32) | (train_label['face_height'] < 32)
negative_origin = (train_label['x_face_min'] < 0) | (train_label['y_face_min'] < 0)
train_label.drop(train_label[too_small | negative_origin].index, inplace=True)
train_label['img_name'].count()

29432

In [None]:
# re-check the smallest face-box origin and size values in the train labels after filtering

for column in ('x_face_min', 'y_face_min', 'face_width', 'face_height'):
  print(train_label[column].min())

1
1
32
32


In [None]:
# remove test entries whose face box is narrower or shorter than 32, or starts at a negative x / y

too_small = (test_label['face_width'] < 32) | (test_label['face_height'] < 32)
negative_origin = (test_label['x_face_min'] < 0) | (test_label['y_face_min'] < 0)
test_label.drop(test_label[too_small | negative_origin].index, inplace=True)
test_label['img_name'].count()

9063

In [None]:
# re-check the smallest face-box origin and size values in the test labels after filtering

for column in ('x_face_min', 'y_face_min', 'face_width', 'face_height'):
  print(test_label[column].min())

0
0
32
32


In [None]:
# renumber the train labels from 0 after the drops; drop=True discards the old index
# instead of materialising it as an 'index' column

train_label = train_label.reset_index(drop=True)
train_label

Unnamed: 0,img_name,x_face_min,y_face_min,face_width,face_height,left_eye_x,left_eye_y,right_eye_x,right_eye_y,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max
0,train_00000001.jpg,95,160,91,91,113,177,158,172,82,89,1,3,1,1,3,-1,-1,186,251,102,186,177,249,-1,-1,-1,-1
1,train_00000002.jpg,107,82,66,66,129,95,156,96,65,56,2,3,1,1,3,-1,-1,173,148,112,99,172,138,-1,-1,-1,-1
2,train_00000003.jpg,56,170,185,185,140,198,196,208,147,182,1,3,1,1,4,-1,-1,241,355,97,226,203,352,-1,-1,-1,-1
3,train_00000004.jpg,162,208,71,71,182,229,203,238,46,70,2,3,1,1,2,-1,-1,233,279,169,234,208,278,-1,-1,-1,-1
4,train_00000005.jpg,86,69,99,99,134,93,165,98,76,97,1,1,2,1,4,-1,-1,185,168,92,139,162,166,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29427,train_00025872.jpg,44,4,107,107,67,21,119,20,101,100,2,3,2,2,3,-1,-1,151,111,53,35,145,104,-1,-1,-1,-1
29428,train_00025873.jpg,31,3,80,80,58,18,90,29,69,79,2,3,2,2,3,-1,-1,111,83,35,38,100,82,-1,-1,-1,-1
29429,train_00025874.jpg,14,28,83,83,33,51,72,56,80,87,2,2,2,2,3,-1,-1,97,111,18,81,94,115,-1,-1,-1,-1
29430,train_00025875.jpg,38,4,72,72,60,23,93,21,65,77,2,3,2,2,3,-1,-1,110,76,40,33,103,81,-1,-1,-1,-1


In [None]:
# add an 'ID' column holding the row index left-padded with zeros to 5 characters,
# then move it to the front of the frame

train_label['ID'] = [f'{i:0>5}' for i in train_label.index]
train_label.insert(0, 'ID', train_label.pop('ID'))
train_label.head()

Unnamed: 0,ID,img_name,x_face_min,y_face_min,face_width,face_height,left_eye_x,left_eye_y,right_eye_x,right_eye_y,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max
0,0,train_00000001.jpg,95,160,91,91,113,177,158,172,82,89,1,3,1,1,3,-1,-1,186,251,102,186,177,249,-1,-1,-1,-1
1,1,train_00000002.jpg,107,82,66,66,129,95,156,96,65,56,2,3,1,1,3,-1,-1,173,148,112,99,172,138,-1,-1,-1,-1
2,2,train_00000003.jpg,56,170,185,185,140,198,196,208,147,182,1,3,1,1,4,-1,-1,241,355,97,226,203,352,-1,-1,-1,-1
3,3,train_00000004.jpg,162,208,71,71,182,229,203,238,46,70,2,3,1,1,2,-1,-1,233,279,169,234,208,278,-1,-1,-1,-1
4,4,train_00000005.jpg,86,69,99,99,134,93,165,98,76,97,1,1,2,1,4,-1,-1,185,168,92,139,162,166,-1,-1,-1,-1


In [None]:
# renumber the test labels from 0 after the drops; drop=True discards the old index
# instead of materialising it as an 'index' column

test_label = test_label.reset_index(drop=True)
test_label

Unnamed: 0,img_name,x_face_min,y_face_min,face_width,face_height,face_type,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max
0,test_00000001.jpg,2694,1211,353,353,1.0,144.0,337.0,2.0,3.0,2.0,2.0,1.0,-1.0,-1.0,3047.0,1564.0,2703.0,1316.0,2838.0,1548.0,-1.0,-1.0,-1.0,-1.0
1,test_00000001.jpg,1754,1449,68,68,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1822.0,1517.0,1753.0,1448.0,1753.0,1448.0,-1.0,-1.0,-1.0,-1.0
2,test_00000002.jpg,113,95,226,226,1.0,181.0,221.0,1.0,3.0,1.0,2.0,3.0,-1.0,-1.0,339.0,321.0,122.0,166.0,294.0,316.0,-1.0,-1.0,-1.0,-1.0
3,test_00000003.jpg,352,114,151,151,1.0,137.0,135.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,503.0,265.0,369.0,159.0,489.0,249.0,-1.0,-1.0,-1.0,-1.0
4,test_00000003.jpg,799,217,139,139,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,938.0,356.0,798.0,216.0,798.0,216.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9058,test_00004931.jpg,135,51,285,285,1.0,280.0,267.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,420.0,336.0,189.0,159.0,415.0,318.0,-1.0,-1.0,-1.0,-1.0
9059,test_00004932.jpg,38,120,285,285,1.0,255.0,258.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,323.0,405.0,68.0,213.0,293.0,378.0,-1.0,-1.0,-1.0,-1.0
9060,test_00004933.jpg,80,121,245,245,1.0,238.0,242.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,325.0,366.0,130.0,202.0,318.0,363.0,-1.0,-1.0,-1.0,-1.0
9061,test_00004934.jpg,148,266,276,276,1.0,265.0,248.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,424.0,542.0,186.0,334.0,413.0,514.0,-1.0,-1.0,-1.0,-1.0


In [None]:
# add an 'ID' column holding the row index left-padded with zeros to 5 characters,
# then move it to the front of the frame

test_label['ID'] = [f'{i:0>5}' for i in test_label.index]
test_label.insert(0, 'ID', test_label.pop('ID'))
test_label.head()

Unnamed: 0,ID,img_name,x_face_min,y_face_min,face_width,face_height,face_type,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max
0,0,test_00000001.jpg,2694,1211,353,353,1.0,144.0,337.0,2.0,3.0,2.0,2.0,1.0,-1.0,-1.0,3047.0,1564.0,2703.0,1316.0,2838.0,1548.0,-1.0,-1.0,-1.0,-1.0
1,1,test_00000001.jpg,1754,1449,68,68,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1822.0,1517.0,1753.0,1448.0,1753.0,1448.0,-1.0,-1.0,-1.0,-1.0
2,2,test_00000002.jpg,113,95,226,226,1.0,181.0,221.0,1.0,3.0,1.0,2.0,3.0,-1.0,-1.0,339.0,321.0,122.0,166.0,294.0,316.0,-1.0,-1.0,-1.0,-1.0
3,3,test_00000003.jpg,352,114,151,151,1.0,137.0,135.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,503.0,265.0,369.0,159.0,489.0,249.0,-1.0,-1.0,-1.0,-1.0
4,4,test_00000003.jpg,799,217,139,139,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,938.0,356.0,798.0,216.0,798.0,216.0,-1.0,-1.0,-1.0,-1.0


## 3. Resizing pictures

Create new folder for resized pictures, define function to resize pictures, apply function to pictures, store as zip file

In [None]:
# create new folders in colab directory for processed images
# (exist_ok=True keeps the cell re-runnable; os.mkdir raises FileExistsError on a second run)

os.makedirs('/content/train_images_processed/', exist_ok=True)
os.makedirs('/content/test_images_processed/', exist_ok=True)

In [None]:
# define a function to trim train images

def trim_train_pics(df, op_path, sv_path, row, nr):
  """Crop one labelled face out of a train image, resize it to 32x32 and save it.

  Args:
    df: labels DataFrame; only ``df.iloc[nr]['ID']`` is read, to build the output file name.
    op_path: directory holding the source image named ``row['img_name']``.
    sv_path: directory the crop is written to as ``train_<ID>.jpg``.
    row: label row providing 'img_name', 'x_face_min', 'y_face_min',
      'face_width' and 'face_height'.
    nr: position of the row within ``df``.

  Returns:
    The return value of ``cv2.imwrite`` for the saved crop.

  Raises:
    FileNotFoundError: if the source image cannot be read.
    ValueError: if the face box selects no pixels of the image.
  """
  path = os.path.join(op_path, row['img_name'])
  img = cv2.imread(path)
  # cv2.imread reports a missing/unreadable file by returning None rather than raising
  if img is None:
    raise FileNotFoundError('could not read image: ' + path)
  # label values can be floats (the raw test labels are); slice bounds must be integers
  x = int(row['x_face_min'])
  y = int(row['y_face_min'])
  w = int(row['face_width'])
  h = int(row['face_height'])
  face = img[y:y+h, x:x+w]
  # slicing clips silently, so a box lying outside the image produces an empty crop
  if face.size == 0:
    raise ValueError('face box (x=%d, y=%d, w=%d, h=%d) selects no pixels of %s' % (x, y, w, h, path))
  face = cv2.resize(face, (32, 32), interpolation = cv2.INTER_AREA)
  return cv2.imwrite(os.path.join(sv_path, 'train_' + str(df.iloc[nr]['ID']) + '.jpg'), face)

In [None]:
# process train images and remember which rows could not be turned into a crop

train_skipped = []  # index labels of rows that raised or whose crop was not written
for index, row in train_label.iterrows():
  try:
    saved = trim_train_pics(train_label, '/content/train_images/images/', '/content/train_images_processed/', row, index)
  except Exception:  # not a bare except, so a kernel interrupt still stops the loop
    train_skipped.append(index)
    continue
  if not saved:
    train_skipped.append(index)
print(len(train_skipped), 'train images skipped:', train_skipped)

In [None]:
# define a function to trim test images

def trim_test_pics(df, op_path, sv_path, row, nr):
  """Crop one labelled face out of a test image, resize it to 32x32 and save it.

  Args:
    df: labels DataFrame; only ``df.iloc[nr]['ID']`` is read, to build the output file name.
    op_path: directory holding the source image named ``row['img_name']``.
    sv_path: directory the crop is written to as ``test_<ID>.jpg``.
    row: label row providing 'img_name', 'x_face_min', 'y_face_min',
      'face_width' and 'face_height'.
    nr: position of the row within ``df``.

  Returns:
    The return value of ``cv2.imwrite`` for the saved crop.

  Raises:
    FileNotFoundError: if the source image cannot be read.
    ValueError: if the face box selects no pixels of the image.
  """
  path = os.path.join(op_path, row['img_name'])
  img = cv2.imread(path)
  # cv2.imread reports a missing/unreadable file by returning None rather than raising
  if img is None:
    raise FileNotFoundError('could not read image: ' + path)
  # label values can be floats (the raw test labels are); slice bounds must be integers
  x = int(row['x_face_min'])
  y = int(row['y_face_min'])
  w = int(row['face_width'])
  h = int(row['face_height'])
  face = img[y:y+h, x:x+w]
  # slicing clips silently, so a box lying outside the image produces an empty crop
  if face.size == 0:
    raise ValueError('face box (x=%d, y=%d, w=%d, h=%d) selects no pixels of %s' % (x, y, w, h, path))
  face = cv2.resize(face, (32, 32), interpolation = cv2.INTER_AREA)
  return cv2.imwrite(os.path.join(sv_path, 'test_' + str(df.iloc[nr]['ID']) + '.jpg'), face)

In [None]:
# process test images and remember which rows could not be turned into a crop

test_skipped = []  # index labels of rows that raised or whose crop was not written
for index, row in test_label.iterrows():
  try:
    saved = trim_test_pics(test_label, '/content/test_images/images/', '/content/test_images_processed/', row, index)
  except Exception:  # not a bare except, so a kernel interrupt still stops the loop
    test_skipped.append(index)
    continue
  if not saved:
    test_skipped.append(index)
print(len(test_skipped), 'test images skipped:', test_skipped)

Create zip files containing the processed train and test images:

In [None]:
!zip -r /content/train_images_processed.zip /content/train_images_processed

In [None]:
!zip -r /content/test_images_processed.zip /content/test_images_processed

Create a csv file of the test labels data to check which test images were "skipped" during the processing above:

In [None]:
test_label.to_csv('test_label_check.csv')

## 4. Create face mask classes in labels data

Derive "face_mask" class (0 = "no_mask", 1 = "yes_mask") based on pre-defined conditions, remove entries in test labels data that were not processed above, save label data as csv file

In [None]:
# derive the face mask class of each train entry:
# 'yes_mask' when occ_type is 1, or when occ_type is 2 and occ_degree is 3; otherwise 'no_mask'

is_masked = (train_label['occ_type'] == 1) | (
    (train_label['occ_type'] == 2) & (train_label['occ_degree'] == 3))
train_label['face_mask'] = np.where(is_masked, 'yes_mask', 'no_mask')

train_label.head(5)

Unnamed: 0,ID,img_name,x_face_min,y_face_min,face_width,face_height,left_eye_x,left_eye_y,right_eye_x,right_eye_y,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max,face_mask
0,0,train_00000001.jpg,95,160,91,91,113,177,158,172,82,89,1,3,1,1,3,-1,-1,186,251,102,186,177,249,-1,-1,-1,-1,yes_mask
1,1,train_00000002.jpg,107,82,66,66,129,95,156,96,65,56,2,3,1,1,3,-1,-1,173,148,112,99,172,138,-1,-1,-1,-1,yes_mask
2,2,train_00000003.jpg,56,170,185,185,140,198,196,208,147,182,1,3,1,1,4,-1,-1,241,355,97,226,203,352,-1,-1,-1,-1,yes_mask
3,3,train_00000004.jpg,162,208,71,71,182,229,203,238,46,70,2,3,1,1,2,-1,-1,233,279,169,234,208,278,-1,-1,-1,-1,yes_mask
4,4,train_00000005.jpg,86,69,99,99,134,93,165,98,76,97,1,1,2,1,4,-1,-1,185,168,92,139,162,166,-1,-1,-1,-1,yes_mask


In [None]:
# derive the face mask class of each test entry:
# 'yes_mask' when occ_type is 1, or when occ_type is 2 and occ_degree is 3; otherwise 'no_mask'

is_masked = (test_label['occ_type'] == 1) | (
    (test_label['occ_type'] == 2) & (test_label['occ_degree'] == 3))
test_label['face_mask'] = np.where(is_masked, 'yes_mask', 'no_mask')

test_label.head(5)

Unnamed: 0,ID,img_name,x_face_min,y_face_min,face_width,face_height,face_type,occ_width,occ_height,occ_type,occ_degree,gender,race,orientation,glasses_width,glasses_height,x_face_max,y_face_max,x_occ_min,y_occ_min,x_occ_max,y_occ_max,x_glasses_min,x_glasses_max,y_glasses_min,y_glasses_max,face_mask
0,0,test_00000001.jpg,2694,1211,353,353,1.0,144.0,337.0,2.0,3.0,2.0,2.0,1.0,-1.0,-1.0,3047.0,1564.0,2703.0,1316.0,2838.0,1548.0,-1.0,-1.0,-1.0,-1.0,yes_mask
1,1,test_00000001.jpg,1754,1449,68,68,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1822.0,1517.0,1753.0,1448.0,1753.0,1448.0,-1.0,-1.0,-1.0,-1.0,no_mask
2,2,test_00000002.jpg,113,95,226,226,1.0,181.0,221.0,1.0,3.0,1.0,2.0,3.0,-1.0,-1.0,339.0,321.0,122.0,166.0,294.0,316.0,-1.0,-1.0,-1.0,-1.0,yes_mask
3,3,test_00000003.jpg,352,114,151,151,1.0,137.0,135.0,1.0,3.0,2.0,2.0,3.0,-1.0,-1.0,503.0,265.0,369.0,159.0,489.0,249.0,-1.0,-1.0,-1.0,-1.0,yes_mask
4,4,test_00000003.jpg,799,217,139,139,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,938.0,356.0,798.0,216.0,798.0,216.0,-1.0,-1.0,-1.0,-1.0,no_mask


In [None]:
# drop entries in test labels data whose cropped image was not written by the processing step.
# The rows are found by looking for the file on disk rather than through a hand-maintained list
# of index labels, which is only valid for one exact combination of data and filters.
# NOTE: must run before the images are moved out of the processed folder.

processed_dir = '/content/test_images_processed/'
was_processed = np.array(
    [os.path.exists(os.path.join(processed_dir, 'test_' + str(face_id) + '.jpg'))
     for face_id in test_label['ID']], dtype=bool)
test_label = test_label.drop(index=test_label.index[~was_processed])

test_label.shape

(9040, 27)

In [None]:
# save labels data as csv file

train_label.to_csv('train_label.csv')
test_label.to_csv('test_label.csv')

## 5. Create class folders

Create new labels data, create two folders (yes/no mask) for both the train and test datasets, move images accordingly, drop test images that were not processed (again), save new labels data as csv file

In [None]:
# build the reduced train labels: file name of the processed image plus its face mask class
# (this overwrites 'img_name' in train_label with the processed-image file name)

train_label['img_name'] = 'train_' + train_label['ID'].astype(str) + '.jpg'
train_labels = train_label[['img_name', 'face_mask']]
train_labels

Unnamed: 0,img_name,face_mask
0,train_00000.jpg,yes_mask
1,train_00001.jpg,yes_mask
2,train_00002.jpg,yes_mask
3,train_00003.jpg,yes_mask
4,train_00004.jpg,yes_mask
...,...,...
29427,train_29427.jpg,yes_mask
29428,train_29428.jpg,yes_mask
29429,train_29429.jpg,no_mask
29430,train_29430.jpg,yes_mask


In [None]:
# build the reduced test labels: file name of the processed image plus its face mask class
# (this overwrites 'img_name' in test_label with the processed-image file name)

test_label['img_name'] = 'test_' + test_label['ID'].astype(str) + '.jpg'
test_labels = test_label[['img_name', 'face_mask']]
test_labels

Unnamed: 0,img_name,face_mask
0,test_00000.jpg,yes_mask
1,test_00001.jpg,no_mask
2,test_00002.jpg,yes_mask
3,test_00003.jpg,yes_mask
4,test_00004.jpg,no_mask
...,...,...
9058,test_09058.jpg,yes_mask
9059,test_09059.jpg,yes_mask
9060,test_09060.jpg,yes_mask
9061,test_09061.jpg,yes_mask


In [None]:
# create two new folders "no_mask" and "yes_mask" for train images

class_names = ['no_mask', 'yes_mask']

for i in class_names:
  # absolute path so the folders are the ones the move step targets ('/content/train_/<class>')
  # whatever the working directory is; exist_ok=True keeps the cell re-runnable
  os.makedirs(os.path.join('/content/train_', i), exist_ok=True)

In [None]:
# move train images into new folders based on face mask classes

train_not_moved = []  # file names that could not be moved (e.g. the crop was never written)
for c in class_names:
  for i in list(train_labels[train_labels['face_mask']==c]['img_name']):
    try:
      shutil.move(os.path.join('/content/train_images_processed/', i), '/content/train_/'+c)
    except OSError:  # covers a missing source file and shutil.Error; other errors are not hidden
      train_not_moved.append(i)
print(len(train_not_moved), 'train images not moved')

In [None]:
# create two new folders "no_mask" and "yes_mask" for test images

for i in class_names:
  # absolute path so the folders are the ones the move step targets ('/content/test_/<class>')
  # whatever the working directory is; exist_ok=True keeps the cell re-runnable
  os.makedirs(os.path.join('/content/test_', i), exist_ok=True)

In [None]:
# move test images into new folders based on face mask classes

test_not_moved = []  # file names that could not be moved (e.g. the crop was never written)
for c in class_names:
  for i in list(test_labels[test_labels['face_mask']==c]['img_name']):
    try:
      shutil.move(os.path.join('/content/test_images_processed/', i), '/content/test_/'+c)
    except OSError:  # covers a missing source file and shutil.Error; other errors are not hidden
      test_not_moved.append(i)
print(len(test_not_moved), 'test images not moved')

Create zip files containing the processed train and test images in new folders:

In [None]:
!zip -r /content/train_.zip /content/train_

In [None]:
!zip -r /content/test_.zip /content/test_

Create a csv file of the test labels data to check which test images were "skipped" during the processing above (again):

In [None]:
test_labels.to_csv('test_labels_check.csv')

In [None]:
# drop entries in new test labels data whose image is not present in its class folder.
# test_labels is derived from test_label, from which the unprocessed rows were already removed,
# so dropping a second hard-coded list of index labels removes rows whose images do exist.
# Checking the files on disk only drops rows that are really missing.

class_root = '/content/test_'
in_class_folder = np.array(
    [os.path.exists(os.path.join(class_root, mask_class, img_name))
     for img_name, mask_class in zip(test_labels['img_name'], test_labels['face_mask'])], dtype=bool)
test_labels = test_labels.drop(index=test_labels.index[~in_class_folder])

test_labels.shape

(9017, 2)

In [None]:
# save new train and test labels data as csv file

train_labels.to_csv('train_labels.csv')
test_labels.to_csv('test_labels.csv')

In [None]:
# export notebook to html

!jupyter nbconvert --to html '/content/AI_project_part_1.ipynb'

[NbConvertApp] Converting notebook /content/AI_project_part_1.ipynb to html
[NbConvertApp] Writing 422205 bytes to /content/AI_project_part_1.html
