In [1]:
import os
import pandas as pd
from zipfile import ZipFile
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import cv2
from scipy import ndimage

In [2]:
# Folder containing the face images; each file name starts with the age (e.g. "100_1.jpg").
# NOTE(review): this is an absolute, machine-specific path, so the cell only runs on the
# author's machine; a later cell rebinds combined_faces_path to a relative location.
combined_faces_path = "C:/Users/shahr/Downloads/Compressed/combined_faces/content/combined_faces"
combined_faces_image_names = os.listdir(combined_faces_path)
# Number of entries found in the folder.
len(combined_faces_image_names)

33453

In [3]:
# Defining a function to return the class labels corresponding to the age-ranges shown above.

def class_labels(age):
    """Return the age-range class index for ``age``.

    Closed ranges and their labels: 1-3 -> 0, 4-10 -> 1, 11-19 -> 2,
    20-35 -> 3, 36-50 -> 4, 51-65 -> 5. Any value that falls inside none
    of these ranges (e.g. 66 and above, 0, or a fractional age between two
    ranges) is labelled 6.
    """
    age_ranges = (
        (1, 3),
        (4, 10),
        (11, 19),
        (20, 35),
        (36, 50),
        (51, 65),
    )
    for label, (lowest, highest) in enumerate(age_ranges):
        if lowest <= age <= highest:
            return label
    # Catch-all class for everything outside the ranges above.
    return 6

In [4]:
# Master table: one row per image file with its age (the text before the first "_"
# in the file name, stored as uint8) and the matching age-class label.

master_df = pd.DataFrame({'filename': combined_faces_image_names})
master_df['age'] = master_df['filename'].str.split("_").str[0].map(np.uint8)
master_df['age_class'] = master_df['age'].map(class_labels)

master_df.head()

Unnamed: 0,filename,age,age_class
0,100_1.jpg,100,6
1,100_10.jpg,100,6
2,100_11.jpg,100,6
3,100_12.jpg,100,6
4,100_13.jpg,100,6


In [5]:
# Count how many images fall into each age class.
master_df["age_class"].value_counts()

3    12625
4     5205
5     4194
0     3747
6     2691
1     2538
2     2453
Name: age_class, dtype: int64

In [20]:
# Randomly reorder the rows of master_df (fixed seed, so the order is repeatable)
# and renumber the index from 0.

master_df = shuffle(master_df, random_state=42)
master_df = master_df.reset_index(drop=True)
master_df.head()

Unnamed: 0,filename,age,age_class
0,96_19.jpg,96,6
1,30_804.jpg,30,3
2,65_80.jpg,65,5
3,62_68.jpg,62,5
4,61_118.jpg,61,5


In [21]:
# Features: the image file name and the raw age; target: the age-class label.
X = master_df[['filename', 'age']]
y = master_df['age_class']

In [22]:
# 70/30 train/test split, stratified on the age class and seeded for repeatability.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [23]:
# Row/column counts of the training and testing feature frames.
print(X_train.shape)
print(X_test.shape)

(23417, 2)
(10036, 2)


In [24]:
# Re-attach the class labels to each split; assign() returns a new frame and
# matches the labels to the rows by index.
temp_X_train = X_train.assign(age_class=y_train)

temp_X_test = X_test.assign(age_class=y_test)

In [25]:
# Relative image folder; this is the prefix that ends up in the exported CSV files.
combined_faces_path = "./combined_faces/content/combined_faces/"

def append_path_to_filename(filename):
    """Return ``filename`` joined onto the current ``combined_faces_path``."""
    full_path = os.path.join(combined_faces_path, filename)
    return full_path

In [26]:
# Turn the bare file names of both splits into paths inside the image folder.
for split_df in (temp_X_train, temp_X_test):
    split_df['filename'] = split_df['filename'].map(append_path_to_filename)

In [27]:
# Exporting the train and test dataframes (path-prefixed filename, age, age_class)
# as CSV files, without the index column.

temp_X_train.to_csv("images_filenames_labels_train.csv", index=False)
temp_X_test.to_csv("images_filenames_labels_test.csv", index=False)

In [28]:
# Read both exported splits back and keep their filename columns as plain lists.
td = pd.read_csv("images_filenames_labels_train.csv")
ts = pd.read_csv("images_filenames_labels_test.csv")
tdl = td["filename"].tolist()
tsl = ts["filename"].tolist()

### Rotation Augmentation

In [30]:
# Reload the exported training split from disk and show its dimensions.
train_df = pd.read_csv("images_filenames_labels_train.csv")
train_df.shape

(23417, 3)

In [31]:
# Empty frame with the same columns as train_df; the (currently commented-out)
# augmentation loop appends one row per written image to it.
train_aug_df = pd.DataFrame(columns=train_df.columns)
train_aug_df

Unnamed: 0,filename,age,age_class


In [32]:
# Output folder for the augmented training images. The path is defined once and
# created with exist_ok=True so that re-running this cell does not raise
# FileExistsError when the folder is already there.
new_path = "./combined_faces_train_augmented_rottaion"
os.makedirs(new_path, exist_ok=True)

In [33]:
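# NOTE: this entire cell is commented out, so no rotated/flipped images are written
# and train_aug_df stays empty in the cells that follow.
# # Running a for-loop through all the images in train_df and augmenting them.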

# for i in range(train_df.shape[0]):

#     # Reading the image filepath, name, age and target values.
#     img_path = train_df.loc[i, 'filename']
#     img_name = img_path.split("/")[-1]
#     img_age = train_df.loc[i, 'age']
#     img_target = train_df.loc[i, 'age_class']

#     # Reading the image from the filepath and converting it to proper colour format in cv2.
#     img = cv2.imread(img_path)
#     img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

#     # Creating the augmented versions of the image as shown above.
#     img_rot_pos40 = ndimage.rotate(img, 40, reshape=False)
#     img_rot_pos20 = ndimage.rotate(img, 20, reshape=False)
#     img_rot_neg20 = ndimage.rotate(img, -20, reshape=False)
#     img_rot_neg40 = ndimage.rotate(img, -40, reshape=False)
#     img_fliplr = np.fliplr(img)
#     img_fliplr_rot_pos40 = ndimage.rotate(img_fliplr, 40, reshape=False)
#     img_fliplr_rot_pos20 = ndimage.rotate(img_fliplr, 20, reshape=False)
#     img_fliplr_rot_neg20 = ndimage.rotate(img_fliplr, -20, reshape=False)
#     img_fliplr_rot_neg40 = ndimage.rotate(img_fliplr, -40, reshape=False)

#     # Creating new image names for the augmented images.
#     img_name_wo_jpg = img_name.split(".")[0]
#     img_name2 = img_name_wo_jpg+"_rot_pos40.jpg"
#     img_name3 = img_name_wo_jpg+"_rot_pos20.jpg"
#     img_name4 = img_name_wo_jpg+"_rot_neg20.jpg"
#     img_name5 = img_name_wo_jpg+"_rot_neg40.jpg"
#     img_name6 = img_name_wo_jpg+"_fliplr.jpg"
#     img_name7 = img_name_wo_jpg+"_fliplr_rot_pos40.jpg"
#     img_name8 = img_name_wo_jpg+"_fliplr_rot_pos20.jpg"
#     img_name9 = img_name_wo_jpg+"_fliplr_rot_neg20.jpg"
#     img_name10 = img_name_wo_jpg+"_fliplr_rot_neg40.jpg"

#     # Saving the original image and all the augmented images into a new folder combined_faces_train_augmented.
#     img_dest1 = os.path.join(new_path, img_name)
#     cv2.imwrite(img_dest1, img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest2 = os.path.join(new_path, img_name2)
#     cv2.imwrite(img_dest2, img_rot_pos40, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest3 = os.path.join(new_path, img_name3)
#     cv2.imwrite(img_dest3, img_rot_pos20, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest4 = os.path.join(new_path, img_name4)
#     cv2.imwrite(img_dest4, img_rot_neg20, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest5 = os.path.join(new_path, img_name5)
#     cv2.imwrite(img_dest5, img_rot_neg40, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest6 = os.path.join(new_path, img_name6)
#     cv2.imwrite(img_dest6, img_fliplr, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest7 = os.path.join(new_path, img_name7)
#     cv2.imwrite(img_dest7, img_fliplr_rot_pos40, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest8 = os.path.join(new_path, img_name8)
#     cv2.imwrite(img_dest8, img_fliplr_rot_pos20, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest9 = os.path.join(new_path, img_name9)
#     cv2.imwrite(img_dest9, img_fliplr_rot_neg20, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
#     img_dest10 = os.path.join(new_path, img_name10)
#     cv2.imwrite(img_dest10, img_fliplr_rot_neg40, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

#     # Creating a new temp_df for the augmented images (similar to train_df).
#     temp_df = pd.DataFrame(columns=train_df.columns)
#     temp_df.loc[1] = ["/content"+img_dest1, img_age, img_target]
#     temp_df.loc[2] = ["/content"+img_dest2, img_age, img_target]
#     temp_df.loc[3] = ["/content"+img_dest3, img_age, img_target]
#     temp_df.loc[4] = ["/content"+img_dest4, img_age, img_target]
#     temp_df.loc[5] = ["/content"+img_dest5, img_age, img_target]
#     temp_df.loc[6] = ["/content"+img_dest6, img_age, img_target]
#     temp_df.loc[7] = ["/content"+img_dest7, img_age, img_target]
#     temp_df.loc[8] = ["/content"+img_dest8, img_age, img_target]
#     temp_df.loc[9] = ["/content"+img_dest9, img_age, img_target]
#     temp_df.loc[10] = ["/content"+img_dest10, img_age, img_target]

#     # Concatenating temp_df with the new train_aug_df.
#     train_aug_df = pd.concat([train_aug_df, temp_df], axis=0, ignore_index=True)

#     # Keeping track of progress and printing relevant statements for the user.
#     if (i+1) % 500 == 0:
#         print(f"Images augmented: {i+1} of {train_df.shape[0]}")

# print("\nDone augmenting all training dataset images and saved them into combined_faces_train_augmented.")

In [34]:
# Preview of the augmented-image table (empty while the augmentation loop is commented out).
train_aug_df.head()

Unnamed: 0,filename,age,age_class


In [36]:
# Compare the number of original training rows with the number of augmented rows.
print(train_df.shape)
print(train_aug_df.shape)

(23417, 3)
(0, 3)


In [37]:
# Shuffling the augmented training dataset. A fixed seed is used, as in the earlier
# master_df shuffle, so the row order is the same on every run; the index is then
# renumbered from 0.

train_aug_df = shuffle(train_aug_df, random_state=42).reset_index(drop=True)

In [38]:
# Save the shuffled augmented-image table as CSV, without the index column.
train_aug_df.to_csv("images_filenames_labels_train_augmented.csv", index=False)

### DataGen Augmentation

In [40]:
# Load the training split again; its filename column holds paths that start with
# ./combined_faces/content/combined_faces/.
trainsetdata=pd.read_csv("images_filenames_labels_train.csv")
trainsetdata.head()

Unnamed: 0,filename,age,age_class
0,./combined_faces/content/combined_faces/8_236.jpg,8,1
1,./combined_faces/content/combined_faces/25_451...,25,3
2,./combined_faces/content/combined_faces/16_229...,16,2
3,./combined_faces/content/combined_faces/24_120...,24,3
4,./combined_faces/content/combined_faces/59_14.jpg,59,5


In [41]:
# Bare file name of each image. os.path.basename takes the last path component,
# so this no longer depends on the folder prefix having exactly four "/" separators.
trainsetdata["img_name"] = trainsetdata["filename"].map(os.path.basename)
trainsetdata.head()

Unnamed: 0,filename,age,age_class,img_name
0,./combined_faces/content/combined_faces/8_236.jpg,8,1,8_236.jpg
1,./combined_faces/content/combined_faces/25_451...,25,3,25_451.jpg
2,./combined_faces/content/combined_faces/16_229...,16,2,16_229.jpg
3,./combined_faces/content/combined_faces/24_120...,24,3,24_120.jpg
4,./combined_faces/content/combined_faces/59_14.jpg,59,5,59_14.jpg


In [42]:
# Full stored path of the first training image (row label 0).
trainsetdata.loc[0, "filename"]

'./combined_faces/content/combined_faces/8_236.jpg'

In [43]:
# Bare file names of all training images as a plain list; show the first five.
train_img_names = trainsetdata["img_name"].tolist()
train_img_names[:5]

['8_236.jpg', '25_451.jpg', '16_229.jpg', '24_120.jpg', '59_14.jpg']

In [33]:
# Create one sub-folder per distinct age under Source_images; these folders are the
# input of the ImageDataGenerator augmentation further down.

org_image_names = os.listdir("./combined_faces_refined/combined_faces/content/combined_faces/")

# Distinct age prefixes (text before the first "_"), kept in first-seen order.
# dict.fromkeys de-duplicates in one pass instead of scanning a list for every file.
ages = list(dict.fromkeys(name.split("_")[0] for name in org_image_names))

path = "Source_images"

for i in ages:
    # makedirs(..., exist_ok=True) also creates Source_images itself when it is
    # missing and does not fail when the cell is run a second time.
    os.makedirs(os.path.join(path, i), exist_ok=True)


In [34]:
# Build the source path of every training image (the files are copied in the next cell).

src_dir = "./combined_faces_refined/combined_faces/content/combined_faces/"
org_image_names_with_path = [src_dir + image_name for image_name in train_img_names]
org_image_names_with_path[:5]

['./combined_faces_refined/combined_faces/content/combined_faces/25_873.jpg',
 './combined_faces_refined/combined_faces/content/combined_faces/58_98.jpg',
 './combined_faces_refined/combined_faces/content/combined_faces/80_160.jpg',
 './combined_faces_refined/combined_faces/content/combined_faces/4_382.jpg',
 './combined_faces_refined/combined_faces/content/combined_faces/45_365.jpg']

In [35]:
import shutil

# Copy every training image into ./Source_images/<age>/, where <age> is the part
# of the file name before the first "_".
for i in org_image_names_with_path:
    # basename() takes the last path component, so the result does not depend on
    # how many folders the source path contains.
    filename = os.path.basename(i)
    age = filename.split("_")[0]
    target = "./Source_images/" + age + "/" + filename
    # Make sure the per-age folder exists so the copy cannot fail on a missing directory.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    shutil.copyfile(i, target)



In [45]:
# Make sure one folder per age-range class exists inside the Augmented directory.
# NOTE(review): src is an absolute, machine-specific path.
src = "C:/Users/shahr/Downloads/Compressed/Augmented"
folders = ["1-3", "4-10", "11-19", "20-35", "36-50", "51-65", "65+"]

for folder_name in folders:
    path = os.path.join(src, folder_name)
    # Only create folders that are not there yet.
    if not os.path.exists(path):
        os.makedirs(path)

In [38]:
from keras.preprocessing.image import ImageDataGenerator
from scipy import io 

# Augmentation settings handed to Keras' ImageDataGenerator: random rotation,
# horizontal/vertical shift, brightness change, zoom and horizontal flip, with
# fill_mode "nearest" for pixels that the transform leaves uncovered.
# NOTE(review): units/ranges of the numeric arguments follow the Keras documentation — confirm there.
datagen=ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.2,1],
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
 )


In [44]:
import numpy as np 
import os
from PIL import Image as img
import skimage

# NOTE(review): `dir_in_src` was not assigned anywhere in the notebook, so this cell
# raised NameError on a fresh kernel. It is rebuilt here from the per-age sub-folders
# of ./Source_images/ that the loop reads from — confirm this matches the original intent.
dir_in_src = sorted(
    entry for entry in os.listdir("./Source_images/")
    if os.path.isdir(os.path.join("./Source_images/", entry))
)

# For every age folder: load its .jpg images, run them through `datagen` and let
# Keras write the augmented images to ./Aug_images/<age>/ as "aug_<age>_*.jpg".
for j in dir_in_src:
    image_dir = "./Source_images/" + j + "/"
    dest_dir = "./Aug_images/" + j + "/"
    prefix = "aug_" + j + "_"

    # The destination folder is not created anywhere else in the notebook.
    # NOTE(review): Keras is assumed to write into save_to_dir without creating it.
    os.makedirs(dest_dir, exist_ok=True)

    dataset = []
    for image_name in os.listdir(image_dir):
        # splitext() looks at the real extension; names without a dot no longer
        # raise IndexError and names with extra dots are handled correctly.
        if os.path.splitext(image_name)[1] == ".jpg":
            image = skimage.io.imread(image_dir + image_name)
            image = img.fromarray(image, "RGB")
            dataset.append(np.array(image))

    # Nothing to augment in this folder; flow() cannot work on an empty array.
    if not dataset:
        continue

    # NOTE(review): stacking presumably requires all images to share one shape — confirm.
    x = np.array(dataset)

    # Iterating the generator is what writes the files; stop after 15 batches.
    for batch_number, batch in enumerate(
        datagen.flow(
            x,
            batch_size=256,
            save_to_dir=dest_dir,
            save_prefix=prefix,
            save_format="jpg",
        ),
        start=1,
    ):
        if batch_number >= 15:
            break

In [53]:
# Collect the relative path of every file inside each sub-folder of ./Aug_images/.
aug_dirs = os.listdir('./Aug_images/')
filenames = [
    './Aug_images/' + aug_dir + "/" + aug_file
    for aug_dir in aug_dirs
    for aug_file in os.listdir(os.path.join('./Aug_images/', aug_dir))
]
    


In [56]:
# Copy every generated image into the flat training folder, keeping its file name.
# `shutil` is imported in an earlier cell of this notebook.
for i in filenames:
    # basename() takes the last path component instead of relying on the path
    # having exactly three "/" separators.
    name = os.path.basename(i)
    target = "./combined_faces_train_augmented_rottaion/" + name
    shutil.copy(i, target)

In [57]:
def extract_age(x):
    """Return the age encoded in an image file name as an int.

    The name is split on "_". When the first piece is "aug" the age is the
    second piece, otherwise it is the first piece.
    """
    pieces = x.split("_")
    age_position = 1 if pieces[0] == "aug" else 0
    return int(pieces[age_position])


In [69]:
# Label table for everything in the augmented training folder: file name -> age ->
# age class, with the file names rewritten to paths under /content/ before export.
final_aug_img_names = os.listdir("combined_faces_train_augmented_rottaion")
final_aug_img_df = pd.DataFrame({'filename': final_aug_img_names})
final_aug_img_df['age'] = final_aug_img_df['filename'].map(extract_age)
final_aug_img_df['age_class'] = final_aug_img_df['age'].map(class_labels)

final_aug_img_df['filename'] = (
    "/content/combined_faces_train_augmented_rottaion/" + final_aug_img_df['filename']
)

final_aug_img_df.to_csv("images_filenames_labels_train_augmented_mega.csv", index=False)

In [70]:
# Prefix every test-set path with "/content/" and save the result as the final test CSV.
# The stored paths start with "./", so the result looks like "/content/./combined_faces/...".
test_set = pd.read_csv("images_filenames_labels_test.csv")
test_set["filename"] = "/content/" + test_set["filename"]

test_set.to_csv("images_filenames_labels_test_final.csv", index=False)