In [1]:
import os

# Tell the Kaggle CLI which directory holds its configuration
# (a folder on the Google Drive mount).
os.environ.update({'KAGGLE_CONFIG_DIR': "/content/drive/MyDrive/AI/.kaggle/"})

In [2]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem so the paths under
# /content/drive used throughout the notebook resolve.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [3]:
cd /content/drive/MyDrive/AI/.kaggle

/content/drive/MyDrive/AI/.kaggle


In [4]:
# Download the Adience gender/age benchmark archive into the current working
# directory with the Kaggle CLI.
!kaggle datasets download -d ttungl/adience-benchmark-gender-and-age-classification

Downloading adience-benchmark-gender-and-age-classification.zip to /content/drive/MyDrive/AI/.kaggle
100% 1.38G/1.39G [00:15<00:00, 160MB/s]
100% 1.39G/1.39G [00:15<00:00, 94.1MB/s]


In [5]:
!unzip adience-benchmark-gender-and-age-classification.zip

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
  inflating: AdienceBenchmarkGenderAndAgeClassification/faces/7651777@N03/coarse_tilt_aligned_face.1590.8448553152_ee57286766_o.jpg  
  inflating: AdienceBenchmarkGenderAndAgeClassification/faces/7651777@N03/coarse_tilt_aligned_face.1590.8672510540_b644469799_o.jpg  
  inflating: AdienceBenchmarkGenderAndAgeClassification/faces/7651777@N03/coarse_tilt_aligned_face.1590.8672510848_af2939038b_o.jpg  
  inflating: AdienceBenchmarkGenderAndAgeClassification/faces/7651777@N03/coarse_tilt_aligned_face.1590.9606205853_8e636e8abb_o.jpg  
  inflating: AdienceBenchmarkGenderAndAgeClassification/faces/7651777@N03/coarse_tilt_aligned_face.1591.8053467917_518c5f2cd3_o.jpg  
  inflating: AdienceBenchmarkGenderAndAgeClassification/faces/7651777@N03/coarse_tilt_aligned_face.1591.8069511407_3390c2bf9c_o.jpg  
  inflating: AdienceBenchmarkGenderAndAgeClassification/faces/7651777@N03/coarse_tilt_aligned_face.1592.8516377288_0d7350e215_o.jpg  
  inflating:

In [6]:
import numpy as np 
import plotly.express as px 
import pandas as pd 
import cv2 
import os 
from glob import glob
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Activation, Dropout, Flatten, Dense, Dropout, LayerNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import matplotlib.pyplot as plt
import pickle 

In [None]:
# Read every tab-separated metadata file shipped with the dataset and stack
# them into a single frame with a fresh 0..n-1 index.
# The file list is sorted because glob() returns paths in arbitrary order;
# a stable row order keeps the seeded train/test splits below reproducible.
# NOTE(review): every *.txt file in the folder is loaded; if some of them list
# subsets of the same faces, rows will be duplicated -- confirm.
df_list = sorted(glob("/content/drive/MyDrive/AI/.kaggle/AdienceBenchmarkGenderAndAgeClassification/*.txt"))
if not df_list:
    raise FileNotFoundError("No metadata .txt files found in the AdienceBenchmarkGenderAndAgeClassification folder")
df = pd.concat([pd.read_csv(file_name, sep="\t") for file_name in df_list],
               axis=0, ignore_index=True)
del df_list

In [None]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [None]:
# Build the path of each aligned face crop:
#   <faces dir>/<user_id>/coarse_tilt_aligned_face.<face_id>.<original_image>
# Fields are read by column label; integer keys on a label-indexed row are
# treated as positions only through deprecated pandas behaviour.
df['image_path'] = df[['user_id', 'face_id', 'original_image']].apply(
    lambda row: os.path.join('/content/drive/MyDrive/AI/.kaggle/AdienceBenchmarkGenderAndAgeClassification/faces',
                             f"{row['user_id']}",
                             f"coarse_tilt_aligned_face.{row['face_id']}.{row['original_image']}"),
    axis=1)

In [None]:
# Manual assignments that fold single-number age labels into bracket labels.
age_mapper = {
    '35': '(38, 48)',
    '34': '(38, 48)',
    '36': '(38, 48)',
    '55': '(60, 100)',
    '57': '(60, 100)',
    '58': '(60, 100)',
    '23': '(8, 23)',
    '3': '(0, 2)',
    '2': '(0, 2)',
}

# Labels that already look like a bracket ("(lo, hi)") map to themselves.
is_bracket = df['age'].str.startswith("(")
bracket_labels = df[is_bracket]['age'].value_counts().index
age_mapper.update(zip(bracket_labels, bracket_labels))

# Series.map turns every label that is not a key of the mapper into NaN.
df['age'] = df['age'].map(age_mapper)

In [None]:
# Keep only rows with a usable age label. The mapping step leaves NaN for any
# label it has no entry for, and NaN != 'None' evaluates to True, so the string
# comparison alone would let those rows through into the label encoder.
df = df[df['age'].notna() & (df['age'] != 'None')]

In [None]:
# Keep rows whose gender is not 'u' and only the columns used downstream.
# .copy() makes new_df an independent frame, so the column assignments in the
# following cells do not raise SettingWithCopyWarning.
new_df = df.loc[df['gender'] != 'u',
                ['age', 'gender', 'x', 'y', 'dx', 'dy', 'image_path']].copy()

In [None]:
# Release the full metadata frame; the cells below work on `new_df` only.
del df

In [None]:
# Encode gender as a float32 label: 1.0 where the value is 'm', 0.0 otherwise.
is_male = new_df['gender'] == 'm'
new_df['gender'] = is_male.astype(np.float32)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the set of age labels, then replace each label with its integer code.
le = LabelEncoder().fit(new_df['age'])
new_df['age'] = le.transform(new_df['age'])

In [None]:
# Persist the fitted encoder (relative path, i.e. the current working
# directory) so integer predictions can be turned back into age labels later.
with open('age_encoder.pkl', mode='wb') as pkl_file:
    pkl_file.write(pickle.dumps(le))

In [None]:
from sklearn.model_selection import train_test_split

# Gender task: features are the image paths, labels the 0/1 gender column.
# X and y are defined here so the cell runs on a fresh kernel instead of
# relying on variables that are only assigned further down the notebook.
X = new_df[['image_path']].values
y = new_df[['gender']].values

# Hold out 20% of the samples for validation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def preprocess_image(individual_path):
    """Load one JPEG from disk and return it as a 128x128, 3-channel tensor.

    Args:
        individual_path: A file path, or an array-like wrapping a single file
            path; only the first element after flattening is used.

    Returns:
        The image resized to 128x128 with 3 channels. Pixel values are only
        resized here, not rescaled.
    """
    path = np.array(individual_path).ravel()[0]
    img = tf.io.read_file(path)
    # Force three channels: without `channels`, a grayscale JPEG decodes to a
    # single channel and no longer matches the (128, 128, 3) element shape
    # declared for the datasets.
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [128,128])
    return img

In [None]:
def yield_training_values(X_train,y_train):
    """Generate (image, label) pairs for tf.data.

    Walks the two sequences in lockstep, loading each image with
    `preprocess_image` and yielding it together with its label. Stops at the
    end of the shorter sequence.
    """
    pairs = zip(X_train, y_train)
    for sample in pairs:
        path, label = sample
        yield preprocess_image(path), label

In [None]:
# Element spec shared by both splits: a 128x128x3 float image and a label of
# shape (1,). `output_signature` replaces the deprecated
# `output_types` / `output_shapes` pair.
sample_signature = (
    tf.TensorSpec(shape=(128, 128, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(1,), dtype=tf.float32),
)

# Training split: images are loaded lazily by the generator.
ds_train = tf.data.Dataset.from_generator(yield_training_values,
                                          args=[X_train, y_train],
                                          output_signature=sample_signature)

# Held-out split, built the same way.
ds_test = tf.data.Dataset.from_generator(yield_training_values,
                                         args=[X_test, y_test],
                                         output_signature=sample_signature)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Cache decoded samples, shuffle through a 1000-element buffer, group into
# batches of 32 and prefetch so loading overlaps with training.
ds_train = (
    ds_train
    .cache()
    .shuffle(buffer_size=1000)
    .batch(32)
    .prefetch(buffer_size=AUTOTUNE)
)
ds_test = (
    ds_test
    .cache()
    .shuffle(buffer_size=1000)
    .batch(32)
    .prefetch(buffer_size=AUTOTUNE)
)

In [None]:
# Baseline gender classifier: four 3x3 convolutions (max-pooling after the
# first three), global average pooling, then two 512-unit dense blocks with
# batch normalisation and dropout.
# NOTE(review): this model has no rescaling layer and the loader only resizes,
# so pixels reach the first convolution unscaled -- consider a Rescaling(1./255).
inputs = tf.keras.Input(shape = (128,128,3))
x = Conv2D(128, 3, activation='relu')(inputs)
x = MaxPool2D((2,2))(x)
x = Conv2D(64, 3, activation='relu')(x)
x = MaxPool2D((2,2))(x)
x = Conv2D(32,(3), activation='relu')(x)
x = MaxPool2D((2,2))(x)
x = Conv2D(32,(3), activation='relu')(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(512, activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = Dropout(0.3)(x)
# Softmax, not sigmoid: the model is compiled with SparseCategoricalCrossentropy
# on probabilities, which expects the two outputs to form a distribution.
outputs = Dense(2, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)

In [None]:
# Early stopping on the training loss with a patience of 3 epochs. It only
# takes effect when passed to model.fit(..., callbacks=[callback]).
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# Integer class labels -> sparse categorical cross-entropy.
# `metrics` is given as a list; a bare string is rejected by newer Keras releases.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [None]:
# Train for at most 50 epochs. The EarlyStopping callback defined with the
# compile step is passed in here; without it the patience setting is ignored.
history = model.fit(ds_train, validation_data=ds_test, epochs=50, callbacks=[callback])

In [None]:
# Random horizontal flip, rotation, zoom and translation, followed by
# rescaling pixel values by 1/255.
augmentation_layers = [
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2,0.2),
    tf.keras.layers.RandomTranslation(0.2,0.2),
    tf.keras.layers.Rescaling(1./255),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)

In [None]:
# Pull the first training batch and pick its third image as a demo input.
my_image, _ = next(iter(ds_train.take(1)))
# Add a leading batch axis and make sure the dtype is float32.
image = tf.expand_dims(my_image[2], 0)
image = tf.cast(image, tf.float32)

In [None]:
# Show nine independently augmented versions of the sample image in a 3x3 grid.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for ax in axes.flat:
    augmented_image = data_augmentation(image)
    ax.imshow(augmented_image[0])
    ax.axis("off")

In [None]:
# Gender classifier with the augmentation/rescaling pipeline built into the
# model. Layers are listed in the order they are applied.
inputs = tf.keras.Input(shape=(128, 128, 3))

gender_layers = [
    data_augmentation,
    Conv2D(128, 3, activation='relu'),
    MaxPool2D((2,2)),
    Conv2D(64, 3, activation='relu'),
    MaxPool2D((2,2)),
    Dropout(0.2),
    Conv2D(32,(3), activation='relu'),
    MaxPool2D((2,2)),
    Conv2D(32,(3), activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    Dropout(0.3),
    Dense(512, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    Dropout(0.3),
]

# Thread the input tensor through the stack.
x = inputs
for layer in gender_layers:
    x = layer(x)

# Two-way softmax head (one probability per gender class).
outputs = Dense(2, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)



# Early stopping on the training loss with a patience of 3 epochs. It only
# takes effect when passed to model.fit(..., callbacks=[callback]).
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# `metrics` is given as a list; a bare string is rejected by newer Keras releases.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [None]:
# Train for at most 20 epochs. The EarlyStopping callback defined with the
# compile step is passed in here; without it the patience setting is ignored.
history = model.fit(ds_train, validation_data=ds_test, epochs=20, callbacks=[callback])

In [None]:
# Age task: features are the image paths, labels the integer-encoded age
# column. Both are kept 2-D (n, 1).
X = new_df[['image_path']].to_numpy()
y = new_df[['age']].to_numpy()

def yield_training_values(X_train,y_train):
    """Yield one (image, label) pair per sample.

    The two sequences are iterated together; each path is loaded through
    `preprocess_image` and emitted with the label at the same position.
    Iteration ends with the shorter sequence.
    """
    for sample_path, sample_label in zip(X_train, y_train):
        loaded = preprocess_image(sample_path)
        yield loaded, sample_label
        
def preprocess_image(individual_path):
    """Load one JPEG from disk and return it as a 128x128, 3-channel tensor.

    Args:
        individual_path: A file path, or an array-like wrapping a single file
            path; only the first element after flattening is used.

    Returns:
        The image resized to 128x128 with 3 channels. Pixel values are only
        resized here, not rescaled.
    """
    path = np.array(individual_path).ravel()[0]
    img = tf.io.read_file(path)
    # Force three channels: without `channels`, a grayscale JPEG decodes to a
    # single channel and no longer matches the (128, 128, 3) element shape
    # declared for the datasets.
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [128,128])
    return img
# Augmentation pipeline for the age model: random flip, rotation, zoom and
# translation, then rescaling pixel values by 1/255.
data_augmentation = tf.keras.Sequential()
for aug_layer in (
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2,0.2),
    tf.keras.layers.RandomTranslation(0.2,0.2),
    tf.keras.layers.Rescaling(1./255),
):
    data_augmentation.add(aug_layer)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the samples for validation; the seed fixes the split.
age_splits = train_test_split(X, y, random_state=42, test_size=0.15)
X_train, X_test, y_train, y_test = age_splits

In [None]:
# Element spec shared by both splits: a 128x128x3 float image and a label of
# shape (1,). `output_signature` replaces the deprecated
# `output_types` / `output_shapes` pair.
sample_signature = (
    tf.TensorSpec(shape=(128, 128, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(1,), dtype=tf.float32),
)

# Training split: images are loaded lazily by the generator.
ds_train = tf.data.Dataset.from_generator(yield_training_values,
                                          args=[X_train, y_train],
                                          output_signature=sample_signature)

# Held-out split, built the same way.
ds_test = tf.data.Dataset.from_generator(yield_training_values,
                                         args=[X_test, y_test],
                                         output_signature=sample_signature)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Training split: cache -> shuffle (100-element buffer) -> batches of 32 -> prefetch.
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(buffer_size=100)
ds_train = ds_train.batch(32)
ds_train = ds_train.prefetch(buffer_size=AUTOTUNE)

# Held-out split goes through the same stages.
ds_test = ds_test.cache()
ds_test = ds_test.shuffle(buffer_size=100)
ds_test = ds_test.batch(32)
ds_test = ds_test.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Age-group classifier: augmentation/rescaling, four 3x3 convolutions with
# max-pooling, batch normalisation and dropout, global average pooling, then
# two 512-unit dense blocks.
inputs = tf.keras.Input(shape = (128,128,3))
x = data_augmentation(inputs)
x = Conv2D(128, 3, activation='relu')(x)
x = MaxPool2D((2,2))(x)
x = Conv2D(64, 3, activation='relu')(x)
x = MaxPool2D((2,2))(x)
x = tf.keras.layers.BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Conv2D(64,(3), activation='relu')(x)
x = MaxPool2D((2,2))(x)
x = Conv2D(32,(3), activation='relu')(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(512, activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = Dropout(0.3)(x)
# One output unit per age class known to the fitted label encoder, so the head
# follows the data instead of a hard-coded count.
num_age_classes = len(le.classes_)
outputs = Dense(num_age_classes, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)



# Early stopping on the training loss with a patience of 3 epochs. It only
# takes effect when passed to model.fit(..., callbacks=[callback]).
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# `metrics` is given as a list; a bare string is rejected by newer Keras releases.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [None]:
# Train for at most 100 epochs. The EarlyStopping callback defined with the
# compile step is passed in here; without it the patience setting is ignored.
history = model.fit(ds_train, validation_data=ds_test, epochs=100, callbacks=[callback])