In [None]:
!pip install tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
from PIL import Image
import tensorflow as tf
import zipfile
from tensorflow.keras.preprocessing.image import load_img
from sklearn.model_selection import train_test_split
from tensorflow.keras.initializers import random_uniform, glorot_uniform, constant, identity
from tensorflow.keras.layers import Dropout, Input, Add, Dense, Activation, BatchNormalization, Flatten, Conv2D, MaxPooling2D, GlobalMaxPooling2D
from tensorflow.keras.models import Model, load_model

In [2]:
# Unpack the uploaded archive into the current working directory.
with zipfile.ZipFile('/content/crop_part1.zip', mode='r') as archive:
    archive.extractall()

In [3]:
# Folder holding the extracted images, and the bare names of every .jpg inside it.
path = Path("/content/crop_part1")
filenames = [image_file.name for image_file in path.glob('*.jpg')]

In [None]:
# Sanity check: number of images found, then the first three names.
print(len(filenames), filenames[:3], sep='\n')

9780
['17_0_0_20170110231640217.jpg.chip.jpg', '26_1_2_20170104022148861.jpg.chip.jpg', '5_1_0_20170109193708669.jpg.chip.jpg']


In [None]:
import glob

Create a dictionary that helps us parse the information encoded in the dataset's file names, along with the reverse lookups (label → id).

In [None]:
# Lookup tables: the numeric ids used in the file names -> readable labels.
dataset_dict = dict(
    race_id=dict(enumerate(['white', 'black', 'asian', 'indian', 'others'])),
    gender_id=dict(enumerate(['male', 'female'])),
)

# Reverse lookups: readable label -> numeric id.
dataset_dict['gender_alias'] = {label: idx for idx, label in dataset_dict['gender_id'].items()}
dataset_dict['race_alias'] = {label: idx for idx, label in dataset_dict['race_id'].items()}

Define a function to help us extract the data from our dataset. This function iterates over each file of the UTK dataset and returns a Pandas DataFrame containing all the fields (from here on, our dataset is named `df`).

In [None]:
def parse_dataset(dataset_path, ext='jpg'):
    """
    Used to extract information about our dataset. It iterates over all images
    in ``dataset_path`` and returns a DataFrame with the data (age, gender and
    race) of all files.

    File names are expected to look like ``<age>_<gender>_<race>_<suffix>``
    once their final extension is stripped. Files whose name does not match
    are dropped from the result.

    Parameters
    ----------
    dataset_path : str
        Directory that is searched (non-recursively) for ``*.<ext>`` files.
    ext : str
        File extension to look for, without the leading dot.

    Returns
    -------
    pandas.DataFrame
        Columns ``age``, ``gender``, ``race`` and ``file``. The frame is empty
        (but still has these columns) when nothing matches.
    """
    def parse_info_from_file(path):
        """
        Parse information from a single file.

        Returns ``(age, gender, race)``, or ``(None, None, None)`` when the
        file name does not follow the expected pattern.
        """
        filename = os.path.splitext(os.path.basename(path))[0]
        try:
            age, gender, race, _ = filename.split('_')
            return int(age), dataset_dict['gender_id'][int(gender)], dataset_dict['race_id'][int(race)]
        except (ValueError, KeyError):
            # Wrong number of fields, a non-numeric field, or an unknown
            # gender/race id: report the file as unparsable.
            return None, None, None

    files = glob.glob(os.path.join(dataset_path, "*.%s" % ext))
    records = [parse_info_from_file(file) for file in files]

    # Naming the columns up front keeps the frame well-formed even when no
    # file was found (renaming afterwards fails on an empty frame).
    df = pd.DataFrame(records, columns=['age', 'gender', 'race'])
    df['file'] = files

    # Unparsable files carry None in every label column; drop them.
    return df.dropna()

In [None]:
# Parse the labels out of every file name in the extracted folder.
# The path is relative, so this depends on the current working directory.
df = parse_dataset('crop_part1')
df.head()

Unnamed: 0,age,gender,race,file
0,17.0,male,white,crop_part1/17_0_0_20170110231640217.jpg.chip.jpg
1,26.0,female,asian,crop_part1/26_1_2_20170104022148861.jpg.chip.jpg
2,5.0,female,white,crop_part1/5_1_0_20170109193708669.jpg.chip.jpg
3,36.0,female,white,crop_part1/36_1_0_20170103182523314.jpg.chip.jpg
4,21.0,female,others,crop_part1/21_1_4_20170103234037067.jpg.chip.jpg


Data visualization: to understand the distribution of our dataset


In [None]:
import plotly.graph_objects as go

def plot_distribution(pd_series):
    """
    Display a donut chart showing how often each distinct value of
    ``pd_series`` occurs. The chart title is built from ``pd_series.name``.
    """
    frequencies = pd_series.value_counts()

    donut = go.Pie(
        labels=frequencies.index.tolist(),
        values=frequencies.values.tolist(),
        hole=.3,
    )
    chart = go.Figure(data=[donut])
    chart.update_layout(title_text='Distribution for %s' % pd_series.name)
    chart.show()

RACE Distribution

In [None]:
plot_distribution(df['race'])

gender distribution

In [None]:
plot_distribution(df['gender'])

Age distribution

In [None]:
import plotly.express as px

# 20-bin histogram of the 'age' column; update_layout returns the same figure.
fig = px.histogram(df, x="age", nbins=20).update_layout(title_text='Age distribution')
fig.show()

Age distribution with pie chart

In [None]:
# Age groups for the pie chart. The bins are left-closed (right=False) so each
# label matches its range: '<10' is [0, 10), '10-20' is [10, 20), ...,
# '80+' is [80, inf). With the default right-closed bins, age 10 would be
# counted as '<10' and age 80 as '60-80'.
bins = [0, 10, 20, 30, 40, 60, 80, np.inf]
names = ['<10', '10-20', '20-30', '30-40', '40-60', '60-80', '80+']

age_binned = pd.cut(df['age'], bins, labels=names, right=False)
plot_distribution(age_binned)

In [4]:
np.random.seed(10)
np.random.shuffle(filenames)

IMAGE_DIR = "/content/crop_part1/"
IMAGE_SIZE = (128, 128)

age_labels, gender_labels, imgs, race_labels = [], [], [], []

for filename in filenames:
    # Names look like "<age>_<gender>_<race>_<timestamp>.jpg.chip.jpg".
    fields = filename.split('_')
    if len(fields) != 4:
        # A name with a missing field would shift e.g. the timestamp into the
        # race label; skip it, as parse_dataset does for unparsable names.
        continue

    # color_mode='grayscale' replaces the deprecated `grayscale=True` flag.
    img = load_img(IMAGE_DIR + filename, color_mode='grayscale')
    # Image.LANCZOS is the same filter as Image.ANTIALIAS, an alias that was
    # removed in Pillow 10.
    img = img.resize(IMAGE_SIZE, Image.LANCZOS)
    img = np.array(img)

    # Labels are kept as the raw strings taken from the file name.
    age_labels.append(fields[0])
    gender_labels.append(fields[1])
    race_labels.append(fields[2])
    imgs.append(img)



In [None]:
# for file in os.listdir("/content/crop_part1/"):
  
#     imgs.append(img)
#     print(img)
    

In [5]:
df = pd.DataFrame()

In [6]:
# Rebuild df from scratch (this replaces the frame returned by parse_dataset):
# one row per loaded image, with the labels still stored as the raw strings
# split out of the file name.
df = pd.DataFrame()
df['image'], df['age'], df['gender'] ,df['race']= imgs, age_labels, gender_labels ,race_labels
# print(len(imgs))
# print(len(age_labels))

In [7]:
df.head()

Unnamed: 0,image,age,gender,race
0,"[[244, 242, 240, 238, 212, 142, 62, 23, 11, 6,...",51,0,2
1,"[[10, 5, 9, 14, 10, 13, 22, 24, 20, 20, 22, 20...",29,1,1
2,"[[78, 73, 73, 75, 88, 98, 118, 135, 155, 174, ...",31,0,0
3,"[[3, 3, 3, 3, 4, 4, 6, 8, 10, 10, 11, 14, 26, ...",21,1,3
4,"[[24, 10, 12, 19, 27, 35, 43, 53, 63, 73, 80, ...",45,0,3


In [None]:
#gender_dict = {0:"Male",1:"Female"}

In [None]:
#race_dict={'white':0,'black':1,'asian':2,'indian':3,'others':4}

In [None]:
# The labels were split out of the file names as strings; cast age and gender
# to numeric dtypes.
# NOTE(review): 'race' is not cast here and stays a string column — confirm
# that this is intended.
df = df.astype({'age':'float32', 'gender': 'int32'})

In [None]:
#print(df.dtypes)

In [None]:
# img = Image.open("/content/crop_part1/"+df.image[1])
# plt.imshow(img)

In [None]:
# sns.distplot(df.age)

In [None]:
# files = df.iloc[0:20]
# plt.figure(figsize=(15,15))
# for index, file, age, gender, race in files.itertuples():
#     plt.subplot(5,5, index+1)
#     img = load_img("/content/crop_part1/"+file)
#     img = np.array(img)
#     plt.imshow(img)
#     plt.title(f"Age: {age} Gender: {gender_dict[gender]}")
#     plt.axis('off')

In [8]:
from keras import backend as K


In [9]:
print(df.iloc[0,0].shape)

(128, 128)


In [30]:
# Stack the per-row 128x128 grayscale arrays into one (N, 128, 128) batch.
# df.drop('age', axis=1).to_numpy() yields an object array whose rows are
# [image, gender, race], which cannot be converted to a tensor for a network.
X = np.stack(df['image'].to_list())

# Regression/classification target: the age of each face, in the same row order as X.
Y = df['age'].astype('int32').to_numpy()

In [None]:
#print(tensor)

tf.Tensor(
[[[244 242 240 ...  65  62  61]
  [246 243 238 ...  60  59  59]
  [247 243 236 ...  58  59  62]
  ...
  [158 156 155 ...  13  10  10]
  [153 147 142 ...  17  15  12]
  [137 127 118 ...  22  19  15]]

 [[ 10   5   9 ...  18  18  21]
  [  6   3   7 ...  15  16  19]
  [  8   7   9 ...  11  13  18]
  ...
  [ 19  15   9 ...  25  23  20]
  [ 22  19  12 ...  26  23  21]
  [ 28  33  32 ...  27  24  22]]

 [[ 78  73  73 ...  63  67  72]
  [ 74  69  70 ...  75  78  82]
  [ 69  64  70 ...  86  90  95]
  ...
  [ 40  40  39 ...  82  77  72]
  [ 39  39  38 ...  91  86  81]
  [ 38  38  38 ... 101  96  91]]

 ...

 [[ 10  18  43 ...  38  24  23]
  [ 12  21  43 ...  25  24  27]
  [ 18  27  41 ...  20  30  33]
  ...
  [236 246 246 ... 197 191 195]
  [240 246 244 ... 208 202 197]
  [243 247 245 ... 206 206 197]]

 [[239 239 238 ... 136 138 141]
  [241 241 240 ... 130 132 133]
  [242 242 242 ... 130 131 132]
  ...
  [112 109 105 ... 134 132 132]
  [111 108 105 ... 130 128 129]
  [111 108 105 ..

In [31]:
# Hold out 25% of the samples for testing; the fixed random_state makes the
# split reproducible.
Xtrain,Xtest , Ytrain, Ytest = train_test_split(X,Y, test_size=0.25, random_state=42)

In [32]:
from keras.applications import VGG16

In [35]:
type(X)

numpy.ndarray

In [36]:
#feature extraction
VGG_model = VGG16(weights='imagenet', include_top=False, input_shape=(128,128, 3))
 
for layer in VGG_model.layers:
    layer.trainable = False
VGG_model.summary()
feature_extractor=VGG_model.predict(X)
features = feature_extractor.reshape(feature_extractor.shape[0], -1)
test_for_svm=features
feature_extractor_test=VGG_model.predict(X)
features_test = feature_extractor_test.reshape(feature_extractor_test.shape[0], -1)

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 128, 128, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 128, 128, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 64, 64, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 64, 64, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 64, 64, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 32, 32, 128)       0     

ValueError: ignored

Creating model

In [None]:
from sklearn.preprocessing import Normalizer
from sklearn.neural_network import MLPRegressor
from tensorflow import keras
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

# Network input: the grayscale images stacked into one (N, 128, 128) batch and
# scaled to [0, 1]. (`tensor`, used here before, was only ever defined in
# commented-out code, so a fresh run failed with NameError.)
images = np.stack(df['image'].to_list()).astype('float32') / 255.0

model = keras.Sequential()
model.add(Flatten(input_shape=(128,128)))

model.add(Dense(500))
model.add(Activation('relu'))
model.add(Dropout(0.2))

model.add(Dense(500))
model.add(Activation('relu'))
model.add(Dropout(0.2))

# Age is predicted as one real number, so the head is a single linear unit.
# A 110-way softmax cannot be trained with mean squared error against scalar
# ages: its outputs lie in [0, 1] and its shape does not match the target.
model.add(Dense(1))

# 1e-3 is Adam's default step size; 0.1 is large enough to make training diverge.
opt = keras.optimizers.Adam(learning_rate=1e-3)
# Mean absolute error (in years) is reported instead of 'accuracy', which is
# not meaningful for a regression target.
model.compile(loss='mean_squared_error', optimizer=opt, metrics=['mae'])

# fit model
hist = model.fit(images, Y.astype('float32'), batch_size = 20, epochs=10, verbose=1, validation_split = .2)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
