# Gender CNN 

In [None]:
import os
import shutil
import random

seed = 1
random.seed(seed)  # fixed seed so the train/validation/test assignment is reproducible

# NOTE: the paths are absolute and will not work on a different machine.
directory = "/Users/devan/Desktop/AMLS_20-21_SN12345678/Datasets/celeba/img/"
labels_csv = "/Users/devan/Desktop/AMLS_20-21_SN12345678/Datasets/celeba/labels.csv"
# separate root folders for the train, test and validation files
train = "/Users/devan/Desktop/AMLS_20-21_SN12345678/A1/data/train/"
test = "/Users/devan/Desktop/AMLS_20-21_SN12345678/A1/data/test/"
validation = "/Users/devan/Desktop/AMLS_20-21_SN12345678/A1/data/validation/"

# One sub-directory per class inside every split folder.
# exist_ok=True lets this cell be re-run without crashing on existing folders.
for split_dir in (train, test, validation):
    for class_dir in ("male/", "female/"):
        os.makedirs(split_dir + class_dir, exist_ok=True)

test_examples = train_examples = validation_examples = 0

# Read all label rows up front (skipping the header) so the file is closed promptly.
with open(labels_csv) as labels_file:
    label_rows = labels_file.readlines()[1:]

for line in label_rows:
    split_line = line.split(",")
    img_file = split_line[1]     # image file name
    male_female = split_line[2]  # gender label: -1 or 1

    random_num = random.random()  # uniform draw in [0, 1) that decides the split

    if random_num < 0.8:         # ~80% of the rows go to the train folder
        location = train
        train_examples += 1

    elif random_num < 0.9:       # ~10% of the rows go to the validation folder
        location = validation
        validation_examples += 1

    else:                        # remaining ~10% go to the test folder
        location = test
        test_examples += 1

    # NOTE(review): ".jpg" is always appended to the destination name; if img_file
    # already carries an extension the copy is named "<name>.jpg.jpg" - confirm.
    gender = int(float(male_female))  # parse once instead of once per branch
    if gender == -1:
        # label -1 -> female directory
        shutil.copy(directory + img_file, location + "female/" + img_file + ".jpg")

    elif gender == 1:
        # label 1 -> male directory
        shutil.copy(directory + img_file, location + "male/" + img_file + ".jpg")

print(f"Number of training examples {train_examples}")
print(f"Number of test examples {test_examples}")
print(f"Number of validation examples {validation_examples}")

In [None]:
import tensorflow as tf
import math
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import roc_curve
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# NOTE(review): these counts are hard-coded and replace the values counted by the
# dataset-split cell - confirm they still match the folders on disk.
train_examples = 3955
test_examples = 521
validation_examples = 524
img_height = img_width = 55  # target size passed to the image generators
batch_size = 32              # batch size used by the image generators

In [None]:
# Gender classifier: four convolution layers with two max-pooling stages in
# between, followed by a single sigmoid output unit.
# The 55x55 single-channel input is not flattened because the first layers are
# convolutional.
model = keras.Sequential()
model.add(keras.Input(shape=(55, 55, 1)))
model.add(layers.Conv2D(96, 5, strides=1, padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2, padding="same"))
model.add(layers.Conv2D(256, 3, strides=1, padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2, padding="same"))
model.add(layers.Conv2D(384, 3, strides=1, padding="same", activation="relu"))
model.add(layers.Conv2D(256, 3, strides=1, padding="same", activation="relu"))
# flatten the feature maps so the final unit sees one vector per image
model.add(layers.Flatten())
model.add(layers.Dense(1, activation="sigmoid"))


In [None]:
# Training images are rescaled to [0, 1] and augmented (rotation, zoom, flips).
train_datagen = ImageDataGenerator(
    rescale = 1/255,
    rotation_range = 15,
    zoom_range = (0.95,0.95),
    horizontal_flip = True,
    vertical_flip = True,
    data_format = "channels_last",
    dtype = tf.float32,
)

# Validation and test images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale = 1.0/255, dtype = tf.float32)
test_datagen = ImageDataGenerator(rescale = 1.0/255, dtype = tf.float32)

train_gen = train_datagen.flow_from_directory(
    "/Users/devan/Desktop/AMLS_20-21_SN12345678/A1/data/train/",
    target_size = (img_height, img_width),
    batch_size = batch_size,
    color_mode = "grayscale",
    class_mode = "binary",
    shuffle = True,
    seed = 123,
)

validation_gen = validation_datagen.flow_from_directory(
    "/Users/devan/Desktop/AMLS_20-21_SN12345678/A1/data/validation/",
    target_size = (img_height, img_width),
    batch_size = batch_size,
    color_mode = "grayscale",
    class_mode = "binary",
    shuffle = True,
    seed = 123,
)

# The test generator must NOT shuffle: the ROC cell reads the labels in one pass
# and the predictions in another, and both passes have to see the images in the
# same order for labels and scores to line up.
test_gen = test_datagen.flow_from_directory(
    "/Users/devan/Desktop/AMLS_20-21_SN12345678/A1/data/test/",
    target_size = (img_height, img_width),
    batch_size = batch_size,
    color_mode = "grayscale",
    class_mode = "binary",
    shuffle = False,
    seed = 123,
)


In [None]:
# Binary cross-entropy on the sigmoid output (probabilities, not logits).
# `learning_rate` is the supported argument name; `lr` is a deprecated alias that
# newer Keras releases reject.
model.compile(
    loss = keras.losses.BinaryCrossentropy(from_logits = False),
    optimizer = keras.optimizers.Adam(learning_rate = 3e-4),
    metrics = ["accuracy"],
)

In [None]:
  # Train for 10 epochs. The batch size is defined by the generator, so it must
  # not be passed to fit() again (the previous batch_size=10 conflicted with the
  # generator's own batch size).
  history = model.fit(
      train_gen,
      epochs=10,
      verbose=2,
      steps_per_epoch=train_examples // batch_size,
      validation_data=validation_gen,
      validation_steps=validation_examples // batch_size,
  )

In [None]:
def plot_roc(labels, data):
    """Plot the ROC curve of the global `model` on `data`.

    Args:
        labels: true binary labels, one per sample in `data`, in the same order.
        data: anything `model.predict` accepts (image array or generator).
    """
    predictions = model.predict(data)
    fp, tp, _ = roc_curve(labels, np.ravel(predictions))

    # rates are plotted as percentages
    plt.plot(100*fp, 100*tp)
    plt.xlabel("False")
    plt.ylabel("True")
    plt.grid()
    plt.show()  # was `plt.show` without parentheses, which never rendered the figure


# Collect images AND labels in one single pass over the generator, so every
# prediction is guaranteed to be scored against the label of the same image
# regardless of how the generator orders its batches.
image_batches = []
label_batches = []
num_batches = 0

for x, y in test_gen:
    image_batches.append(x)
    label_batches.append(y)
    num_batches += 1
    # the generator loops forever; stop after exactly one epoch
    if num_batches >= len(test_gen):
        break

test_images = np.concatenate(image_batches)
test_labels = np.concatenate(label_batches)

plot_roc(test_labels, test_images)

In [None]:
model.evaluate(test_gen, verbose=2)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# the second curve is the validation split, not the test split
plt.legend((['Train', 'Validation']), loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
# this figure shows the loss, so title and axis are labelled accordingly
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# the second curve is the validation split, not the test split
plt.legend((['Train', 'Validation']), loc='upper left')
plt.show()

# Gender SVM

In [None]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import classification_report,accuracy_score
import pandas as pd
from sklearn.datasets import load_iris

In [None]:
import landmarksGen as l2
import numpy as np
from sklearn.metrics import classification_report,accuracy_score
from sklearn import svm

In [None]:
def get_data():
    """Load landmark features and labels and cut out fixed train/test slices.

    Returns:
        Tuple ``(tr_X, tr_Y, te_X, te_Y, y, Y)``: rows 0-299 of the features and
        of the two-column label matrix for training, rows 350-549 for testing,
        followed by the raw label vector ``y`` and the full label matrix ``Y``.
    """
    features, y = l2.extract_features_labels()

    # two-column label matrix: column 0 is y, column 1 is -(y - 1)
    Y = np.array([y, -(y - 1)]).T

    train_rows = slice(None, 300)
    test_rows = slice(350, 550)   # rows 300-349 are not used by either slice

    return (
        features[train_rows],
        Y[train_rows],
        features[test_rows],
        Y[test_rows],
        y,
        Y,
    )



In [None]:
get_data()

In [None]:
# Load the data and show the test features, first as returned and then
# flattened to one row of 136 values (68 * 2) per sample.
tr_X, tr_Y, te_X, te_Y, y, Y = get_data()
print(te_X.shape)
n_test_rows = te_X.size // 136
print(te_X.reshape((n_test_rows, 68 * 2)))

In [None]:
# sklearn functions implementation
def img_SVM(training_images, training_labels, test_images, test_labels):
    """Fit a polynomial-kernel SVM and evaluate it on the held-out test data.

    The previous version predicted on the training images only and never used
    the test arguments, so the printed accuracy was a training accuracy.

    Args:
        training_images: 2-D feature matrix used for fitting.
        training_labels: labels matching `training_images`.
        test_images: 2-D feature matrix of held-out samples.
        test_labels: labels matching `test_images`.

    Returns:
        Predicted labels for `test_images`.
    """
    classifier = svm.SVC(kernel = 'poly')
    classifier.fit(training_images, training_labels)

    # training accuracy is reported for reference only
    train_pred = classifier.predict(training_images)
    print("Training accuracy:", accuracy_score(training_labels, train_pred))

    # the headline accuracy is measured on data the model has not seen
    pred = classifier.predict(test_images)
    print("Accuracy:", accuracy_score(test_labels, pred))

    return pred

tr_X, tr_Y, te_X, te_Y, y, Y = get_data()
# features are flattened to 136 values (68 * 2) per sample; column 0 of the
# label matrix is used as the class label
pred = img_SVM(
    tr_X.reshape((tr_X.size//136, 68*2)),
    list(zip(*tr_Y))[0],
    te_X.reshape((te_X.size//136, 68*2)),
    list(zip(*te_Y))[0],
)

    

# Emotion CNN

In [None]:
import os
import shutil
import random

seed = 1
random.seed(seed)  # fixed seed so the train/validation/test assignment is reproducible

# Same procedure as for gender, but for smiling vs. not smiling.
# NOTE: the paths are absolute and will not work on a different machine.
directory = "/Users/devan/Desktop/AMLS_20-21_SN12345678/Datasets/celeba/img/"
labels_csv = "/Users/devan/Desktop/AMLS_20-21_SN12345678/Datasets/celeba/labels.csv"
train = "/Users/devan/Desktop/AMLS_20-21_SN12345678/A2/data1/train/"
test = "/Users/devan/Desktop/AMLS_20-21_SN12345678/A2/data1/test/"
validation = "/Users/devan/Desktop/AMLS_20-21_SN12345678/A2/data1/validation/"

# One sub-directory per class inside every split folder.
# exist_ok=True lets this cell be re-run without crashing on existing folders.
for split_dir in (train, test, validation):
    for class_dir in ("smiling/", "n_smiling/"):
        os.makedirs(split_dir + class_dir, exist_ok=True)

test_examples = train_examples = validation_examples = 0

# Read all label rows up front (skipping the header) so the file is closed promptly.
with open(labels_csv) as labels_file:
    label_rows = labels_file.readlines()[1:]

for line in label_rows:
    split_line = line.split(",")
    img_file = split_line[1]   # image file name
    s_ns = split_line[3]       # smiling label: 1 or -1

    random_num = random.random()  # uniform draw in [0, 1) that decides the split

    if random_num < 0.8:       # ~80% of the rows go to the train folder
        location = train
        train_examples += 1

    elif random_num < 0.9:     # ~10% of the rows go to the validation folder
        location = validation
        validation_examples += 1

    else:                      # remaining ~10% go to the test folder
        location = test
        test_examples += 1

    # NOTE(review): ".jpg" is always appended to the destination name; if img_file
    # already carries an extension the copy is named "<name>.jpg.jpg" - confirm.
    smiling = int(float(s_ns))  # parse once instead of once per branch
    if smiling == 1:
        # label 1 -> smiling directory
        shutil.copy(directory + img_file, location + "smiling/" + img_file + ".jpg")

    elif smiling == -1:
        # label -1 -> not-smiling directory
        shutil.copy(directory + img_file, location + "n_smiling/" + img_file + ".jpg")

print(f"Number of training examples {train_examples}")
print(f"Number of test examples {test_examples}")
print(f"Number of validation examples {validation_examples}")

In [None]:
import tensorflow as tf
import math
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import roc_curve
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# NOTE(review): these counts are hard-coded and replace the values counted by the
# dataset-split cell - confirm they still match the folders on disk.
train_examples = 3955
test_examples = 521
validation_examples = 524
img_height = img_width = 90  # target size passed to the image generators
batch_size = 32              # batch size used by the image generators

In [None]:
# Smile classifier: four convolution layers with two max-pooling stages,
# dropout, a 160-unit sigmoid layer and a single sigmoid output unit.
# The 90x90 single-channel input is not flattened because the first layers are
# convolutional.
model = keras.Sequential()
model.add(keras.Input(shape=(90, 90, 1)))
model.add(layers.Conv2D(32, 11, strides=4, padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2, padding="same"))
model.add(layers.Conv2D(96, 5, strides=1, padding="same", activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2, padding="same"))
model.add(layers.Conv2D(128, 5, strides=1, padding="same", activation="relu"))
model.add(layers.Conv2D(96, 5, strides=1, padding="same", activation="relu"))
model.add(layers.Dropout(0.5))
# NOTE(review): this Dense layer comes BEFORE Flatten, so it receives the
# un-flattened feature maps - confirm this ordering is intended.
model.add(layers.Dense(160, activation="sigmoid"))
model.add(layers.Flatten())
model.add(layers.Dense(1, activation="sigmoid"))

In [None]:
# Training images are rescaled to [0, 1] and augmented (rotation, zoom, flips).
train_datagen = ImageDataGenerator(
    rescale = 1/255,
    rotation_range = 15,
    zoom_range = (0.95,0.95),
    horizontal_flip = True,
    vertical_flip = True,
    data_format = "channels_last",
    dtype = tf.float32,
)

# Validation and test images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale = 1.0/255, dtype = tf.float32)
test_datagen = ImageDataGenerator(rescale = 1.0/255, dtype = tf.float32)

train_gen = train_datagen.flow_from_directory(
    "/Users/devan/Desktop/AMLS_20-21_SN12345678/A2/data1/train/",
    target_size = (img_height, img_width),
    batch_size = batch_size,
    color_mode = "grayscale",
    class_mode = "binary",
    shuffle = True,
    seed = 123,
)

validation_gen = validation_datagen.flow_from_directory(
    "/Users/devan/Desktop/AMLS_20-21_SN12345678/A2/data1/validation/",
    target_size = (img_height, img_width),
    batch_size = batch_size,
    color_mode = "grayscale",
    class_mode = "binary",
    shuffle = True,
    seed = 123,
)

# The test generator must NOT shuffle: the ROC cell reads the labels in one pass
# and the predictions in another, and both passes have to see the images in the
# same order for labels and scores to line up.
test_gen = test_datagen.flow_from_directory(
    "/Users/devan/Desktop/AMLS_20-21_SN12345678/A2/data1/test/",
    target_size = (img_height, img_width),
    batch_size = batch_size,
    color_mode = "grayscale",
    class_mode = "binary",
    shuffle = False,
    seed = 123,
)


In [None]:
# Binary cross-entropy on the sigmoid output (probabilities, not logits).
# `learning_rate` is the supported argument name; `lr` is a deprecated alias that
# newer Keras releases reject.
model.compile(
    loss = keras.losses.BinaryCrossentropy(from_logits = False),
    optimizer = keras.optimizers.Adam(learning_rate = 3e-4),
    metrics = ["accuracy"],
)

In [None]:
# Train for 10 epochs; the number of steps per epoch is the number of whole
# batches that fit into the hard-coded example counts.
steps_train = train_examples // batch_size
steps_val = validation_examples // batch_size

history = model.fit(
    train_gen,
    validation_data=validation_gen,
    steps_per_epoch=steps_train,
    validation_steps=steps_val,
    epochs=10,
    verbose=2,
)

In [None]:
def plot_roc(labels, data):
    """Plot the ROC curve of the global `model` on `data`.

    Args:
        labels: true binary labels, one per sample in `data`, in the same order.
        data: anything `model.predict` accepts (image array or generator).
    """
    predictions = model.predict(data)
    fp, tp, _ = roc_curve(labels, np.ravel(predictions))

    # rates are plotted as percentages
    plt.plot(100*fp, 100*tp)
    plt.xlabel("False")
    plt.ylabel("True")
    plt.grid()
    plt.show()  # was `plt.show` without parentheses, which never rendered the figure


# Collect images AND labels in one single pass over the generator, so every
# prediction is guaranteed to be scored against the label of the same image
# regardless of how the generator orders its batches.
image_batches = []
label_batches = []
num_batches = 0

for x, y in test_gen:
    image_batches.append(x)
    label_batches.append(y)
    num_batches += 1
    # the generator loops forever; stop after exactly one epoch
    if num_batches >= len(test_gen):
        break

test_images = np.concatenate(image_batches)
test_labels = np.concatenate(label_batches)

plot_roc(test_labels, test_images)

In [None]:
model.evaluate(test_gen, verbose=2)

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# the second curve is the validation split, not the test split
plt.legend((['Train', 'Validation']), loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
# this figure shows the loss, so title and axis are labelled accordingly
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# the second curve is the validation split, not the test split
plt.legend((['Train', 'Validation']), loc='upper left')
plt.show()

# Emotion SVM

In [None]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import classification_report,accuracy_score
import pandas as pd
from sklearn.datasets import load_iris

In [None]:
import landmarksEmot as l2
import numpy as np
from sklearn.metrics import classification_report,accuracy_score
from sklearn import svm

In [None]:
def get_data():
    """Load landmark features and labels and cut out fixed train/test slices.

    Returns:
        Tuple ``(tr_X, tr_Y, te_X, te_Y, y, Y)``: rows 0-299 of the features and
        of the two-column label matrix for training, rows 350-549 for testing,
        followed by the raw label vector ``y`` and the full label matrix ``Y``.
    """
    features, y = l2.extract_features_labels()

    # two-column label matrix: column 0 is y, column 1 is -(y - 1)
    Y = np.array([y, -(y - 1)]).T

    train_rows = slice(None, 300)
    test_rows = slice(350, 550)   # rows 300-349 are not used by either slice

    return (
        features[train_rows],
        Y[train_rows],
        features[test_rows],
        Y[test_rows],
        y,
        Y,
    )



In [None]:
get_data()

In [None]:
# Load the data and show the test features, first as returned and then
# flattened to one row of 136 values (68 * 2) per sample.
tr_X, tr_Y, te_X, te_Y, y, Y = get_data()
print(te_X.shape)
n_test_rows = te_X.size // 136
print(te_X.reshape((n_test_rows, 68 * 2)))

In [None]:
# sklearn functions implementation
def img_SVM(training_images, training_labels, test_images, test_labels):
    """Fit a polynomial-kernel SVM and evaluate it on the held-out test data.

    The previous version predicted on the training images only and never used
    the test arguments, so the printed accuracy was a training accuracy.

    Args:
        training_images: 2-D feature matrix used for fitting.
        training_labels: labels matching `training_images`.
        test_images: 2-D feature matrix of held-out samples.
        test_labels: labels matching `test_images`.

    Returns:
        Predicted labels for `test_images`.
    """
    classifier = svm.SVC(kernel = 'poly')
    classifier.fit(training_images, training_labels)

    # training accuracy is reported for reference only
    train_pred = classifier.predict(training_images)
    print("Training accuracy:", accuracy_score(training_labels, train_pred))

    # the headline accuracy is measured on data the model has not seen
    pred = classifier.predict(test_images)
    print("Accuracy:", accuracy_score(test_labels, pred))

    return pred

tr_X, tr_Y, te_X, te_Y, y, Y = get_data()
# features are flattened to 136 values (68 * 2) per sample; column 0 of the
# label matrix is used as the class label
pred = img_SVM(
    tr_X.reshape((tr_X.size//136, 68*2)),
    list(zip(*tr_Y))[0],
    te_X.reshape((te_X.size//136, 68*2)),
    list(zip(*te_Y))[0],
)

    

# FaceShape

In [None]:
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display
%matplotlib inline
import pandas as pd
import numpy as np
from PIL import Image
from skimage.feature import hog
from skimage.color import rgb2grey
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import roc_curve, auc

In [None]:
# Load the cartoon-set labels; the first CSV column becomes the index.
labels = pd.read_csv("/Users/devan/Desktop/AMLS_20-21_SN12345678/Datasets/cartoon_set/labels.csv", index_col=0)

# keep only the first `sample_size` rows
sample_size = 150
labels = labels.drop(labels.index[sample_size:])

# DataFrame.drop returns a new frame; the result has to be assigned, otherwise
# the eye_color column silently stays in `labels`
labels = labels.drop("eye_color", axis=1)
labels.head()

In [None]:
def get_image(row_id, root="/Users/devan/Desktop/AMLS_20-21_SN12345678/Datasets/cartoon_set/img"):
    """
    Converts an image number into the file path where the image is located,
    opens the image, and returns the image as a numpy array.

    Args:
        row_id: image number; the file is expected at ``<root>/<row_id>.png``.
        root: directory that contains the image files.

    Returns:
        The decoded image as a numpy array.
    """
    filename = "{}.png".format(row_id)
    file_path = os.path.join(root, filename)
    # the context manager closes the underlying file once the pixel data has been
    # copied into the array, so file handles are not leaked when loading many images
    with Image.open(file_path) as img:
        return np.array(img)

# Display the first image labelled face_shape 4.0 and the first labelled 3.0.
is_shape_four = labels["face_shape"] == 4.0
shape1_row = labels.loc[is_shape_four].index[0]
plt.imshow(get_image(shape1_row))
plt.show()

is_shape_three = labels["face_shape"] == 3.0
shape2_row = labels.loc[is_shape_three].index[0]
plt.imshow(get_image(shape2_row))
plt.show()

In [None]:
# load a bombus image using our get_image function and bombus_row from the previous cell
shape1 = get_image(shape1_row)
shape2 = get_image(shape2_row)
print('Color bombus image has shape: ', shape1)

# convert the bombus image to greyscale
grey_shape1 = rgb2grey(shape1)

plt.imshow(grey_shape1, cmap=mpl.cm.gray)

print('Greyscale bombus image has shape: ', grey_shape1)

In [None]:
# HOG descriptor of the greyscale example image; with visualize=True the call
# also returns an image rendering of the descriptor, which is displayed below.
hog_features, hog_image = hog(
    grey_shape1,
    pixels_per_cell=(13, 13),
    block_norm='L2-Hys',
    visualize=True,
)

plt.imshow(hog_image, cmap=mpl.cm.gray)

In [None]:
def create_features(img):
    """Build one flat feature vector for an image.

    The vector is the flattened raw pixel values followed by the HOG descriptor
    of the greyscale image. Previously the HOG descriptor was computed but then
    left out of the returned vector.

    Args:
        img: image as a numpy array.

    Returns:
        1-D numpy array: colour features followed by HOG features.
    """
    # flatten the colour image into a 1-D vector
    color_features = img.flatten()
    # convert image to greyscale
    grey_image = rgb2grey(img)
    # get HOG features from greyscale image
    hog_features = hog(grey_image, block_norm='L2-Hys', pixels_per_cell=(16, 16))
    # combine color and hog features into a single array
    flat_features = np.hstack((color_features, hog_features))
    return flat_features

shape2_features = create_features(shape2)

print(shape2_features.shape)

In [None]:
def create_feature_matrix(label_dataframe):
    """Return a matrix with one feature row per image id in the dataframe index.

    Each index value is loaded with get_image and converted with create_features;
    rows appear in index order.
    """
    per_image_features = [
        create_features(get_image(img_id))
        for img_id in label_dataframe.index
    ]
    # stack the per-image vectors into a single 2-D array
    return np.array(per_image_features)

# run create_feature_matrix on our dataframe of images
feature_matrix = create_feature_matrix(labels)
print(feature_matrix)

In [None]:
# get shape of feature matrix
print('Feature matrix shape is: ', feature_matrix.shape)

# standardise every feature column
ss = StandardScaler()
shape_stand = ss.fit_transform(feature_matrix)

# PCA cannot keep more components than min(n_samples, n_features), so the
# requested 500 components are capped to what the data allows
n_components = min(500, *feature_matrix.shape)
pca = PCA(n_components=n_components)
# run the PCA itself on the standardized matrix (the scaler was applied a second
# time here before, so no dimensionality reduction ever happened)
shape_pca = pca.fit_transform(shape_stand)
# look at new shape
print('PCA matrix shape is: ', shape_pca.shape)

In [None]:
# Features are the transformed matrix, targets are the face_shape labels;
# 30% of the rows are held out for testing with a fixed random state.
X = pd.DataFrame(shape_pca)
y = pd.Series(labels.face_shape.values)

split_parts = train_test_split(X, y, test_size=.3, random_state=1234123)
X_train, X_test, y_train, y_test = split_parts

# look at the distribution of labels in the train set
pd.Series(y_train).value_counts()

In [None]:
# define support vector classifier
# Linear-kernel support vector classifier; probability=True is required for the
# predict_proba call used by the ROC cell.
# NOTE(review): this rebinds the name `svm`, which earlier cells import as the
# sklearn module (`from sklearn import svm`); cells calling `svm.SVC(...)` will
# fail if they are re-run after this one.
svm = SVC(kernel='linear', probability=True, random_state=42)

# fit model
svm.fit(X_train, y_train)

In [None]:
# generate predictions
# predicted class for every held-out row
y_pred = svm.predict(X_test)

# calculate accuracy of the predictions against the held-out labels
accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy is: ', accuracy)

In [None]:
# Random-forest baseline with 6 trees on the same train split.
# NOTE(review): this rebinds `model`, the name the CNN sections use for the Keras
# network - confirm no later cell still expects the CNN under that name.
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators = 6)
model.fit(X_train, y_train)

In [None]:
model.score(X_test, y_test)

In [None]:
# predict probabilities for X_test using predict_proba
# predict class-membership probabilities for X_test; column k belongs to the
# class svm.classes_[k]
probabilities = svm.predict_proba(X_test)

plt.title('Receiver Operating Characteristic')

# One-vs-rest ROC curve per class. Each probability column has to be scored with
# ITS OWN class as the positive label; using pos_label=1 for every column (as
# before) is only correct for the column of class 1. Looping over classes_ also
# avoids assuming that exactly five classes are present.
for k, class_label in enumerate(svm.classes_):
    # false positive rate and true positive rate at different thresholds
    false_positive_rate, true_positive_rate, thresholds = roc_curve(
        y_test, probabilities[:, k], pos_label=class_label
    )
    # area under this class's curve
    roc_auc = auc(false_positive_rate, true_positive_rate)
    # false positive rate on the x axis, true positive rate on the y axis
    plt.plot(false_positive_rate,
             true_positive_rate,
             label='AUC{} = {:0.2f}'.format(k + 1, roc_auc))

plt.legend(loc=0)
# chance-level diagonal for reference
plt.plot([0,1], [0,1], ls='--')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate');

# Eye Colour Recognition

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
%matplotlib inline

In [None]:
# Load the cartoon-set labels and give the unnamed first column the name 'Id'.
labels_path = "/Users/devan/Desktop/AMLS_20-21_SN12345678/Datasets/cartoon_set/labels.csv"
train = pd.read_csv(labels_path)
train = train.rename(columns={'Unnamed: 0': 'Id'})
train

In [None]:
# Load every labelled image, resized to 50x50, as a float array scaled to [0, 1].
train_image = []
for img_id in tqdm(train['Id']):
    # target_size is (height, width); the channel count is not part of it
    img = image.load_img("/Users/devan/Desktop/AMLS_20-21_SN12345678/Datasets/cartoon_set/img/"+str(img_id)+'.png',target_size=(50,50))
    img = image.img_to_array(img)
    img = img/255
    train_image.append(img)
# stack into one array with one image per row along the first axis
X = np.array(train_image)

In [None]:
X.shape
# work with the first 500 images only (presumably to limit training time - confirm)
X1 = X[:500]
X1.shape

In [None]:
plt.imshow(X1[2])

In [None]:
# The target of this section is the eye colour only. Converting the whole
# DataFrame (Id and every other column) produced a 2-D label array that is not a
# valid classification target.
y = train['eye_color'].values
# labels for the same first 500 rows that were kept from the image array
y1 = y[:500]
y1.shape

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X1, y1, random_state=42, test_size=0.1)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
# Flatten every image to one feature row. The row count is taken from the data
# instead of the hard-coded 450/50, so the cell keeps working when the subset
# size or the test fraction changes.
X_train1 = X_train.reshape(len(X_train), -1)
X_test1 = X_test.reshape(len(X_test), -1)

In [None]:
# define support vector classifier
# Polynomial-kernel support vector classifier on the flattened pixel values.
# NOTE(review): `SVC` is not imported by this section's import cell; it is only
# in scope if the FaceShape import cell was executed first - confirm.
svm = SVC(kernel='poly', probability=True, random_state=42)

# fit model
svm.fit(X_train1, y_train)

In [None]:
# generate predictions
# predicted class for every held-out image
y_pred = svm.predict(X_test1)

# calculate accuracy of the predictions against the held-out labels
# NOTE(review): `accuracy_score` is not imported by this section's import cell;
# it relies on an import from an earlier section - confirm.
accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy is: ', accuracy)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random-forest baseline with 40 trees on the flattened training images.
# NOTE(review): this rebinds the name `model` used by earlier sections.
model = RandomForestClassifier(n_estimators = 40)
model.fit(X_train1, y_train)

In [None]:
model.score(X_test1, y_test)