# Importing Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.layers as tfl
import pandas as pd
import zipfile

# Loading Data

In [None]:
# Unpack both competition archives into the current working directory.
for archive_path in (
    '/kaggle/input/facial-keypoints-detection/training.zip',
    '/kaggle/input/facial-keypoints-detection/test.zip',
):
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall()

In [None]:
# Load the extracted CSV files (relative paths, i.e. from the current working directory).
train_data_df = pd.read_csv('training.csv')
test_data_df = pd.read_csv('test.csv')

# Describing Data

In [None]:
# Preview the first rows of the training frame.
train_data_df.head()

In [None]:
# Preview the first rows of the test frame.
test_data_df.head()

In [None]:
# List the training frame's column names as a Series (position -> name).
pd.Series(train_data_df.columns)

In [None]:
# List the test frame's column names as a Series (position -> name).
pd.Series(test_data_df.columns)

In [None]:
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
train_data_df.info()

In [None]:
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
test_data_df.info()

In [None]:
# Report the number of missing values in every training column.
missing_counts = train_data_df.isnull().sum()
for column_name in train_data_df.columns.tolist():
    print(f'{column_name} column missing values: {missing_counts[column_name]}')

In [None]:
# Show the training rows whose 'left_eye_center_x' value is missing.
train_data_df[train_data_df['left_eye_center_x'].isnull()]

In [None]:
# Show the training rows whose 'left_eye_inner_corner_x' value is missing.
train_data_df[train_data_df['left_eye_inner_corner_x'].isnull()]

# Splitting data into training images, test images, and labels

In [None]:
def process_data(data_df, train):
    """Convert a competition data frame into NumPy arrays.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with an 'Image' column holding space-separated pixel values.
        When ``train`` is true, its first 30 columns are used as labels.
    train : bool
        Whether labels should be extracted.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is a float array with one row of pixel values
        per frame row. ``y`` is an array of the first 30 columns when
        ``train`` is true, otherwise the placeholder string ``" "``.
    """
    # Iterate over the column values instead of looking rows up by label
    # (data_df['Image'][i]); a label lookup raises KeyError as soon as the
    # frame does not carry the default 0..n-1 index (e.g. after filtering).
    pixel_rows = [image.split(' ') for image in data_df['Image']]
    X = np.array(pixel_rows, dtype='float')

    if train:
        y = np.array(data_df.iloc[:, :30])
        return X, y
    return X, " "

In [None]:
# Training frame -> pixel array and label array; for the test frame only the
# pixel array is kept (the second return value is a placeholder).
X_train,y_train=process_data(train_data_df,True)
X_test_submit,_=process_data(test_data_df,False)

In [None]:
# Print the converted arrays for a quick visual check.
print(X_train)
print(y_train)
print(X_test_submit)

# Fixing null values

In [None]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

# Fill the NaNs of each of the 30 label columns with that column's mean,
# writing the result back into y_train in place.
for col in range(30):
    column = y_train[:, col].reshape(-1, 1)
    y_train[:, col] = imputer.fit_transform(column).reshape(-1,)

In [None]:
# Spot-check individual label entries after imputation
# (presumably positions that were missing before — TODO confirm).
print(y_train[556,0])
print(y_train[2239,0])
print(y_train[1600,4])
print(y_train[1654,4])

In [None]:
# Print the array shapes before reshaping the pixel rows into images.
print(X_train.shape)
print(y_train.shape)
print(X_test_submit.shape)

In [None]:
# Turn every flat pixel row into a 96 x 96 image.
X_train = X_train.reshape(len(X_train), 96, 96)
X_test_submit = X_test_submit.reshape(len(X_test_submit), 96, 96)

# Exploring Data

In [None]:
# Display the first training image in grayscale and save the figure as 'plot.png'.
plt.imshow(X_train[0], cmap='gray')
plt.title("Input Image")
plt.savefig('plot.png', bbox_inches='tight')
plt.show()

In [None]:
# Display the first submission (test) image in grayscale and save the figure.
# NOTE(review): 'plot.png' is the same file name used when saving the first
# training image earlier in this notebook, so that file is overwritten here —
# confirm this is intended.
plt.imshow(X_test_submit[0], cmap='gray')
plt.title("Input Image")
plt.savefig('plot.png', bbox_inches='tight')
plt.show()

In [None]:
# Overlay the first sample's labels on its image: even-indexed label entries
# are plotted as x coordinates and odd-indexed entries as y coordinates.
plt.imshow(X_train[0], cmap='gray')
plt.scatter(y_train[0][0::2], y_train[0][1::2], c='red', marker='o')
plt.title("Image with Facial Keypoints")
plt.show()

In [None]:
# Scale the pixel values by 1/255.
X_train = np.divide(X_train, 255.0)
X_test_submit = np.divide(X_test_submit, 255.0)

In [None]:
# Keep references to the complete training arrays; X_train / y_train are
# rebound to the split subsets by train_test_split further down.
Final_X_train=X_train
Final_y_train=y_train

# Splitting the training data into train and held-out sets to evaluate the models

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training samples (fixed seed) for evaluating the models.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Build batched tf.data pipelines. Model.fit ignores its `shuffle` argument
# for Dataset inputs, so per-epoch shuffling has to be part of the pipeline:
# shuffle the individual samples (buffer covers the whole set) before batching.
# The evaluation set keeps a fixed order.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=len(X_train))
    .batch(64)
)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(64)

# Function to train models on different architectures

In [None]:
def model_arch(arch):
    """Build, compile and train a model produced by an architecture factory.

    Parameters
    ----------
    arch : callable
        Called as ``arch((96, 96, 1))``; must return an uncompiled Keras model.

    Returns
    -------
    The trained model. Training reads the notebook-level ``train_dataset``
    and validates on the notebook-level ``test_dataset``.
    """
    model = arch((96, 96, 1))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='mse',
        # The outputs are continuous coordinates, so classification accuracy
        # carries no information; report mean absolute error instead.
        metrics=['mae'],
    )
    model.summary()
    # batch_size / shuffle are not passed: both datasets are already batched,
    # and Keras ignores `shuffle` (and expects no `batch_size`) for
    # tf.data.Dataset inputs.
    model.fit(train_dataset, epochs=100, validation_data=test_dataset)
    return model

In [None]:
def arch(input_shape):
    """Baseline CNN regressor with 30 linear outputs.

    Three stages of Conv2D (stride 2, default padding) -> ReLU -> MaxPool ->
    BatchNormalization with 38, 114 and 196 filters, followed by
    Dense(100, relu), Dropout(0.2) and a linear Dense(30) output.

    BatchNormalization is called without a hard-coded ``training=True`` so
    Keras uses batch statistics during ``fit`` and the learned moving
    averages during ``evaluate``/``predict``. Forcing ``training=True`` made
    every prediction depend on the other samples in its batch.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``tf.keras.Input``.

    Returns
    -------
    An uncompiled ``tf.keras.Model``.
    """
    input_img = tf.keras.Input(shape=input_shape)

    x = tfl.Conv2D(filters=38, kernel_size=5, strides=(2, 2))(input_img)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)

    x = tfl.Conv2D(filters=114, kernel_size=3, strides=(2, 2))(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)

    x = tfl.Conv2D(filters=196, kernel_size=3, strides=(2, 2))(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)

    x = tfl.Flatten()(x)

    x = tfl.Dense(units=100, activation='relu')(x)
    x = tfl.Dropout(0.2)(x)

    outputs = tfl.Dense(units=30, activation='linear')(x)
    return tf.keras.Model(inputs=input_img, outputs=outputs)

In [None]:
# Build and train the `arch` architecture via model_arch.
model1=model_arch(arch)

In [None]:
def arch2(input_shape):
    """CNN regressor with 'same' padding and BatchNormalization before ReLU.

    Three stages of Conv2D (stride 2, 'same') -> BatchNormalization -> ReLU
    -> MaxPool with 38, 114 and 196 filters, followed by Dense(100, relu),
    Dropout(0.2) and a linear Dense(30) output.

    BatchNormalization is called without a hard-coded ``training=True`` so
    Keras uses batch statistics during ``fit`` and the learned moving
    averages during ``evaluate``/``predict``. Forcing ``training=True`` made
    every prediction depend on the other samples in its batch.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``tf.keras.Input``.

    Returns
    -------
    An uncompiled ``tf.keras.Model``.
    """
    input_img = tf.keras.Input(shape=input_shape)

    x = tfl.Conv2D(filters=38, kernel_size=5, strides=(2, 2), padding='same')(input_img)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.Conv2D(filters=114, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.Conv2D(filters=196, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.Flatten()(x)

    x = tfl.Dense(units=100, activation='relu')(x)
    x = tfl.Dropout(0.2)(x)

    outputs = tfl.Dense(units=30, activation='linear')(x)
    return tf.keras.Model(inputs=input_img, outputs=outputs)

In [None]:
# Build and train the `arch2` architecture via model_arch.
model2=model_arch(arch2)

In [None]:
def arch3(input_shape):
    """CNN regressor with three pooled conv stages and a 500-unit dense layer.

    Three stages of Conv2D (stride 2, 'same') -> BatchNormalization -> ReLU
    -> MaxPool with 38, 114 and 196 filters, followed by Dense(500, relu),
    Dropout(0.2) and a linear Dense(30) output.

    BatchNormalization is called without a hard-coded ``training=True`` so
    Keras uses batch statistics during ``fit`` and the learned moving
    averages during ``evaluate``/``predict``. Forcing ``training=True`` made
    every prediction depend on the other samples in its batch.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``tf.keras.Input``.

    Returns
    -------
    An uncompiled ``tf.keras.Model``.
    """
    input_img = tf.keras.Input(shape=input_shape)

    x = tfl.Conv2D(filters=38, kernel_size=5, strides=(2, 2), padding='same')(input_img)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.Conv2D(filters=114, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.Conv2D(filters=196, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.Flatten()(x)

    x = tfl.Dense(units=500, activation='relu')(x)
    x = tfl.Dropout(0.2)(x)

    outputs = tfl.Dense(units=30, activation='linear')(x)
    return tf.keras.Model(inputs=input_img, outputs=outputs)

In [None]:
# Build and train the `arch3` architecture via model_arch.
model3=model_arch(arch3)

In [None]:
def arch4(input_shape):
    """CNN regressor that pools only after the first conv stage.

    Conv2D(38, 5x5, stride 2, 'same') -> BatchNormalization -> ReLU ->
    MaxPool, then two Conv2D (3x3, stride 2, 'same') -> BatchNormalization
    -> ReLU stages with 114 and 196 filters and no pooling, followed by
    Dense(500, relu), Dropout(0.2) and a linear Dense(30) output.

    BatchNormalization is called without a hard-coded ``training=True`` so
    Keras uses batch statistics during ``fit`` and the learned moving
    averages during ``evaluate``/``predict``. Forcing ``training=True`` made
    every prediction depend on the other samples in its batch.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``tf.keras.Input``.

    Returns
    -------
    An uncompiled ``tf.keras.Model``.
    """
    input_img = tf.keras.Input(shape=input_shape)

    x = tfl.Conv2D(filters=38, kernel_size=5, strides=(2, 2), padding='same')(input_img)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.Conv2D(filters=114, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)

    x = tfl.Conv2D(filters=196, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)

    x = tfl.Flatten()(x)

    x = tfl.Dense(units=500, activation='relu')(x)
    x = tfl.Dropout(0.2)(x)

    outputs = tfl.Dense(units=30, activation='linear')(x)
    return tf.keras.Model(inputs=input_img, outputs=outputs)

In [None]:
# Build and train the `arch4` architecture via model_arch.
model4=model_arch(arch4)

In [None]:
def arch5(input_shape):
    """CNN regressor with a depthwise convolution as its middle stage.

    Conv2D(38, 5x5, stride 2, 'same') -> BatchNormalization -> ReLU ->
    MaxPool, DepthwiseConv2D(3x3, stride 2, 'same') -> BatchNormalization ->
    ReLU, Conv2D(196, 3x3, stride 2, 'same') -> BatchNormalization -> ReLU
    -> MaxPool, followed by Dense(500, relu), Dropout(0.2) and a linear
    Dense(30) output.

    BatchNormalization is called without a hard-coded ``training=True`` so
    Keras uses batch statistics during ``fit`` and the learned moving
    averages during ``evaluate``/``predict``. Forcing ``training=True`` made
    every prediction depend on the other samples in its batch.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``tf.keras.Input``.

    Returns
    -------
    An uncompiled ``tf.keras.Model``.
    """
    input_img = tf.keras.Input(shape=input_shape)

    x = tfl.Conv2D(filters=38, kernel_size=5, strides=(2, 2), padding='same')(input_img)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.DepthwiseConv2D(kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)

    x = tfl.Conv2D(filters=196, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.Flatten()(x)

    x = tfl.Dense(units=500, activation='relu')(x)
    x = tfl.Dropout(0.2)(x)

    outputs = tfl.Dense(units=30, activation='linear')(x)
    return tf.keras.Model(inputs=input_img, outputs=outputs)

In [None]:
# Build and train the `arch5` architecture via model_arch.
model5=model_arch(arch5)

In [None]:
def arch6(input_shape):
    """CNN regressor with a depthwise middle stage and no final pooling.

    Conv2D(38, 5x5, stride 2, 'same') -> BatchNormalization -> ReLU ->
    MaxPool, DepthwiseConv2D(3x3, stride 2, 'same') -> BatchNormalization ->
    ReLU, Conv2D(196, 3x3, stride 2, 'same') -> BatchNormalization -> ReLU,
    followed by Dense(500, relu), Dropout(0.2) and a linear Dense(30) output.

    BatchNormalization is called without a hard-coded ``training=True`` so
    Keras uses batch statistics during ``fit`` and the learned moving
    averages during ``evaluate``/``predict``. Forcing ``training=True`` made
    every prediction depend on the other samples in its batch.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``tf.keras.Input``.

    Returns
    -------
    An uncompiled ``tf.keras.Model``.
    """
    input_img = tf.keras.Input(shape=input_shape)

    x = tfl.Conv2D(filters=38, kernel_size=5, strides=(2, 2), padding='same')(input_img)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.DepthwiseConv2D(kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)

    x = tfl.Conv2D(filters=196, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)

    x = tfl.Flatten()(x)

    x = tfl.Dense(units=500, activation='relu')(x)
    x = tfl.Dropout(0.2)(x)

    outputs = tfl.Dense(units=30, activation='linear')(x)
    return tf.keras.Model(inputs=input_img, outputs=outputs)

In [None]:
# Build and train the `arch6` architecture via model_arch.
model6=model_arch(arch6)

In [None]:
# trying to use the skip connection technique
def identity_block(X, f, filters, training=True):
    """Residual block whose shortcut is the unmodified input.

    The main path applies three Conv2D + BatchNormalization steps
    (1x1 -> f x f -> 1x1, all stride 1); the first two are followed by a
    ReLU. The input is then added to the main-path output and a final ReLU
    is applied.

    Parameters
    ----------
    X : tensor
        Input tensor; it is added to the main-path output, so its shape must
        match what the third convolution produces.
    f : int
        Kernel size of the middle convolution.
    filters : sequence of three ints
        Filter counts of the three convolutions, in order.
    training : bool
        Forwarded as ``training`` to every BatchNormalization call.

    Returns
    -------
    The output tensor of the block.
    """
    F1, F2, F3 = filters
    shortcut = X

    # (filters, kernel size, padding, followed by ReLU?) for the main path.
    stages = (
        (F1, 1, 'valid', True),
        (F2, f, 'same', True),
        (F3, 1, 'valid', False),
    )
    for n_filters, kernel, pad, use_relu in stages:
        X = tfl.Conv2D(filters=n_filters, kernel_size=kernel, strides=(1, 1), padding=pad)(X)
        X = tfl.BatchNormalization(axis=3)(X, training=training)
        if use_relu:
            X = tfl.Activation('relu')(X)

    merged = tfl.Add()([shortcut, X])
    return tfl.Activation('relu')(merged)

In [None]:
def arch7(input_shape):
    """CNN regressor with a depthwise stage followed by a residual block.

    Conv2D(38, 5x5, stride 2, 'same') -> BatchNormalization -> ReLU ->
    MaxPool, DepthwiseConv2D(3x3, stride 2, 'same') -> BatchNormalization ->
    ReLU, ``identity_block`` with filters [56, 100, 38] and kernel size 1,
    Conv2D(196, 3x3, stride 2, 'same') -> BatchNormalization -> ReLU,
    followed by Dense(500, relu), Dropout(0.2) and a linear Dense(30) output.

    BatchNormalization is called without a hard-coded ``training=True`` (and
    ``training=None`` is passed to ``identity_block``) so Keras uses batch
    statistics during ``fit`` and the learned moving averages during
    ``evaluate``/``predict``. Forcing ``training=True`` made every prediction
    depend on the other samples in its batch.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``tf.keras.Input``.

    Returns
    -------
    An uncompiled ``tf.keras.Model``.
    """
    input_img = tf.keras.Input(shape=input_shape)

    x = tfl.Conv2D(filters=38, kernel_size=5, strides=(2, 2), padding='same')(input_img)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)
    x = tfl.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = tfl.DepthwiseConv2D(kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)

    # The last filter count (38) equals the channel count coming out of the
    # depthwise stage, which the block's shortcut addition requires.
    x = identity_block(x, 1, [56, 100, 38], training=None)

    x = tfl.Conv2D(filters=196, kernel_size=3, strides=(2, 2), padding='same')(x)
    x = tfl.BatchNormalization(axis=3)(x)
    x = tfl.ReLU()(x)

    x = tfl.Flatten()(x)

    x = tfl.Dense(units=500, activation='relu')(x)
    x = tfl.Dropout(0.2)(x)

    outputs = tfl.Dense(units=30, activation='linear')(x)
    return tf.keras.Model(inputs=input_img, outputs=outputs)

In [None]:
# Build and train the `arch7` architecture via model_arch.
model7=model_arch(arch7)

In [None]:
# Rebuild train_dataset from the complete training arrays. These arrays are
# still in original file order (they were saved before train_test_split), and
# Model.fit ignores its `shuffle` argument for Dataset inputs, so the samples
# are shuffled here (reshuffled every epoch) before batching.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((Final_X_train, Final_y_train))
    .shuffle(buffer_size=len(Final_X_train))
    .batch(64)
)

# Training one of the best models (architecture 4) on all training data

In [None]:
# Build a fresh arch4 model and train it on train_dataset (rebuilt from the
# complete training arrays in the preceding cell); no validation data is used.
model4_Final = arch4((96, 96, 1))
model4_Final.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='mse',
    # Regression on continuous coordinates: classification accuracy carries
    # no information, so report mean absolute error instead.
    metrics=['mae'],
)
model4_Final.summary()
# batch_size / shuffle are not passed: the dataset is already batched, and
# Keras ignores `shuffle` (and expects no `batch_size`) for Dataset inputs.
history = model4_Final.fit(train_dataset, epochs=150)

In [None]:
# Save the trained model under the relative path 'model'.
model4_Final.save('model')

# Downloading the model

In [None]:
import zipfile
import os
from IPython.display import FileLink

def zip_dir(directory=os.curdir, file_name='model.zip'):
    """
    Zip all the files in a directory (recursively) and return a download link.

    Parameters
    _____
    directory: str
        directory to be zipped, default is the current working directory.
        Note: the process working directory is changed to this directory.

    file_name: str
        the name of the zipped file (including .zip), default is 'model.zip';
        it is created relative to `directory`.

    Returns
    _____
    An IPython FileLink for `file_name`, which can be used to download the
    zip file.
    """
    os.chdir(directory)
    archive_path = os.path.abspath(file_name)

    # The context manager guarantees the archive is finalised (central
    # directory written) even if adding a file fails.
    with zipfile.ZipFile(file_name, mode='w') as zip_ref:
        # After chdir the target is the current directory; walking the
        # original `directory` again would resolve a relative path against
        # the new working directory.
        for folder, _, files in os.walk(os.curdir):
            for file in files:
                path = os.path.join(folder, file)
                # Skip only the archive being written, not every file whose
                # name happens to contain `file_name`.
                if os.path.abspath(path) == archive_path:
                    continue
                zip_ref.write(path)

    return FileLink(file_name)

In [None]:
# Zip the current directory into 'model.zip' (zip_dir's defaults); the call
# returns a FileLink for the archive.
zip_dir()

# Training another of the best models (architecture 6) on all training data

In [None]:
# Build a fresh arch6 model and train it on train_dataset (rebuilt earlier
# from the complete training arrays); no validation data is used.
model6_Final = arch6((96, 96, 1))
model6_Final.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='mse',
    # Regression on continuous coordinates: classification accuracy carries
    # no information, so report mean absolute error instead.
    metrics=['mae'],
)
model6_Final.summary()
# batch_size / shuffle are not passed: the dataset is already batched, and
# Keras ignores `shuffle` (and expects no `batch_size`) for Dataset inputs.
history = model6_Final.fit(train_dataset, epochs=150)

# Making predictions with both best models

In [None]:
# Predict the 30 outputs for every submission image with both final models.
# NOTE(review): X_test_submit was reshaped to (N, 96, 96) while the models were
# built with input shape (96, 96, 1) — presumably Keras adds the missing
# channel axis itself; confirm.
predictions1 = model4_Final.predict(X_test_submit)
predictions2 = model6_Final.predict(X_test_submit)

# Viewing some test samples

In [None]:
def plot_sample(image, keypoint, axis, title):
    """Draw one image with its keypoints on the given axes.

    Parameters
    ----------
    image : 2-D array-like
        Image shown in grayscale.
    keypoint : sequence
        Flat sequence of coordinates; even positions are used as x values
        and odd positions as y values.
    axis : matplotlib axes
        Axes to draw on.
    title : str
        Title for the axes.
    """
    axis.imshow(image, cmap='gray')
    axis.scatter(keypoint[0::2], keypoint[1::2], marker='o', s=20, color='r')
    # Set the title on the axes that was passed in; plt.title() targets
    # whatever axes is currently active, which need not be `axis`.
    axis.set_title(title)

In [None]:
# 4 x 5 grid: the first 20 submission images with the keypoints from predictions1.
fig = plt.figure(figsize=(20, 16))
for slot in range(1, 21):
    sample_idx = slot - 1
    axis = fig.add_subplot(4, 5, slot, xticks=[], yticks=[])
    plot_sample(X_test_submit[sample_idx], predictions1[sample_idx], axis, "")
plt.show()

In [None]:
# 4 x 5 grid: the first 20 submission images with the keypoints from predictions2.
fig = plt.figure(figsize=(20, 16))
for slot in range(1, 21):
    sample_idx = slot - 1
    axis = fig.add_subplot(4, 5, slot, xticks=[], yticks=[])
    plot_sample(X_test_submit[sample_idx], predictions2[sample_idx], axis, "")
plt.show()

# Submitting predictions on test data

In [None]:
# Load the competition's lookup table; the submission cell reads its
# 'RowId', 'ImageId' and 'FeatureName' columns.
lookup = pd.read_csv('/kaggle/input/facial-keypoints-detection/IdLookupTable.csv')
lookup.head()

In [None]:
# Build the submission: one predicted location per lookup-table row.
feature = list(lookup['FeatureName'])
# ImageId - 1 is used as the row index into the prediction matrix.
image_ids = list(lookup['ImageId']-1)
row_ids = lookup['RowId']

# The prediction columns follow the first 30 columns of training.csv (the
# labels extracted by process_data). Resolve each feature name against those
# column names instead of relying on the row order of the lookup table; an
# unknown feature name raises KeyError rather than silently picking a column.
feature_to_column = {name: idx for idx, name in enumerate(train_data_df.columns[:30])}
feature_list = [feature_to_column[name] for name in feature]

# Pick predictions1[image, column] for every lookup row in one indexing step.
final_preds = predictions1[image_ids, feature_list]

row_ids = pd.Series(row_ids, name='RowId')
locations = pd.Series(final_preds, name='Location')
# Keep the submitted coordinates inside the 0..96 range.
locations = locations.clip(0.0, 96.0)

submission_result = pd.concat([row_ids, locations], axis=1)
submission_result

In [None]:
# Write the submission table to 'submission.csv' without the index column.
submission_result.to_csv('submission.csv',index = False)