# In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
plt.style.use('dark_background')
from skimage.io import imshow
from matplotlib.patches import Circle

#get and preprocess the data
def get_train_test_model(train_path,test_path):
    """Load the training and the test CSV files.

    Args:
        train_path: Location of the training CSV, handed to ``pd.read_csv``.
        test_path: Location of the test CSV, handed to ``pd.read_csv``.

    Returns:
        Tuple ``(train_data, test_data)`` holding the two loaded DataFrames.
    """
    # The training file is read first, then the test file.
    return pd.read_csv(train_path), pd.read_csv(test_path)

def get_image_data(data_frame):
    """Decode the image strings stored in the last column of ``data_frame``.

    Every cell of the last column must be a string of 96 * 96
    space-separated integer pixel values.

    Args:
        data_frame: DataFrame whose last column holds the image strings.

    Returns:
        A list with one 96x96 array per row; every pixel value is divided
        by 255.
    """
    def decode(pixel_string):
        # "p0 p1 p2 ..." -> integer vector -> 96x96 grid -> divided by 255
        pixels = np.array(pixel_string.split(' ')).astype(int)
        return pixels.reshape((96, 96)) / 255

    # Positional access: the image string is always read from the last column.
    return [decode(data_frame.iloc[row, -1]) for row in range(len(data_frame))]

def refine_train_data(train_data):
    """Split the training frame into image arrays and normalised key points.

    Args:
        train_data: DataFrame whose last column holds the image strings and
            whose remaining columns hold the key-point coordinates. The
            coordinate columns must not contain missing values because they
            are cast to ``int``.

    Returns:
        Tuple ``(images, key_points)`` of NumPy arrays: the images decoded by
        ``get_image_data`` and, for every row, the coordinates mapped through
        ``value / 96 - 0.5``.
    """
    images = get_image_data(train_data)
    key_points = []
    for row in range(len(train_data)):
        # NOTE(review): the int cast truncates any fractional part of the
        # coordinates before they are normalised; kept unchanged so the
        # produced labels stay the same -- confirm this loss is intended.
        coords = train_data.iloc[row, :-1].astype(int).values
        key_points.append(coords / 96 - 0.5)
    return np.array(images), np.array(key_points)

def refine_test_data(test_data):
    """Decode the images of the test frame into one NumPy array.

    Args:
        test_data: DataFrame whose last column holds the image strings.

    Returns:
        ``np.array`` built from the list returned by ``get_image_data``.
    """
    decoded_images = get_image_data(test_data)
    return np.array(decoded_images)

def data_preprocessing(train_data,test_data):
    """Clean both frames, decode them and build the augmented training set.

    Rows containing missing values are dropped from both frames before the
    images and key points are extracted. Every training sample is then run
    through ``data_augmentation`` and the results are appended after the
    original samples.

    Args:
        train_data: Raw training DataFrame.
        test_data: Raw test DataFrame.

    Returns:
        A 5-tuple ``(image_data_train, key_points_train,
        train_image_data_net, train_points_data_net, image_data_test)``
        where the two ``*_net`` arrays are the original samples followed by
        the augmented ones.
    """
    complete_train = train_data.dropna()
    complete_test = test_data.dropna()
    image_data_train, key_points_train = refine_train_data(complete_train)
    image_data_test = refine_test_data(complete_test)

    # NOTE: each image / key-point row handed to data_augmentation is a view
    # into the arrays above, so an in-place change made there is also visible
    # in image_data_train / key_points_train.
    augmented = [
        data_augmentation(image, points)
        for image, points in zip(image_data_train, key_points_train)
    ]
    augmented_images = [pair[0] for pair in augmented]
    augmented_points = [pair[1] for pair in augmented]

    # Originals first, augmented copies second.
    train_image_data_net = np.concatenate(
        (image_data_train, augmented_images), axis=0
    )
    train_points_data_net = np.concatenate(
        (key_points_train, augmented_points), axis=0
    )
    return (
        image_data_train,
        key_points_train,
        train_image_data_net,
        train_points_data_net,
        image_data_test,
    )

# Data augmentation increases the diversity of the data available for training models.
# Common techniques for training deep learning models: cropping, padding, horizontal flipping.
def data_augmentation(image_data,key_points):
    """Return a horizontally mirrored copy of an image and its key points.

    The image is flipped along its vertical axis (columns reversed) and the
    x coordinate of every key point is mirrored to match. Neither input is
    modified: callers pass rows of their training arrays, and changing them
    in place would corrupt the original samples.

    Args:
        image_data: 2-D image array indexed as ``[row, column]``.
        key_points: Flat sequence ``[x0, y0, x1, y1, ...]`` of coordinates
            normalised as ``pixel / 96 - 0.5``.

    Returns:
        Tuple ``(mirrored_image, mirrored_key_points)``; both are new arrays.
    """
    # Reverse the column order; copy so the result does not alias the input.
    mirror_img = image_data[:, ::-1].copy()

    # Work on a copy so the caller's key points stay untouched.
    mirrored_points = np.array(key_points)
    # Mirroring pixel x about the image centre (48) gives 96 - x. In the
    # normalised form x / 96 - 0.5 that is a plain sign change, so only the
    # even (x) positions are negated; the y coordinates are left as they are.
    mirrored_points[0::2] = -mirrored_points[0::2]

    # NOTE(review): if the key-point columns carry left/right meaning (for
    # example left eye vs. right eye), a mirrored sample may also need those
    # columns swapped -- confirm against the dataset schema.
    return mirror_img, mirrored_points

def GetAndPreprocessData():
    """Load ``training.csv`` / ``test.csv`` and run the preprocessing pipeline.

    Both files are read relative to the current working directory.

    Returns:
        Tuple ``(train_image_data_net, train_points_data_net,
        image_data_test)``: the third, fourth and fifth values returned by
        ``data_preprocessing``.
    """
    raw_train, raw_test = get_train_test_model("training.csv", "test.csv")
    # The first two results (the un-augmented training arrays) are not
    # returned to the caller.
    _, _, net_images, net_points, test_images = data_preprocessing(raw_train, raw_test)
    # Debug aid: plot_img_keypoints(net_images[i], net_points[i]) shows
    # sample i together with its key points.
    return net_images, net_points, test_images

# matplotlib.patches.Circle -- creates a circle centred at xy=(x, y) with the given radius
def plot_img_keypoints(image,key_points):
    """Show ``image`` with its key points drawn on top as red circles.

    Args:
        image: Image handed to ``imshow``.
        key_points: Flat sequence ``[x0, y0, x1, y1, ...]``; every value is
            mapped back to pixel units with ``(value + 0.5) * 96``.
    """
    # plt.subplots(1) creates a figure holding a single subplot.
    _fig, ax = plt.subplots(1)
    ax.set_aspect('equal')
    imshow(image)
    # Walk the flat sequence two entries at a time: x first, then y.
    for x_pos in range(0, len(key_points), 2):
        centre_x = (key_points[x_pos] + 0.5) * 96
        centre_y = (key_points[x_pos + 1] + 0.5) * 96
        marker = Circle((centre_x, centre_y), 1, color='red')
        ax.add_patch(marker)
    plt.show()
    
def get_processed_train_and_test_data():
    """Return the preprocessed data with a trailing axis on the train images.

    Returns:
        Tuple ``(processed_train_image_data, train_points_data,
        image_data_test)``. The training images are reshaped from
        ``(d0, d1, d2)`` to ``(d0, d1, d2, 1)``; the key points and the test
        images are passed through exactly as ``GetAndPreprocessData``
        returned them.
    """
    train_images, train_points, test_images = GetAndPreprocessData()
    dims = train_images.shape
    # Append a length-1 axis after the first three dimensions.
    with_extra_axis = np.reshape(train_images, (dims[0], dims[1], dims[2], 1))
    return with_extra_axis, train_points, test_images
