In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import trange
from scipy.stats.stats import pearsonr
from sklearn.metrics import r2_score

In [2]:
train_data = pd.read_csv("./data/training.csv")
test_data = pd.read_csv("./data/test.csv")
train_data.fillna(method='ffill', inplace=True)

In [3]:
def append_X(X):
    return [np.asarray(image.split(' '), dtype= 'float').reshape(96, 96) for image in X.Image]

In [4]:
X_train = append_X(train_data) 

In [5]:
X_test = append_X(test_data)

In [6]:
y_train = train_data.iloc[:, :-1].values
train_data.head()

Unnamed: 0,left_eye_center_x,left_eye_center_y,right_eye_center_x,right_eye_center_y,left_eye_inner_corner_x,left_eye_inner_corner_y,left_eye_outer_corner_x,left_eye_outer_corner_y,right_eye_inner_corner_x,right_eye_inner_corner_y,...,nose_tip_y,mouth_left_corner_x,mouth_left_corner_y,mouth_right_corner_x,mouth_right_corner_y,mouth_center_top_lip_x,mouth_center_top_lip_y,mouth_center_bottom_lip_x,mouth_center_bottom_lip_y,Image
0,66.033564,39.002274,30.227008,36.421678,59.582075,39.647423,73.130346,39.969997,36.356571,37.389402,...,57.066803,61.195308,79.970165,28.614496,77.388992,43.312602,72.935459,43.130707,84.485774,238 236 237 238 240 240 239 241 241 243 240 23...
1,64.332936,34.970077,29.949277,33.448715,58.85617,35.274349,70.722723,36.187166,36.034723,34.361532,...,55.660936,56.421447,76.352,35.122383,76.04766,46.684596,70.266553,45.467915,85.48017,219 215 204 196 204 211 212 200 180 168 178 19...
2,65.057053,34.909642,30.903789,34.909642,59.412,36.320968,70.984421,36.320968,37.678105,36.320968,...,53.538947,60.822947,73.014316,33.726316,72.732,47.274947,70.191789,47.274947,78.659368,144 142 159 180 188 188 184 180 167 132 84 59 ...
3,65.225739,37.261774,32.023096,37.261774,60.003339,39.127179,72.314713,38.380967,37.618643,38.754115,...,54.166539,65.598887,72.703722,37.245496,74.195478,50.303165,70.091687,51.561183,78.268383,193 192 193 194 194 194 193 192 168 111 50 12 ...
4,66.725301,39.621261,32.24481,38.042032,58.56589,39.621261,72.515926,39.884466,36.98238,39.094852,...,64.889521,60.671411,77.523239,31.191755,76.997301,44.962748,73.707387,44.227141,86.871166,147 148 160 196 215 214 216 217 219 220 206 18...


In [7]:
X_train = np.array(X_train).reshape(len(X_train), -1)

In [8]:
y_train

array([[66.03356391, 39.00227368, 30.22700752, ..., 72.93545865,
        43.13070677, 84.48577444],
       [64.33293617, 34.9700766 , 29.9492766 , ..., 70.26655319,
        45.46791489, 85.48017021],
       [65.05705263, 34.90964211, 30.90378947, ..., 70.19178947,
        47.27494737, 78.65936842],
       ...,
       [66.69073171, 36.84522146, 31.66641951, ..., 75.96359236,
        49.46257171, 78.11712   ],
       [70.96508235, 39.85366588, 30.54328471, ..., 75.96359236,
        50.06518588, 79.58644706],
       [66.93831111, 43.42450963, 31.09605926, ..., 75.96359236,
        45.90048   , 82.7730963 ]])

In [9]:
def group_in_pairs(ung_features):
    return map(lambda item: list(zip(*[item[i::2] for i in range(2)])), ung_features)

In [10]:
train_landmarks_coordinates = np.array(list(group_in_pairs(y_train)))

In [11]:
train_landmarks_coordinates

array([[[66.03356391, 39.00227368],
        [30.22700752, 36.4216782 ],
        [59.58207519, 39.64742256],
        ...,
        [28.61449624, 77.38899248],
        [43.3126015 , 72.93545865],
        [43.13070677, 84.48577444]],

       [[64.33293617, 34.9700766 ],
        [29.9492766 , 33.44871489],
        [58.85617021, 35.27434894],
        ...,
        [35.12238298, 76.04765957],
        [46.68459574, 70.26655319],
        [45.46791489, 85.48017021]],

       [[65.05705263, 34.90964211],
        [30.90378947, 34.90964211],
        [59.412     , 36.32096842],
        ...,
        [33.72631579, 72.732     ],
        [47.27494737, 70.19178947],
        [47.27494737, 78.65936842]],

       ...,

       [[66.69073171, 36.84522146],
        [31.66641951, 39.68504195],
        [59.25534694, 36.06514286],
        ...,
        [34.23275908, 77.3394291 ],
        [51.59945296, 75.96359236],
        [49.46257171, 78.11712   ]],

       [[70.96508235, 39.85366588],
        [30.54328471, 40.77

In [12]:
average_coordinates = train_landmarks_coordinates.mean(axis=0)

In [13]:
average_coordinates

array([[66.35858788, 37.64913961],
       [30.30663068, 37.97590128],
       [59.22406008, 36.67573061],
       [73.74677489, 35.61996104],
       [37.21417075, 38.94284896],
       [22.16244559, 39.58834765],
       [54.19462033, 29.77441764],
       [80.46890713, 28.53479087],
       [38.95416945, 32.07073889],
       [15.13827922, 34.18830542],
       [48.37418629, 62.7158836 ],
       [69.58693209, 73.19598525],
       [33.79967979, 76.96454597],
       [50.42529935, 74.98252873],
       [48.56597567, 78.97925622]])

In [32]:
feature_id = 0
box_x = int(average_coordinates[feature_id][0])
box_y = int(average_coordinates[feature_id][1])
box_range = 10

In [33]:
X_train

array([[238., 236., 237., ...,  70.,  75.,  90.],
       [219., 215., 204., ...,   1.,   1.,   1.],
       [144., 142., 159., ...,  78.,  78.,  77.],
       ...,
       [ 74.,  74.,  74., ...,  20.,  20.,  20.],
       [254., 254., 254., ..., 254., 254., 254.],
       [ 53.,  62.,  67., ..., 158., 158., 159.]])

In [34]:
X_new = X_train.reshape(X_train.shape[0], 96, 96)

In [35]:
X_new = np.array(list(map(lambda image: image[box_x - box_range: box_x + box_range, box_y - box_range: box_y + box_range], X_new)))

In [36]:
alpha = 0.001
iterations = 150

In [37]:
def hypothesis(theta, X, n):
    h = np.ones((X.shape[0],1))
    theta = theta.reshape(1,n+1)
    for i in range(0,X.shape[0]):
        h[i] = float(np.matmul(theta, X[i]))
    h = h.reshape(X.shape[0])
    return h

In [38]:
def BGD(theta, alpha, num_iters, h, X, y, n):
    cost = np.ones(num_iters)
    for i in trange(0,num_iters):
        theta[0] = theta[0] - (alpha/X.shape[0]) * sum(h - y)
        for j in range(1,n+1):
            theta[j] = theta[j] - (alpha/X.shape[0]) * sum((h-y) * X.transpose()[j])
        h = hypothesis(theta, X, n)
        cost[i] = (1/X.shape[0]) * 0.5 * sum(np.square(h - y))
        print(cost)
    theta = theta.reshape(1,n+1)
    return theta, cost

In [39]:
def linear_regression(X, y, alpha, num_iters):
    n = X.shape[1]
    one_column = np.ones((X.shape[0],1))
    X = np.concatenate((one_column, X), axis = 1)
    print(X.shape, one_column.shape)
    # initializing the parameter vector...
    theta = np.zeros(n+1)
    # hypothesis calculation....
    h = hypothesis(theta, X, n)
    # returning the optimized parameters by Gradient Descent...
    theta, cost = BGD(theta,alpha,num_iters,h,X,y,n)
    return theta, cost

In [40]:
X_new = np.array(X_new).reshape(len(X_new), -1)

In [41]:
X_new.shape

(7049, 400)

In [42]:
X = (X_new - X_new.mean()) / X_new.std()
y = (y_train - y_train.mean()) / y_train.std()

In [None]:
theta, cost = linear_regression(X, y[:, feature_id], alpha, iterations)

  0%|          | 0/150 [00:00<?, ?it/s]

(7049, 401) (7049, 1)


  1%|          | 1/150 [00:00<01:41,  1.46it/s]

[0.46535824 1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         

  1%|▏         | 2/150 [00:01<01:40,  1.47it/s]

[0.46535824 0.44898231 1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         

  2%|▏         | 3/150 [00:02<01:42,  1.43it/s]

[0.46535824 0.44898231 0.4336141  1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         

  3%|▎         | 4/150 [00:03<01:54,  1.28it/s]

[0.46535824 0.44898231 0.4336141  0.41918878 1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         

  3%|▎         | 5/150 [00:03<01:51,  1.30it/s]

[0.46535824 0.44898231 0.4336141  0.41918878 0.40564585 1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         

  4%|▍         | 6/150 [00:04<01:53,  1.27it/s]

[0.46535824 0.44898231 0.4336141  0.41918878 0.40564585 0.39292884
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1. 

  5%|▍         | 7/150 [00:05<01:52,  1.28it/s]

[0.46535824 0.44898231 0.4336141  0.41918878 0.40564585 0.39292884
 0.38098495 1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1. 

  5%|▌         | 8/150 [00:06<01:47,  1.32it/s]

[0.46535824 0.44898231 0.4336141  0.41918878 0.40564585 0.39292884
 0.38098495 0.36976484 1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1. 

  6%|▌         | 9/150 [00:06<01:44,  1.35it/s]

[0.46535824 0.44898231 0.4336141  0.41918878 0.40564585 0.39292884
 0.38098495 0.36976484 0.35922235 1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1. 

  7%|▋         | 10/150 [00:07<01:42,  1.37it/s]

[0.46535824 0.44898231 0.4336141  0.41918878 0.40564585 0.39292884
 0.38098495 0.36976484 0.35922235 0.34931434 1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1. 

In [26]:
X_1 = np.concatenate((np.ones((X.shape[0], 1)), X), axis = 1)
y_res = np.dot(theta, X_1.T) * y_train.std() + y_train.mean()

In [28]:
r2_score(y_train[:,feature_id].flatten(), y_res.flatten())

-5.697484715908489

In [29]:
y_train[:, feature_id]

array([66.03356391, 64.33293617, 65.05705263, ..., 66.69073171,
       70.96508235, 66.93831111])

In [30]:
y_res

array([[68.56639058, 51.13163849, 57.65844315, ..., 69.40764548,
        56.87630887, 64.82137744]])

In [31]:
l = y_res - y_train[:,feature_id]
np.dot(l,l.T)

array([[560899.33705586]])