In [12]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import trange
from scipy.stats.stats import pearsonr
from sklearn.metrics import r2_score
from skimage.feature import hog

In [13]:
train_data = pd.read_csv("./data/training.csv")
test_data = pd.read_csv("./data/test.csv")
train_data.fillna(method='ffill', inplace=True)

In [14]:
def append_X(X):
    return [np.asarray(image.split(' '), dtype= 'float').reshape(96, 96) for image in X.Image]

In [15]:
X_train = append_X(train_data) 

In [16]:
X_test = append_X(test_data)

In [17]:
y_train = train_data.iloc[:, :-1].values
train_data.head()

Unnamed: 0,left_eye_center_x,left_eye_center_y,right_eye_center_x,right_eye_center_y,left_eye_inner_corner_x,left_eye_inner_corner_y,left_eye_outer_corner_x,left_eye_outer_corner_y,right_eye_inner_corner_x,right_eye_inner_corner_y,...,nose_tip_y,mouth_left_corner_x,mouth_left_corner_y,mouth_right_corner_x,mouth_right_corner_y,mouth_center_top_lip_x,mouth_center_top_lip_y,mouth_center_bottom_lip_x,mouth_center_bottom_lip_y,Image
0,66.033564,39.002274,30.227008,36.421678,59.582075,39.647423,73.130346,39.969997,36.356571,37.389402,...,57.066803,61.195308,79.970165,28.614496,77.388992,43.312602,72.935459,43.130707,84.485774,238 236 237 238 240 240 239 241 241 243 240 23...
1,64.332936,34.970077,29.949277,33.448715,58.85617,35.274349,70.722723,36.187166,36.034723,34.361532,...,55.660936,56.421447,76.352,35.122383,76.04766,46.684596,70.266553,45.467915,85.48017,219 215 204 196 204 211 212 200 180 168 178 19...
2,65.057053,34.909642,30.903789,34.909642,59.412,36.320968,70.984421,36.320968,37.678105,36.320968,...,53.538947,60.822947,73.014316,33.726316,72.732,47.274947,70.191789,47.274947,78.659368,144 142 159 180 188 188 184 180 167 132 84 59 ...
3,65.225739,37.261774,32.023096,37.261774,60.003339,39.127179,72.314713,38.380967,37.618643,38.754115,...,54.166539,65.598887,72.703722,37.245496,74.195478,50.303165,70.091687,51.561183,78.268383,193 192 193 194 194 194 193 192 168 111 50 12 ...
4,66.725301,39.621261,32.24481,38.042032,58.56589,39.621261,72.515926,39.884466,36.98238,39.094852,...,64.889521,60.671411,77.523239,31.191755,76.997301,44.962748,73.707387,44.227141,86.871166,147 148 160 196 215 214 216 217 219 220 206 18...


In [18]:
X_train = np.array(X_train).reshape(len(X_train), -1)

In [19]:
y_train

array([[66.03356391, 39.00227368, 30.22700752, ..., 72.93545865,
        43.13070677, 84.48577444],
       [64.33293617, 34.9700766 , 29.9492766 , ..., 70.26655319,
        45.46791489, 85.48017021],
       [65.05705263, 34.90964211, 30.90378947, ..., 70.19178947,
        47.27494737, 78.65936842],
       ...,
       [66.69073171, 36.84522146, 31.66641951, ..., 75.96359236,
        49.46257171, 78.11712   ],
       [70.96508235, 39.85366588, 30.54328471, ..., 75.96359236,
        50.06518588, 79.58644706],
       [66.93831111, 43.42450963, 31.09605926, ..., 75.96359236,
        45.90048   , 82.7730963 ]])

In [20]:
def hypothesis(theta, X, n):
    h = np.ones((X.shape[0],1))
    theta = theta.reshape(1,n+1)
    for i in range(0,X.shape[0]):
        h[i] = float(np.matmul(theta, X[i]))
    h = h.reshape(X.shape[0])
    return h

In [21]:
def BGD(theta, alpha, num_iters, h, X, y, n):
    cost = np.ones(num_iters)
    for i in trange(0,num_iters):
        theta[0] = theta[0] - (alpha/X.shape[0]) * sum(h - y)
        for j in range(1,n+1):
            theta[j] = theta[j] - (alpha/X.shape[0]) * sum((h-y) * X.transpose()[j])
        h = hypothesis(theta, X, n)
        cost[i] = (1/X.shape[0]) * 0.5 * sum(np.square(h - y))
        print(cost)
    theta = theta.reshape(1,n+1)
    return theta, cost

In [22]:
def linear_regression(X, y, alpha, num_iters):
    n = X.shape[1]
    one_column = np.ones((X.shape[0],1))
    X = np.concatenate((one_column, X), axis = 1)
    print(X.shape, one_column.shape)
    # initializing the parameter vector...
    theta = np.zeros(n+1)
    # hypothesis calculation....
    h = hypothesis(theta, X, n)
    # returning the optimized parameters by Gradient Descent...
    theta, cost = BGD(theta,alpha,num_iters,h,X,y,n)
    return theta, cost

In [23]:
def get_predictor(X, y, iterations, feature_id):
    alpha = 0.001
    box_range = 10
    theta, cost = linear_regression(X, y[:, 0], alpha, iterations)
    return theta

In [27]:
def cut_data(X_train, y_train, feature_id):
    def group_in_pairs(ung_features):
        return map(lambda item: list(zip(*[item[i::2] for i in range(2)])), ung_features)
    train_landmarks_coordinates = np.array(list(group_in_pairs(y_train)))
    average_coordinates = train_landmarks_coordinates.mean(axis=0)
    box_x = int(average_coordinates[feature_id][0])
    box_y = int(average_coordinates[feature_id][1])
    # Default Bounding box
    X_new = X_train.reshape(X_train.shape[0], 96, 96)
    X_new = np.array(list(map(lambda image: hog(image, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(1, 1), visualize=False, multichannel=False), X_new)))
    #     X_new = np.array(X_new).reshape(len(X_new), -1)
    # Sum by columns
#     X_new = X_train.reshape(X_train.shape[0], 96, 96)
    # X_new = np.array(list(map(lambda image: image[box_x - box_range: box_x + box_range, box_y - box_range: box_y + box_range], X_new)))
#     X_new = np.sum(X_new, 2)
    X = (X_new - X_new.mean()) / X_new.std()
    y = (y_train - y_train.mean()) / y_train.std()
    return X, y

In [32]:
class Predictor:
    iterations = 10
    
    def __init__(self, X, y):
        self.y = y
        self.thetas = []
#         self.features = [0, 1, 2, 3, 20, 21, 26, 27]
        self.features = [0]
        for i in self.features:
            X_new, y_new = cut_data(X, y, i)
            self.thetas.append(get_predictor(X_new, y_new, Predictor.iterations, i))
            
    def predict(self, X, feature_id):
        X, y = cut_data(X, self.y, feature_id)
        X_1 = np.concatenate((np.ones((X.shape[0], 1)), X), axis = 1)
        return np.dot(self.thetas[self.features.index(feature_id)], X_1.T) * self.y.std() + self.y.mean()
        

In [33]:
p = Predictor(X_train, y_train)

  0%|          | 0/10 [00:00<?, ?it/s]

(7049, 1297) (7049, 1)


 10%|█         | 1/10 [00:02<00:19,  2.21s/it]

[0.23345051 1.         1.         1.         1.         1.
 1.         1.         1.         1.        ]


 20%|██        | 2/10 [00:04<00:18,  2.28s/it]

[0.23345051 0.12476314 1.         1.         1.         1.
 1.         1.         1.         1.        ]


 30%|███       | 3/10 [00:06<00:15,  2.25s/it]

[0.23345051 0.12476314 0.07721619 1.         1.         1.
 1.         1.         1.         1.        ]


 40%|████      | 4/10 [00:08<00:13,  2.20s/it]

[0.23345051 0.12476314 0.07721619 0.05624686 1.         1.
 1.         1.         1.         1.        ]


 50%|█████     | 5/10 [00:10<00:10,  2.14s/it]

[0.23345051 0.12476314 0.07721619 0.05624686 0.04683505 1.
 1.         1.         1.         1.        ]


 60%|██████    | 6/10 [00:12<00:08,  2.11s/it]

[0.23345051 0.12476314 0.07721619 0.05624686 0.04683505 0.04245328
 1.         1.         1.         1.        ]


 70%|███████   | 7/10 [00:15<00:06,  2.11s/it]

[0.23345051 0.12476314 0.07721619 0.05624686 0.04683505 0.04245328
 0.04026472 1.         1.         1.        ]


 80%|████████  | 8/10 [00:17<00:04,  2.13s/it]

[0.23345051 0.12476314 0.07721619 0.05624686 0.04683505 0.04245328
 0.04026472 0.03903633 1.         1.        ]


 90%|█████████ | 9/10 [00:19<00:02,  2.23s/it]

[0.23345051 0.12476314 0.07721619 0.05624686 0.04683505 0.04245328
 0.04026472 0.03903633 0.03823207 1.        ]


100%|██████████| 10/10 [00:21<00:00,  2.25s/it]

[0.23345051 0.12476314 0.07721619 0.05624686 0.04683505 0.04245328
 0.04026472 0.03903633 0.03823207 0.0376188 ]





In [None]:
y_res = p.predict(X_train, 0)
print(y_res)

In [180]:
r2_score(y_train[:,0].flatten(), y_res.flatten())

-5.275072903326386

In [181]:
l = y_res - y_train[:,0]
np.dot(l,l.T)

array([[525523.29430371]])