In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

# Importing matplotlib to plot images.
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# Importing SK-learn to calculate precision and recall
import sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score, LeaveOneGroupOut
from sklearn.utils import shuffle
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics.pairwise import euclidean_distances

import pickle as pkl
import h5py

from pathlib import Path
import os.path
import sys
import datetime
import time

  from numpy.core.umath_tests import inner1d


In [2]:
pixel_width = {"N5X":1080,"S3Mini":480,"S4":1080,"N6":1440}
pixel_height = {"N5X":1920,"S3Mini":800,"S4":1920,"N6":2560}

def normalizeWidth(row):
    smartphone = row.Phone
    return row.XPress / pixel_width[smartphone]

def normalizeHeight(row):
    smartphone = row.Phone    
    return row.YPress / pixel_height[smartphone]

In [3]:
df = pd.read_pickle("../../data/step03.pkl")

In [4]:
df.XPress = df.apply(lambda x: normalizeWidth(x), axis=1)
df.YPress = df.apply(lambda x: normalizeHeight(x), axis=1)

df.Sensor = df.Sensor.apply(lambda x: x.reshape(-1, 6, 1))

np.random.seed(42)
lst = df.PId.unique()
np.random.shuffle(lst)

shape = df.Sensor.iloc[0].shape

dfTrain = df[df.PId.isin(lst[:14])]
dfTest = df[df.PId.isin(lst[14:])]

train_x = np.concatenate(dfTrain.Sensor.values).reshape(-1, shape[0], shape[1], shape[2])
train_x = train_x.reshape(-1, train_x.shape[1] * train_x.shape[2])
train_y = dfTrain[['XPress', 'YPress']].values

test_x = np.concatenate(dfTest.Sensor.values).reshape(-1, shape[0], shape[1], shape[2])
test_x = test_x.reshape(-1, test_x.shape[1] * test_x.shape[2])
test_y = dfTest[['XPress', 'YPress']].values

# RandomForestRegressor

In [5]:
regressor = RandomForestRegressor()
params = {'max_depth':range(3,12,2)}
clf = GridSearchCV(regressor, params, cv=5, n_jobs = 7, verbose=1)
clf.fit(train_x, train_y)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=7)]: Done  25 out of  25 | elapsed:  7.0min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=7,
       param_grid={'max_depth': range(3, 12, 2)}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring=None, verbose=1)

In [7]:
results = clf.best_estimator_.predict(test_x)
print("RandomForestRegressor", np.mean(euclidean_distances(test_y, results)))

RandomForestRegressor 0.3828895036868737


# DecisionTreeRegressor

In [8]:
regressor = DecisionTreeRegressor()
params = {'max_depth':range(3,12,3)}
clf = GridSearchCV(regressor, params, cv=5, n_jobs = 7, verbose=1)
clf.fit(train_x, train_y)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=7)]: Done  15 out of  15 | elapsed:   42.9s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best'),
       fit_params=None, iid=True, n_jobs=7,
       param_grid={'max_depth': range(3, 12, 3)}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring=None, verbose=1)

In [9]:
results = clf.best_estimator_.predict(test_x)
print("DecisionTreeRegressor", np.mean(euclidean_distances(test_y, results)))

DecisionTreeRegressor 0.38124018979935587


# KNeighborsRegressor

In [19]:
regressor = KNeighborsRegressor()
params = {'n_neighbors':range(3,6,2)}
clf = GridSearchCV(regressor, params, cv=5, n_jobs = 7, verbose=1)
clf.fit(train_x, train_y)

Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=7)]: Done   8 out of  10 | elapsed: 54.2min remaining: 13.5min
[Parallel(n_jobs=7)]: Done  10 out of  10 | elapsed: 54.5min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform'),
       fit_params=None, iid=True, n_jobs=7,
       param_grid={'n_neighbors': range(3, 6, 2)}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring=None, verbose=1)

In [20]:
results = clf.best_estimator_.predict(test_x)
print("KNeighborsRegressor", np.mean(euclidean_distances(test_y, results)))

KNeighborsRegressor 0.3939812550318272


# MLPRegressor

In [17]:
regressor = MLPRegressor(hidden_layer_sizes = (40,20))
params = {'hidden_layer_sizes':range(10,150,20)}
clf = GridSearchCV(regressor, params, cv=5, n_jobs = 7, verbose=1)
clf.fit(train_x, train_y)

Fitting 5 folds for each of 7 candidates, totalling 35 fits


[Parallel(n_jobs=7)]: Done  35 out of  35 | elapsed: 13.0min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(40, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params=None, iid=True, n_jobs=7,
       param_grid={'hidden_layer_sizes': range(10, 150, 20)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=1)

In [18]:
results = clf.best_estimator_.predict(test_x)
print("MLPRegressor", np.mean(euclidean_distances(test_y, results)))

MLPRegressor 0.4103825403006661


# single phones

In [6]:
for smartphone, constant_pixels in zip(["S3Mini", "S4", "N5X", "N6"], [0.1089, 0.0577, 0.06, 0.05109]):
    df = pd.read_pickle("../../data/step03.pkl")
    
    df = df[(df.Phone == smartphone) ]
    df.Sensor = df.Sensor.apply(lambda x: x.reshape(-1, 6, 1))

    np.random.seed(42)
    lst = df.PId.unique()
    np.random.shuffle(lst)

    shape = df.Sensor.iloc[0].shape

    dfTrain = df[df.PId.isin(lst[:14])]
    dfTest = df[df.PId.isin(lst[14:])]

    train_x = np.concatenate(dfTrain.Sensor.values).reshape(-1, shape[0], shape[1], shape[2])
    train_x = train_x.reshape(-1, train_x.shape[1] * train_x.shape[2])
    train_y = dfTrain[['XPress', 'YPress']].values

    test_x = np.concatenate(dfTest.Sensor.values).reshape(-1, shape[0], shape[1], shape[2])
    test_x = test_x.reshape(-1, test_x.shape[1] * test_x.shape[2])
    test_y = dfTest[['XPress', 'YPress']].values
    
    regressor = RandomForestRegressor()
    params = {'max_depth':range(3,12,2)}
    clf = GridSearchCV(regressor, params, cv=5, n_jobs = 7, verbose=1)
    clf.fit(train_x, train_y)
    results = clf.best_estimator_.predict(test_x)
    print(smartphone, "RandomForestRegressor", np.mean(euclidean_distances(test_y, results)) * constant_pixels, "mm")
    
    regressor = DecisionTreeRegressor()
    params = {'max_depth':range(3,12,3)}
    clf = GridSearchCV(regressor, params, cv=5, n_jobs = 7, verbose=1)
    clf.fit(train_x, train_y)
    results = clf.best_estimator_.predict(test_x)
    print(smartphone, "DecisionTreeRegressor", np.mean(euclidean_distances(test_y, results)) * constant_pixels, "mm")
    
    regressor = KNeighborsRegressor()
    params = {'n_neighbors':range(3,6,2)}
    clf = GridSearchCV(regressor, params, cv=5, n_jobs = 7, verbose=1)
    clf.fit(train_x, train_y)
    results = clf.best_estimator_.predict(test_x)
    print(smartphone, "KNeighborsRegressor", np.mean(euclidean_distances(test_y, results)) * constant_pixels, "mm")
    
    regressor = MLPRegressor(hidden_layer_sizes = (40,20))
    params = {'hidden_layer_sizes':range(10,150,20)}
    clf = GridSearchCV(regressor, params, cv=5, n_jobs = 7, verbose=1)
    clf.fit(train_x, train_y)
    results = clf.best_estimator_.predict(test_x)
    print(smartphone, "MLPRegressor", np.mean(euclidean_distances(test_y, results)) * constant_pixels, "mm")

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=7)]: Done  25 out of  25 | elapsed:  1.3min finished


S3Mini RandomForestRegressor 28.10624620828327 mm
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=7)]: Done  15 out of  15 | elapsed:    8.4s finished


S3Mini DecisionTreeRegressor 28.811604795368435 mm
Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=7)]: Done   8 out of  10 | elapsed:  3.2min remaining:   48.1s
[Parallel(n_jobs=7)]: Done  10 out of  10 | elapsed:  3.4min finished


S3Mini KNeighborsRegressor 28.67146943220779 mm
Fitting 5 folds for each of 7 candidates, totalling 35 fits




[Parallel(n_jobs=7)]: Done  35 out of  35 | elapsed: 16.1min finished


S3Mini MLPRegressor 30.12933285859023 mm
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=7)]: Done  25 out of  25 | elapsed:  1.3min finished


S4 RandomForestRegressor 34.64660209556285 mm
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=7)]: Done  15 out of  15 | elapsed:    8.8s finished


S4 DecisionTreeRegressor 35.58431366347601 mm
Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=7)]: Done   8 out of  10 | elapsed:  3.1min remaining:   46.3s
[Parallel(n_jobs=7)]: Done  10 out of  10 | elapsed:  3.1min finished


S4 KNeighborsRegressor 34.61020876512824 mm
Fitting 5 folds for each of 7 candidates, totalling 35 fits




[Parallel(n_jobs=7)]: Done  35 out of  35 | elapsed: 16.3min finished


S4 MLPRegressor 36.55290903551478 mm
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=7)]: Done  25 out of  25 | elapsed:  1.3min finished


N5X RandomForestRegressor 36.2186217393872 mm
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=7)]: Done  15 out of  15 | elapsed:    8.8s finished


N5X DecisionTreeRegressor 37.02998981645937 mm
Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=7)]: Done   8 out of  10 | elapsed:  3.1min remaining:   47.2s
[Parallel(n_jobs=7)]: Done  10 out of  10 | elapsed:  3.2min finished


N5X KNeighborsRegressor 36.3573257607026 mm
Fitting 5 folds for each of 7 candidates, totalling 35 fits




[Parallel(n_jobs=7)]: Done  35 out of  35 | elapsed: 16.9min finished


N5X MLPRegressor 38.71849415777135 mm
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=7)]: Done  25 out of  25 | elapsed:  1.3min finished


N6 RandomForestRegressor 41.270739118303894 mm
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=7)]: Done  15 out of  15 | elapsed:    8.7s finished


N6 DecisionTreeRegressor 42.54711685625111 mm
Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=7)]: Done   8 out of  10 | elapsed:  3.2min remaining:   48.4s
[Parallel(n_jobs=7)]: Done  10 out of  10 | elapsed:  3.4min finished


N6 KNeighborsRegressor 42.911477872117196 mm
Fitting 5 folds for each of 7 candidates, totalling 35 fits




[Parallel(n_jobs=7)]: Done  35 out of  35 | elapsed: 16.4min finished


N6 MLPRegressor 45.29022206372843 mm


# Plotting

In [None]:
fig,ax = plt.subplots(figsize = (3,6))
ax.set_xlim(0,1080)
ax.set_ylim(1920,0)
ax.scatter(test_y[:,0],test_y[:,1],alpha = 0.3)
ax.scatter(results[:,0],results[:,1],alpha = 0.3)