In [14]:
#Load python modules
import utils
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import time
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn.tree import export_graphviz
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.externals import joblib
from sklearn.metrics import confusion_matrix
import pickle
import time
import csv
import json

In [15]:
#Saves the start time of the operation
start_time = time.time()

In [16]:
def batch_classify(X_train, Y_train, X_test, Y_test, no_classifiers = 5, verbose = True):
    dict_models = {}
    for classifier_name, classifier in list(dict_classifiers.items())[:no_classifiers]:
        t_start = time.clock()
        classifier.fit(X_train, Y_train)
        t_end = time.clock()
        
        t_diff = t_end - t_start
        train_score = classifier.score(X_train, Y_train)
        test_score = classifier.score(X_test, Y_test)
        
        dict_models[classifier_name] = {'model': classifier, 'train_score': train_score, 'test_score': test_score, 'train_time': t_diff}
        if verbose:
            print("trained {c} in {f:.2f} s".format(c=classifier_name, f=t_diff))
        
        #Exports classifier
        with open('./'+ "teste" + '.pkl', 'wb') as f:
            pickle.dump(classifier, f, pickle.HIGHEST_PROTOCOL)
    return dict_models

In [17]:
def display_dict_models(dict_models, sort_by='test_score'):
    cls = [key for key in dict_models.keys()]
    test_s = [dict_models[key]['test_score'] for key in cls]
    training_s = [dict_models[key]['train_score'] for key in cls]
    training_t = [dict_models[key]['train_time'] for key in cls]
    
    df_ = pd.DataFrame(data=np.zeros(shape=(len(cls),4)), columns = ['classifier', 'train_score', 'test_score', 'train_time'])
    for ii in range(0,len(cls)):
        df_.loc[ii, 'classifier'] = cls[ii]
        df_.loc[ii, 'train_score'] = training_s[ii]
        df_.loc[ii, 'test_score'] = test_s[ii]
        df_.loc[ii, 'train_time'] = training_t[ii]
    
    print(df_.sort_values(by=sort_by, ascending=False))

In [18]:
#Reads dataset CSV file
data = pd.read_csv('/projects/USO-DE-TECNOLOGIA-ASSISTIVA-PARA-GUIAR-ALUNOS-COM-DEFICIENCIA-VISUAL-NO-CAMPUS-SENAC-SANTO-AMARO/interest_points.csv')

In [19]:
#Loads the mac addresses dictionaire
macAddresses = utils.loadObj('macAddresses')

#print mac addresses dictionaire
print(macAddresses)

{u'24:79:2A:BD:16:C8': 22, u'24:79:2A:7D:1B:18': 52, u'B8:09:8A:D9:05:F1': 120, u'24:79:2A:7D:AB:88': 111, u'A8:16:D0:A0:7A:CF': 74, u'24:79:2A:FD:BF:08': 1, u'88:B4:A6:8B:13:E1': 189, u'9C:5C:F9:37:3B:86': 124, u'24:79:2A:7D:30:28': 55, u'24:79:2A:BD:AB:58': 25, u'24:79:2A:FD:C5:98': 11, u'AA:5C:2C:0B:6E:9F': 31, u'24:79:2A:BD:1B:18': 9, u'24:79:2A:FC:D8:28': 145, u'24:79:2A:3D:1E:88': 13, u'24:79:2A:3D:BF:08': 0, u'6E:C9:D3:AF:12:E7': 139, u'62:45:CB:99:00:06': 98, u'06:D6:AA:7C:11:C3': 68, u'80:58:F8:B2:F0:F6': 194, u'24:79:2A:BD:C5:98': 10, u'DC:BF:E9:16:BE:19': 225, u'24:79:2A:7D:2F:D8': 182, u'24:79:2A:BD:3B:08': 93, u'D0:92:9E:B5:91:DE': 197, u'24:79:2A:3D:AB:58': 27, u'24:79:2A:3C:D0:78': 23, u'24:79:2A:7D:2D:F8': 64, u'4C:ED:FB:83:62:70': 107, u'02:3D:E8:B4:C9:47': 166, u'50:92:B9:92:74:02': 224, u'44:D8:84:6E:34:2B': 80, u'24:79:2A:BC:D3:48': 134, u'24:79:2A:FC:94:68': 7, u'BA:10:E8:DD:EE:0B': 103, u'AE:E4:B5:BE:78:1F': 217, u'24:79:2A:FD:3B:08': 95, u'06:D6:AA:32:92:38': 84,

In [20]:
keys = [0] * len(macAddresses)
for key, value in macAddresses.iteritems():
    keys[value] = key

#print mac addresses keys that will be used as columns
print(keys)

[u'24:79:2A:3D:BF:08', u'24:79:2A:FD:BF:08', u'F4:F5:24:1B:8B:AB', u'24:79:2A:BD:BF:08', u'24:79:2A:FD:1E:88', u'24:79:2A:3C:94:68', u'24:79:2A:BC:94:68', u'24:79:2A:FC:94:68', u'24:79:2A:3D:1B:18', u'24:79:2A:BD:1B:18', u'24:79:2A:BD:C5:98', u'24:79:2A:FD:C5:98', u'24:79:2A:3D:C5:98', u'24:79:2A:3D:1E:88', u'24:79:2A:BD:1E:88', u'24:79:2A:FD:1B:18', u'24:79:2A:FD:AB:58', u'24:79:2A:BD:1E:38', u'BE:9F:EF:4C:AF:8F', u'24:79:2A:FD:1E:38', u'24:79:2A:FC:D0:78', u'24:79:2A:FD:16:C8', u'24:79:2A:BD:16:C8', u'24:79:2A:3C:D0:78', u'24:79:2A:BC:D0:78', u'24:79:2A:BD:AB:58', u'24:79:2A:3D:1E:38', u'24:79:2A:3D:AB:58', u'24:79:2A:BD:B0:48', u'F8:E0:79:FE:FA:95', u'24:79:2A:3D:16:C8', u'AA:5C:2C:0B:6E:9F', u'24:79:2A:FD:3D:08', u'24:79:2A:3D:2D:F8', u'24:79:2A:3D:B0:48', u'24:79:2A:BD:2D:F8', u'24:79:2A:FD:30:28', u'24:79:2A:BD:30:28', u'24:79:2A:FD:B0:48', u'24:79:2A:FD:2D:F8', u'B2:60:F1:5E:07:8A', u'62:F1:89:5F:5E:DA', u'24:79:2A:3D:3D:08', u'24:79:2A:BD:3D:08', u'24:79:2A:3D:30:28', u'24:79:2

In [21]:
#Loads data from all mac addresses columns 
X = data[keys]

#Fills the gaps with zeros
X.fillna(0, inplace=True)

#Prints X data
print(X)

       24:79:2A:3D:BF:08  24:79:2A:FD:BF:08  F4:F5:24:1B:8B:AB  \
0                    0.0                0.0                0.0   
1                    0.0                0.0                0.0   
2                    0.0                0.0                0.0   
3                    0.0                0.0                0.0   
4                    0.0                0.0                0.0   
5                    0.0                0.0                0.0   
6                    0.0                0.0                0.0   
7                    0.0                0.0                0.0   
8                    0.0                0.0                0.0   
9                    0.0                0.0                0.0   
10                   0.0                0.0                0.0   
11                   0.0                0.0                0.0   
12                   0.0                0.0                0.0   
13                   0.0                0.0                0.0   
14        

In [22]:
#Loads the room column as our label
Y = ((data[['room']]).values.ravel())

#Prints labels
print(Y)

['E169' 'E169' 'E169' ... 'E166' 'E166' 'E166']


In [23]:
#Splits data - 70% to train the model and the 30% to test the model after training
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

In [30]:
#Defines a dict containing all Machine Learning techniques that will be used
dict_classifiers = {
    #"Logistic Regression": LogisticRegression(),
    "Nearest Neighbors": KNeighborsClassifier(n_neighbors=3),
    "Linear SVC": LinearSVC(penalty='l1', C=100.0, dual=False, max_iter=10000),
    #"Gradient Boosting Classifier": GradientBoostingClassifier(n_estimators=1000),
    "Decision Tree": tree.DecisionTreeClassifier(min_samples_split=50),
    "SGDClassifier": SGDClassifier(n_jobs=-1, penalty='l1', l1_ratio=1,tol=None, max_iter=10000),
    #"Random Forest": RandomForestClassifier(n_estimators=1000),
    #"Neural Net": MLPClassifier(alpha = 1),
    #"Naive Bayes": GaussianNB(),
    #"AdaBoost": AdaBoostClassifier(),
    #"QDA": QuadraticDiscriminantAnalysis(),
    #"Gaussian Process": GaussianProcessClassifier()
}

In [31]:
dict_models = batch_classify(X_train, Y_train, X_test, Y_test, no_classifiers = 8)
display_dict_models(dict_models)
elapsed_time = time.time() - start_time
print 'Time to train models = ' + str(elapsed_time) + ' seconds.'

trained Linear SVC in 37.20 s




trained SGDClassifier in 1299.40 s
trained Nearest Neighbors in 1.49 s
trained Decision Tree in 0.26 s
          classifier  train_score  test_score   train_time
2  Nearest Neighbors     0.999438    0.997728     1.494223
0         Linear SVC     0.995693    0.995893    37.195106
3      Decision Tree     0.997228    0.995194     0.264526
1      SGDClassifier     0.986815    0.987416  1299.397763
Time to train models = 1260.65132093 seconds.
