### Run this to resolve import errors

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('../src'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from log_parser import parse_keyboard_log
from log_parser import parse_mouse_log
from keyboard_heatmap import KeyboardHeatmap

# Segment length of each data sample
seg_length = 100

routing_file = open('../.routing', 'r')
Lines = routing_file.readlines()

# List of parsed logfiles
keyboard = []
mouse = []

for line in Lines:
    line = line.strip()
    if 'key.log' in line:
        keyboard.append(parse_keyboard_log('../' + line))
    elif 'mouse.log' in line:
        mouse.append(parse_mouse_log('../' + line))

# Split into train and test sets
X_train = []
X_test = []
Y_train = []
Y_test = []

target = "MITSOL"

for k in range(len(keyboard) - 1):
    for i in range(int(keyboard[k].time.iloc[-1] / seg_length)):
        # For each segment in each logfile
        # Create a heatmap for that segment
        heatmap = KeyboardHeatmap(keyboard[k], i, seg_length)
        heatmap = heatmap.to_binary_class_label(target)
        # If the heatmap isn't blank
        if heatmap.class_label() != 'Null':
            X_train.append(heatmap.heatmap_data().ravel().tolist())
            Y_train.append(heatmap.class_label())

for k in range(len(keyboard) - 1,len(keyboard)):
    for i in range(int(keyboard[k].time.iloc[-1] / seg_length)):
        # For each segment in each logfile
        # Create a heatmap for that segment
        heatmap = KeyboardHeatmap(keyboard[k], i, seg_length)
        heatmap = heatmap.to_binary_class_label(target)
        # If the heatmap isn't blank
        if heatmap.class_label() != 'Null':
            X_test.append(heatmap.heatmap_data().ravel().tolist())
            Y_test.append(heatmap.class_label())

print(f"Train samples: {len(Y_train)}")
print(f"Test samples: {len(Y_test)}")

Train samples: 424
Test samples: 35


In [3]:
import sklearn
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

#This model using Gaussian Naive-Bayes

NBG = GaussianNB()
model = NBG.fit(X_train,Y_train)
#model = NBG.partial_fit(X_train,Y_train,np.unique(Y_train))

predict_train = NBG.predict(X_train)
predict_test = NBG.predict(X_test)

print( f"Train score: {NBG.score(X_train, Y_train)} " + f"Test score: {NBG.score(X_test, Y_test)}")

#Show model prediction and evaluation
from sklearn.metrics import classification_report,confusion_matrix
print("Result on Training data")
print(confusion_matrix(Y_train,predict_train))
print(classification_report(Y_train,predict_train))
print("Result on Test data")
print(confusion_matrix(Y_test,predict_test))
print(classification_report(Y_test,predict_test))


Train score: 0.5943396226415094 Test score: 0.08571428571428572
Result on Training data
[[216 172]
 [  0  36]]
              precision    recall  f1-score   support

           0       1.00      0.56      0.72       388
           1       0.17      1.00      0.30        36

    accuracy                           0.59       424
   macro avg       0.59      0.78      0.51       424
weighted avg       0.93      0.59      0.68       424

Result on Test data
[[ 3 32]
 [ 0  0]]
              precision    recall  f1-score   support

           0       1.00      0.09      0.16        35
           1       0.00      0.00      0.00         0

    accuracy                           0.09        35
   macro avg       0.50      0.04      0.08        35
weighted avg       1.00      0.09      0.16        35



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
#Invalid model - data isn't discrete

import sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

#This model using Multinomial Naive-Bayes

NBM = MultinomialNB()
model = NBM.fit(X_train,Y_train)

predict_train = NBM.predict(X_train)
predict_test = NBM.predict(X_test)

print( f"Train score: {NBM.score(X_train, Y_train)} " + f"Test score: {NBM.score(X_test, Y_test)}")

#Show model prediction and evaluation
from sklearn.metrics import classification_report,confusion_matrix
print("Result on Training data")
print(confusion_matrix(Y_train,predict_train))
print(classification_report(Y_train,predict_train))
print("Result on Test data")
print(confusion_matrix(Y_test,predict_test))
print(classification_report(Y_test,predict_test))



Train score: 0.9150943396226415 Test score: 1.0
Result on Training data
[[388   0]
 [ 36   0]]
              precision    recall  f1-score   support

           0       0.92      1.00      0.96       388
           1       0.00      0.00      0.00        36

    accuracy                           0.92       424
   macro avg       0.46      0.50      0.48       424
weighted avg       0.84      0.92      0.87       424

Result on Test data
[[35]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        35

    accuracy                           1.00        35
   macro avg       1.00      1.00      1.00        35
weighted avg       1.00      1.00      1.00        35



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
import sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import ComplementNB

#This model using Complement Naive-Bayes

NBC = ComplementNB()
model = NBC.fit(X_train,Y_train)

predict_train = NBC.predict(X_train)
predict_test = NBC.predict(X_test)

print( f"Train score: {NBC.score(X_train, Y_train)} " + f"Test score: {NBC.score(X_test, Y_test)}")

#Show model prediction and evaluation
from sklearn.metrics import classification_report,confusion_matrix
print("Result on Training data")
print(confusion_matrix(Y_train,predict_train))
print(classification_report(Y_train,predict_train))
print("Result on Test data")
print(confusion_matrix(Y_test,predict_test))
print(classification_report(Y_test,predict_test))


Train score: 0.8183962264150944 Test score: 0.3142857142857143
Result on Training data
[[319  69]
 [  8  28]]
              precision    recall  f1-score   support

           0       0.98      0.82      0.89       388
           1       0.29      0.78      0.42        36

    accuracy                           0.82       424
   macro avg       0.63      0.80      0.66       424
weighted avg       0.92      0.82      0.85       424

Result on Test data
[[11 24]
 [ 0  0]]
              precision    recall  f1-score   support

           0       1.00      0.31      0.48        35
           1       0.00      0.00      0.00         0

    accuracy                           0.31        35
   macro avg       0.50      0.16      0.24        35
weighted avg       1.00      0.31      0.48        35



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
#Invalid model - data isn't discrete

import sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB

#This model using Categorical Naive-Bayes

NBCat = CategoricalNB()
model = NBCat.fit(X_train,Y_train)

predict_train = NBCat.predict(X_train)
predict_test = NBCat.predict(X_test)

print( f"Train score: {NBCat.score(X_train, Y_train)} " + f"Test score: {NBCat.score(X_test, Y_test)}")

#Show model prediction and evaluation
from sklearn.metrics import classification_report,confusion_matrix
print("Result on Training data")
print(confusion_matrix(Y_train,predict_train))
print(classification_report(Y_train,predict_train))
print("Result on Test data")
print(confusion_matrix(Y_test,predict_test))
print(classification_report(Y_test,predict_test))


Train score: 0.9150943396226415 Test score: 1.0
Result on Training data
[[388   0]
 [ 36   0]]
              precision    recall  f1-score   support

           0       0.92      1.00      0.96       388
           1       0.00      0.00      0.00        36

    accuracy                           0.92       424
   macro avg       0.46      0.50      0.48       424
weighted avg       0.84      0.92      0.87       424

Result on Test data
[[35]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        35

    accuracy                           1.00        35
   macro avg       1.00      1.00      1.00        35
weighted avg       1.00      1.00      1.00        35



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
