### Run this to resolve import errors

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('../src'))
if module_path not in sys.path:
    sys.path.append(module_path)

## Extracting data from logfiles

In [4]:
from log_parser import parse_keyboard_log, parse_mouse_log
from keyboard_heatmap import KeyboardHeatmap

# Segment length of each data sample
seg_length = 100

routing_file = open('../.routing', 'r')
Lines = routing_file.readlines()

# List of parsed logfiles
keyboard = []
mouse = []

for line in Lines:
    line = line.strip()
    if 'key.log' in line:
        keyboard.append(parse_keyboard_log('../' + line))
    '''elif 'mouse.log' in line:
        mouse.append(parse_mouse_log('../' + line))'''

# Split into train and test sets
X_train = []
X_test = []
Y_train = []
Y_test = []

for k in range(len(keyboard) - 1):
    for i in range(int(keyboard[k].time.iloc[-1] / seg_length)):
        # For each segment in each logfile
        # Create a heatmap for that segment
        heatmap = KeyboardHeatmap(keyboard[k], i, seg_length)
        bheatmap = heatmap.to_binary_class_label("ZIRSOL")
        # If the heatmap isn't blank
        if bheatmap.class_label() != 'Null':
            X_train.append(bheatmap.heatmap_data().ravel().tolist())
            Y_train.append(bheatmap.class_label())

for k in range(len(keyboard) - 1,len(keyboard)):
    for i in range(int(keyboard[k].time.iloc[-1] / seg_length)):
        # For each segment in each logfile
        # Create a heatmap for that segment
        heatmap = KeyboardHeatmap(keyboard[k], i, seg_length)
        bheatmap = heatmap.to_binary_class_label("ZIRSOL")
        # If the heatmap isn't blank
        if bheatmap.class_label() != 'Null':
            X_test.append(bheatmap.heatmap_data().ravel().tolist())
            Y_test.append(bheatmap.class_label())

print(f"Train samples: {len(Y_train)}")
print(f"Test samples: {len(Y_test)}")

FileNotFoundError: [Errno 2] No such file or directory: '../.././logs/JON/SPY/key.log'

## Training a ANN model on keyboard heatmap data

In [3]:
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import random

# Make random_state random each run, it causes different data shuffel each run.
# Make it a constant if want to test activation funcyion and/or solver
randomState = random.randint(0,10000)

# Split into train and test sets. 70% for traning data, 30% for test data
#X_train, X_test, Y_train, Y_test = train_test_split(X_actual, Y_actual, random_state=randomState, test_size=0.3)

# Normalize the data to make it easier and faster for MLP to converge
scaler = StandardScaler()
# Fit only to the training data
scaler.fit(X_train)
StandardScaler(copy=True, with_mean=True, with_std=True)
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)



# Activation: 'relu' and 'logistic' are considered useful. But their performances are very similar now.
# Solver: 'adam' is better for dataset with thousands of samples, 'lbfgs' is much faster for small dataset
#         But 'adam' performs better in our case.

mlp = MLPClassifier(hidden_layer_sizes=(100,100), activation='relu', solver='adam', max_iter=10000)
mlp.fit(X_train,Y_train)

predict_train = mlp.predict(X_train)
predict_test = mlp.predict(X_test)

#Show model prediction and evaluation
from sklearn.metrics import classification_report,confusion_matrix
print("Result on Training data")
print(confusion_matrix(Y_train,predict_train))
print(classification_report(Y_train,predict_train))
print("Result on Test data")
print(confusion_matrix(Y_test,predict_test))
print(classification_report(Y_test,predict_test))



Result on Training data
[[388   0]
 [  0  36]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       388
           1       1.00      1.00      1.00        36

    accuracy                           1.00       424
   macro avg       1.00      1.00      1.00       424
weighted avg       1.00      1.00      1.00       424

Result on Test data
[[34  1]
 [ 0  0]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        35
           1       0.00      0.00      0.00         0

    accuracy                           0.97        35
   macro avg       0.50      0.49      0.49        35
weighted avg       1.00      0.97      0.99        35



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
