## Extracting data from logfiles

In [10]:
from log_parser import parse_keyboard_log
from log_parser import parse_mouse_log
from keyboard_heatmap import KeyboardHeatmap

# Segment length of each data sample
# (presumably in the same unit as the log's `time` column — TODO confirm)
seg_length = 120

# Read the routing file inside a `with` block so the handle is always closed,
# even if reading raises.
with open('.routing', 'r') as routing_file:
    Lines = routing_file.readlines()

# List of parsed logfiles
keyboard = []
mouse = []  # stays empty while mouse parsing is disabled (see TODO below)

for line in Lines:
    # Each stripped line is handed to the parser as-is (presumably a logfile path).
    line = line.strip()
    if 'keyboard_actions.log' in line:
        keyboard.append(parse_keyboard_log(line))
    # TODO: Don't use mouse log for now, we need a better design.
    # To re-enable, restore this branch:
    # elif 'mouse_actions.log' in line:
    #     mouse.append(parse_mouse_log(line))

# Index-aligned outputs: X_actual[j] is a flattened heatmap, Y_actual[j] its class label
X_actual = []
Y_actual = []

for log in keyboard:
    # Number of whole segments in this log, taken from the last timestamp;
    # int() drops any trailing partial segment.
    n_segments = int(log.time.iloc[-1] / seg_length)
    for seg_idx in range(n_segments):
        # Create a heatmap for this segment of this logfile
        heatmap = KeyboardHeatmap(log, seg_idx, seg_length)
        # Ask for the label once and reuse it for both the check and the sample
        label = heatmap.class_label()
        # A 'Null' label marks a blank heatmap; skip those segments
        if label == 'Null':
            continue
        X_actual.append(heatmap.heatmap_data().ravel().tolist())
        Y_actual.append(label)

print(f"Data samples: {len(Y_actual)}")

Data samples: 151


## Training an ANN model on keyboard heatmap data

In [11]:
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import random

# Draw a new random_state on every run so the data is shuffled differently each time.
# Replace it with a constant when comparing activation functions and/or solvers.
randomState = random.randint(0,10000)
# Report the seed so a particular run can be reproduced later
print(f"random_state: {randomState}")

# Split into train and test sets: 70% for training data, 30% for test data
X_train, X_test, Y_train, Y_test = train_test_split(X_actual, Y_actual, random_state=randomState, test_size=0.3)

# Standardize the features to make it easier and faster for the MLP to converge
scaler = StandardScaler()
# Fit only to the training data so that test-set statistics do not influence the scaling
scaler.fit(X_train)
# Now apply the same fitted transformation to both sets:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)



# Activation: 'relu' and 'logistic' are considered useful, but their performances are very similar now.
# Solver: 'adam' is better for datasets with thousands of samples, 'lbfgs' is much faster for small datasets.
# NOTE(review): the previous comment here claimed 'adam' performs better in our case,
# yet the code uses 'lbfgs' — confirm which solver is actually intended.

# random_state reuses the seed of the train/test split for weight initialisation, so
# setting randomState to a constant makes the whole run repeatable.
mlp = MLPClassifier(hidden_layer_sizes=(100,100), activation='relu', solver='lbfgs', max_iter=10000, random_state=randomState)
mlp.fit(X_train,Y_train)

predict_train = mlp.predict(X_train)
predict_test = mlp.predict(X_test)

# Show model prediction and evaluation.
# Confusion-matrix rows are the true labels and columns the predicted labels.
from sklearn.metrics import classification_report,confusion_matrix
print("Result on Training data")
print(confusion_matrix(Y_train,predict_train))
print(classification_report(Y_train,predict_train))
print("Result on Test data")
print(confusion_matrix(Y_test,predict_test))
print(classification_report(Y_test,predict_test))



Result on Training data
[[18  0  0  0  0]
 [ 0 20  0  0  0]
 [ 0  0 24  0  0]
 [ 0  0  0 25  0]
 [ 0  0  0  0 18]]
              precision    recall  f1-score   support

      HENSOL       1.00      1.00      1.00        18
      JONSPY       1.00      1.00      1.00        20
      MARSOL       1.00      1.00      1.00        24
      MITSOL       1.00      1.00      1.00        25
      ZIRSOL       1.00      1.00      1.00        18

    accuracy                           1.00       105
   macro avg       1.00      1.00      1.00       105
weighted avg       1.00      1.00      1.00       105

Result on Test data
[[10  0  0  2  0]
 [ 0 11  0  0  0]
 [ 0  0  5  1  0]
 [ 0  0  0  5  0]
 [ 0  0  0  1 11]]
              precision    recall  f1-score   support

      HENSOL       1.00      0.83      0.91        12
      JONSPY       1.00      1.00      1.00        11
      MARSOL       1.00      0.83      0.91         6
      MITSOL       0.56      1.00      0.71         5
      ZIRSOL  