## Extracting data from logfiles

In [15]:
from log_parser import parse_keyboard_log
from keyboard_heatmap import KeyboardHeatmap

# Segment length of each data sample
seg_length = 100

# List of parsed logfiles
keyboard = []
keyboard.append(parse_keyboard_log("keylogs/joseph/keyboard.log"))
keyboard.append(parse_keyboard_log("keylogs/jonathan/keyboard.log"))
#keyboard.append(parse_keyboard_log("keylogs/zirui/keyboard.log"))
keyboard.append(parse_keyboard_log("keylogs/Hengjun/keyboard.log"))

# Hack to get the number of segments contained in each file
lengths = [
    keyboard[0].time.iloc[-1] / seg_length,
    keyboard[1].time.iloc[-1] / seg_length,
    keyboard[2].time.iloc[-1] / seg_length,
    #keyboard[3].time.iloc[-1] / seg_length
]

# Empty lists for inserting data
X_actual = []
y_actual = []


for k in range(len(keyboard)):
    for i in range(int(lengths[k])):
        # For each segment in each logfile
        # Create a heatmap for that segment
        heatmap = KeyboardHeatmap(keyboard[k], i, seg_length)
        # If the heatmap isn't blank
        if heatmap.class_label() != 'Null':
            X_actual.append(heatmap.heatmap_data().ravel().tolist())
            y_actual.append(heatmap.class_label())

# Display features
"""
for i in range(len(X_actual)):
    print(X_actual[i][:10], y_actual[i])
"""
print(f"Data samples: {len(y_actual)}")

Data samples: 51


## Training a ANN model on keyboard heatmap data

In [25]:
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.metrics import r2_score

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_actual, y_actual, random_state=0)

mlp = MLPClassifier(hidden_layer_sizes=(65,65), activation='relu', solver='adam', max_iter=1000)
mlp.fit(X_train,y_train)

predict_train = mlp.predict(X_train)
predict_test = mlp.predict(X_test)


from sklearn.metrics import classification_report,confusion_matrix
print("Result on Training data")
print(confusion_matrix(y_train,predict_train))
print(classification_report(y_train,predict_train))
print("Result on Test data")
print(confusion_matrix(y_test,predict_test))
print(classification_report(y_test,predict_test))



Result on Training data
[[27  0  0]
 [ 0  3  0]
 [ 0  0  8]]
              precision    recall  f1-score   support

      HENSOL       1.00      1.00      1.00        27
      JONSOL       1.00      1.00      1.00         3
      Joseph       1.00      1.00      1.00         8

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38

Result on Test data
[[8 0 1]
 [1 0 1]
 [0 0 2]]
              precision    recall  f1-score   support

      HENSOL       0.89      0.89      0.89         9
      JONSOL       0.00      0.00      0.00         2
      Joseph       0.50      1.00      0.67         2

    accuracy                           0.77        13
   macro avg       0.46      0.63      0.52        13
weighted avg       0.69      0.77      0.72        13



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
