Currently, this notebook can record one person's keystroke pattern once, in the same format that is needed by the model that was trained at CMU.

Next Steps by Susmeet:
 - Remove the hardcodings
 - Improve the code to record multiple entries from multiple people
 - Add a simple GUI
 
Susmeet would need help for the following:
 - Making the code object oriented

In [1]:
from pynput import keyboard
import time

import pandas as pd

#### defining our fixed variables for this project

In [2]:
# Column layout of the raw keystroke log: which key, press or release, and when.
cols = ["key", "action", "time"]
# The passphrase the user is asked to type while the listener is recording.
PASSWORD = ".tie5Roanl"

#### Supporting Functions

In [3]:
def get_key_name(key):
    """
    Return a printable name for the key the user has pressed or released.

    key: the key object that the keyboard listener passes to its callbacks.

    Returns ``key.char`` when the key is a ``keyboard.KeyCode`` that carries a
    character, and ``str(key)`` in every other case (for example 'Key.esc').
    """
    if isinstance(key, keyboard.KeyCode) and key.char is not None:
        return key.char
    # Special keys, and KeyCodes without a character (char is None), fall back
    # to their string form so that the 'key' column never receives None.
    return str(key)

def record_keystroke(keystroke_info):
    """
    Append one keystroke event to the module-level ``data`` DataFrame.

    keystroke_info: dict describing a single event; the callers in this
    notebook pass the keys 'key', 'action' and 'time'.

    The global ``data`` is rebound to a new DataFrame that contains the
    previous rows followed by the new one, with a fresh 0..n-1 index.
    """
    global data
    # DataFrame.append was removed in pandas 2.0; pd.concat is its documented
    # replacement and works on older pandas versions as well.
    new_row = pd.DataFrame([keystroke_info])
    data = pd.concat([data, new_row], ignore_index=True)

def on_press(key):
    """
    Listener callback for key-down events.

    Takes a timestamp, then either logs a 'press' row for the key or, when
    the escape key was hit, prints a notice and returns False.
    """
    pressed_at = time.time()  # stamp the event before doing any other work
    name = get_key_name(key)
    if name != 'Key.esc':
        record_keystroke({'key': name, 'action': 'press', 'time': pressed_at})
        return None
    # Escape ends the recording: the press itself is not logged.
    print('Exiting...')
    return False

def on_release(key):
    """
    Listener callback for key-up events: logs a 'release' row for the key.
    """
    released_at = time.time()  # stamp the event before resolving the key name
    event = {
        'key': get_key_name(key),
        'action': 'release',
        'time': released_at,
    }
    record_keystroke(event)

#### The listener function
Run the cell below and strictly follow these steps:
 - Type the following password
` .tie5Roanl `
followed by the Enter key.

 - Make sure you use the Shift key, not Caps Lock, to type the capital 'R'.

 - Once you are done, press the escape key

In [4]:
# Start from an empty log; the callbacks add one row per key event.
data = pd.DataFrame(columns=cols)

# Run the listener and block this cell until it stops
# (on_press returns False when the escape key is pressed).
listener = keyboard.Listener(on_press=on_press, on_release=on_release)
with listener:
    listener.join()

Exiting...


##### 

##### 

##### 

#### Data Preparation

In [5]:
# Split the raw log by event type so that press and release timestamps can be
# processed separately, each under its own column name.
release_times = (
    data[data['action'] == 'release']
    .drop(columns='action')
    .rename(columns={'time': 'release'})
)

press_times = (
    data[data['action'] == 'press']
    .drop(columns='action')
    .rename(columns={'time': 'press'})
)

In [6]:
# Merge the shift and R keys into the single key 'Shift.r', in case the
# listener records them differently (this ends up happening sometimes).

# Release side: keep one row for the pair and label it 'Shift.r'.
release_merge_indices = release_times[release_times['key'].isin(['R', 'Key.shift'])].index
if len(release_merge_indices) > 1:
    # Both releases were logged: drop the first so only one row carries the
    # merged key. With a single match nothing is dropped, otherwise the
    # relabel below would re-create the row with a NaN release time.
    release_times.drop(release_merge_indices[0], inplace=True)
if len(release_merge_indices) > 0:
    release_times.loc[release_merge_indices[-1], 'key'] = 'Shift.r'

# Press side: discard the 'R' press and let the shift press stand for the pair.
# This runs even when no matching release row was found above.
press_del_index = press_times.loc[press_times['key'] == 'R'].index
press_times.drop(press_del_index, inplace=True)
press_times.loc[press_times['key'] == 'Key.shift', 'key'] = 'Shift.r'

In [7]:
# Merge the press and release data again: one row per key with both timestamps.
combined_data = pd.merge(press_times, release_times, on='key')

# The '.' key is recorded as 'period' at CMU. Relabel it by value rather than
# by position, so a different first row is never mislabelled and an empty
# frame does not gain a bogus row.
combined_data.loc[combined_data['key'] == '.', 'key'] = 'period'

combined_data.set_index('key', inplace=True)

#### Feature Engineering

In [None]:
# Build the timing features, walking the keys in the order of combined_data:
#   H.<key>          release - press of the same key
#   UD.<prev>.<key>  this key's press - previous key's release
#   DD.<prev>.<key>  this key's press - previous key's press
features = {}

# Reset explicitly so that re-running this cell never pairs the first key
# with leftovers from an earlier run.
prev_key = None
prev_time = None

# The row variable is deliberately not called `time`: that would shadow the
# imported `time` module and break time.time() in the listener callbacks.
for key, timing in combined_data.iterrows():

    features['H.' + str(key)] = timing['release'] - timing['press']

    # The first key has no predecessor, so it only gets a hold-time feature.
    if prev_time is not None:
        pair = str(prev_key) + '.' + str(key)
        features['UD.' + pair] = timing['press'] - prev_time['release']
        features['DD.' + pair] = timing['press'] - prev_time['press']

    prev_key = key
    prev_time = timing

# An explicit dtype keeps the result float64 even when there are no rows.
engineered_features = pd.Series(features, dtype='float64')

In [12]:
# Display the engineered feature vector (feature name -> value).
engineered_features

H.period          0.137387
H.t               0.127401
UD.period.t       0.118614
DD.period.t       0.256001
H.i               0.122062
UD.t.i            0.271895
DD.t.i            0.399296
H.e               0.132734
UD.i.e            0.125989
DD.i.e            0.248051
H.5               0.094826
UD.e.5            0.819843
DD.e.5            0.952577
H.Shift.r         0.783378
UD.5.Shift.r      3.149976
DD.5.Shift.r      3.244802
H.o               0.116522
UD.Shift.r.o      4.785663
DD.Shift.r.o      5.569041
H.a               0.175586
UD.o.a            0.319842
DD.o.a            0.436365
H.n               0.121719
UD.a.n            0.777663
DD.a.n            0.953249
H.l               0.138052
UD.n.l            0.517390
DD.n.l            0.639109
H.Key.enter       0.100811
UD.l.Key.enter    1.464071
DD.l.Key.enter    1.602123
dtype: float64