In [2]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time

from sklearn import svm

In [3]:
keyCodes = {}

file1 = open('Keyboard_key_codes.txt', 'r')

for line in file1:
    keyPair = line.strip()
    keyPair = keyPair.split(" - ")
    keyCodes[keyPair[0]] = keyPair[1]
file1.close()
keyCodes["192"] = "Backquote"

In [4]:

leftSideKeyList = [
    "Q", "W", "E",  "R", "T", "A", "S", "D",
    "F", "G", "Z", "X", "C", "V", "B", "1", "2",
    "3", "4", "5", "6", "Caps Lock", "Esc", "Tab", "Space",
    "left Win", "left Shift", "left Ctrl", "F1", "F2", "F3", "F4", "F5", "Backquote"
    ]

leftSideKeyCodeList = []
for key in keyCodes:
    if (keyCodes[key] in leftSideKeyList):
        leftSideKeyCodeList.append(int(key))

specialKeys = [
    "left Win", "left Shift", "left Ctrl", "F1", "F2", "F3", "F4", "F5", "Backquote",
    "Backspace", "Enter", "Shift", "Ctrl", "Alt", "Pause/Break", "Page Up", "Page Up",
    "Space", "Page Down", "End", "Home", "Left arrow", "Up arrow", "Right arrow",
    "Down arrow", "Print Screen", "Insert", "Delete", "right Win", "Popup", "Num Lock",
    "Scroll Lock", "right Shift", "right Ctrl",
    "0 in the numeric keypad", "1 in the numeric keypad", "2 in the numeric keypad",
    "3 in the numeric keypad", "4 in the numeric keypad", "5 in the numeric keypad",
    "6 in the numeric keypad", "7 in the numeric keypad", "8 in the numeric keypad",
    "9 in the numeric keypad", "* in the numeric keypad", "+ in the numeric keypad",
    "- in the numeric keypad", ". in the numeric keypad", "/ in the numeric keypad "
]

specialKeyCodes = []
for key in keyCodes:
    if (keyCodes[key] in specialKeys):
        specialKeyCodes.append(int(key))

In [5]:
print(leftSideKeyCodeList)

[9, 20, 27, 32, 49, 50, 51, 52, 53, 54, 65, 66, 67, 68, 69, 70, 71, 81, 82, 83, 84, 86, 87, 88, 90, 91, 112, 113, 114, 115, 116, 160, 162, 192]


In [18]:
# our SVM
classifier = svm.SVC()

users = {
    0 : "Vita",
    1 : "Lesha",
    2 : "Ilya",
    3 : "Tigran"
}

def normalizeTime(dataFrame) :
    timeData = dataFrame["time"].values

    start = []
    end = []
    timeDataShape = timeData.shape[0]
    for i in range(0, timeDataShape - 1):
        if (dataFrame['time'].values[i + 1] - dataFrame['time'].values[i] > 2):
            start.append(dataFrame['time'].index[i + 1])
            end.append(dataFrame['time'].index[i])

    newTimeData = np.zeros(timeDataShape)

    j = 0
    value = 0
    for i in range(0, timeDataShape):
        if (j < len(start)):
                if (dataFrame.index[i] == start[j]):
                    value = value + dataFrame['time'][start[j]] - dataFrame['time'][end[j]] - 2
                    j += 1
        newTimeData[i] = timeData[i] - value

    return newTimeData


def makeTimeDataPlot(timeData):
    plt.figure()
    y = np.zeros(timeData.shape[0])
    plt.scatter(timeData, y, c='red')
    plt.show()


def getCaseParameters(dataFrame):
    def getPrintSpeed(timeData):
        return (timeData[len(timeData) - 1] - timeData[0]) / timeData.shape[0]
    
    params = []

    leftSideDf = dataFrame.loc[(dataFrame["key"].isin(leftSideKeyCodeList))]
    timeData = normalizeTime(leftSideDf)
    params += [getPrintSpeed(timeData)]

    rigthSideDf = dataFrame.loc[(~dataFrame["key"].isin(leftSideKeyCodeList))]
    timeData = normalizeTime(rigthSideDf)
    params += [getPrintSpeed(timeData)]

    specialKeysDf = dataFrame.loc[(dataFrame["key"].isin(specialKeyCodes))]
    mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
    mostUsedSpecKeys = [float(x) for x in mostUsedSpecKeys.index.values]
    params += mostUsedSpecKeys

    timeData = normalizeTime(dataFrame)
    params += [getPrintSpeed(timeData)]

    return params


def processDB(dataFrame, n, yValue):
    dataFrame = dataFrame.loc[(dataFrame.time != 1)]

    dfsize = dataFrame.shape[0]
    dfs = []

    for i in range(n):
        dfs.append(dataFrame.iloc[int(dfsize/n * i): int(dfsize/n * (i + 1))])

    xFeatures = []
    yFeatures = []
    for i in range(len(dfs)):
        params = getCaseParameters(dfs[i])
        xFeatures.append(params)
        yFeatures.append(yValue)

    return xFeatures, yFeatures


def trainSVM(xTrain, yTrain):
    classifier.fit(xTrain, yTrain)


def getDataFrameFromDataBase(dbFileName):
    con = sqlite3.connect(dbFileName)
    dataFrame = pd.read_sql_query("SELECT * from Keyboard", con)
    return dataFrame

def mainFunc() :

    xFeatures = []
    yFeatures = []

    VitaDataFrame = getDataFrameFromDataBase("/keylogger/project/Project3/keyboardVita.sqlite3")
    x, y = processDB(VitaDataFrame, 2, 0)
    xFeatures = xFeatures + x
    yFeatures = yFeatures + y

    LeshaDataFrame = getDataFrameFromDataBase("/keylogger/project/Project3/keyboardLesha.sqlite3")
    x, y = processDB(LeshaDataFrame, 2, 1)
    xFeatures = xFeatures + x
    yFeatures = yFeatures + y

    IlyaDataFrame = getDataFrameFromDataBase("/keylogger/project/Project3/keyboardIlya.sqlite3")
    x, y = processDB(IlyaDataFrame, 2, 2)
    xFeatures = xFeatures + x
    yFeatures = yFeatures + y

    TigranDataFrame = getDataFrameFromDataBase("/keylogger/project/Project3/keyboardTigran.sqlite3")
    x, y = processDB(TigranDataFrame, 2, 3)
    xFeatures = xFeatures + x
    yFeatures = yFeatures + y

    trainSVM(xFeatures, yFeatures)

    LeshaDataFrameTest = getDataFrameFromDataBase("/keylogger/project/Project3/keyboardLeshaTest.sqlite3")
    xTest, y = processDB(LeshaDataFrameTest, 1, 1)

    print(users[classifier.predict(xTest)[0]])
        
          

In [19]:
# самые часто нажимаемые клавиши

# скорость печати

# разделелние клавиатуры на левую и правую части

In [20]:
mainFunc()

Ilya


  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
  mostUsedSpecKeys = specialKeysDf["key"].value_counts()[:1]
