## Given code

In [53]:
import numpy as np

def read_int(f):
    """Read one native-endian 32-bit signed integer from the binary stream `f`.

    Parameters
    ----------
    f : binary file-like object exposing ``readinto``.

    Returns
    -------
    numpy.int32
        The decoded value. The number of bytes actually read is not checked:
        bytes the stream could not supply stay at zero in the buffer.
    """
    raw = bytearray(np.dtype(np.int32).itemsize)
    f.readinto(raw)
    return np.frombuffer(raw, dtype=np.int32)[0]

def read_double(f):
    """Read one native-endian 64-bit float from the binary stream `f`.

    Parameters
    ----------
    f : binary file-like object exposing ``readinto``.

    Returns
    -------
    numpy.float64
        The decoded value. The number of bytes actually read is not checked:
        bytes the stream could not supply stay at zero in the buffer.
    """
    raw = bytearray(np.dtype(np.double).itemsize)
    f.readinto(raw)
    return np.frombuffer(raw, dtype=np.double)[0]

def read_double_tab(f, n):
    """Read `n` consecutive native-endian 64-bit floats from the binary stream `f`.

    Parameters
    ----------
    f : binary file-like object exposing ``readinto``.
    n : number of doubles to read.

    Returns
    -------
    numpy.ndarray or list
        A 1-D float64 array of length `n` when all ``8 * n`` bytes were read;
        an empty list ``[]`` when the stream supplied fewer bytes.
    """
    buf = bytearray(8 * n)
    if f.readinto(buf) == len(buf):
        return np.frombuffer(buf, dtype=np.double)
    # Short read: signal "no more complete rows" with an empty list.
    return []

def get_pics_from_file(filename):
    """Load every row stored in a binary file together with its header.

    The header is read in this order: one 32-bit integer (``nb_pics``) followed
    by four doubles (``freq_sampling_khz``, ``freq_trame_hz``, ``freq_pic_khz``,
    ``norm_fact``). The rest of the file is read as consecutive rows of
    ``nb_pics`` doubles until a row can no longer be read completely.

    Parameters
    ----------
    filename : path of the binary file to read.

    Returns
    -------
    (list, dict)
        ``tab_pics`` — the rows, in file order, each as returned by
        ``read_double_tab``; ``info`` — the header values keyed by the names
        listed above.
    """
    # `with` releases the file handle even when one of the reads raises.
    with open(filename, "rb") as f_pic:
        # Dict entries are evaluated top to bottom, which matches the header layout.
        info = {
            "nb_pics": read_int(f_pic),
            "freq_sampling_khz": read_double(f_pic),
            "freq_trame_hz": read_double(f_pic),
            "freq_pic_khz": read_double(f_pic),
            "norm_fact": read_double(f_pic),
        }
        tab_pics = []
        pics = read_double_tab(f_pic, info["nb_pics"])
        # An empty result means the last complete row has been consumed.
        while len(pics) > 0:
            tab_pics.append(pics)
            pics = read_double_tab(f_pic, info["nb_pics"])
    return tab_pics, info

# Our Algorithm

### To visualize binaries as PNGs

In [54]:
# for file in glob.glob("../data/pics_*.bin"):
#    key = file.split("pics_")[1].replace(".bin", "")
#    values, info = get_pics_from_file(file)
#    plt.imshow(values,aspect="auto",)
#    plt.savefig("../data/PNGs/pics_" + key + ".png")

### Libraries used

In [55]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix,f1_score,accuracy_score
import glob
import pickle

### Creation of training and testing sets

In [56]:

# Fixed seed so that frame sampling and the train/test split give the same
# result on every Restart & Run All.
SEED = 42
MAX_FRAMES_PER_KEY = 3000  # number of frames kept per key after shuffling
rng = np.random.default_rng(SEED)

X, Y = [], []

# Average value of NOKEY
pics_nokey, info = get_pics_from_file("../data/pics_NOKEY.bin")
nokey = np.mean(pics_nokey, axis=0)

# glob order depends on the filesystem; sorting keeps the seeded run reproducible.
for file in sorted(glob.glob("../data/pics_*.bin")):
    key = file.split("pics_")[1].replace(".bin", "")
    if key == "LOGINMDP":
        # This recording is the signal to decode later, not a labelled key.
        continue
    values, info = get_pics_from_file(file)

    # Sampling the key signal
    rng.shuffle(values)
    values = values[:MAX_FRAMES_PER_KEY]

    # Reducing noise
    values = np.subtract(values, nokey)

    # One sample per frame, labelled with the key taken from the file name.
    X.extend(values)
    Y.extend([key] * len(values))

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=SEED
)

# Data scaling: fit on the training part only, then apply the same transform to the test part.
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

### KNN Model
<font color="red"> À ne pas exécuter lors du test </font>

In [57]:
# This cell is disabled: the code is wrapped in a string literal, so nothing
# below is executed (the cell only evaluates to that string).
# The wrapped code fits a KNeighborsClassifier on X_train/Y_train, predicts
# X_test and prints the accuracy.
# NOTE(review): accuracy_score presumably returns a fraction rather than a
# percentage, so the "%" suffix in the print below may be misleading — confirm.
"""
classifier = KNeighborsClassifier(n_neighbors=40, p=2, metric='euclidean',n_jobs=-1)
classifier.fit(X_train,Y_train)
Y_pred = classifier.predict(X_test)

#### K-cross validation ####
# from sklearn.model_selection import cross_val_score
# cv_scores = cross_val_score(classifier,X,Y, cv=10)
# print(cv_scores)
# print(np.mean(cv_scores))

### Evaluating model ###
print("Accuracy score:", accuracy_score(Y_test,Y_pred),"%")

# from sklearn.metrics import classification_report
# print(classification_report(Y_test, Y_pred))

# cm = confusion_matrix(Y_test,Y_pred)
# print(cm)
"""

'\nclassifier = KNeighborsClassifier(n_neighbors=40, p=2, metric=\'euclidean\',n_jobs=-1)\nclassifier.fit(X_train,Y_train)\nY_pred = classifier.predict(X_test)\n\n#### K-cross validation ####\n# from sklearn.model_selection import cross_val_score\n# cv_scores = cross_val_score(classifier,X,Y, cv=10)\n# print(cv_scores)\n# print(np.mean(cv_scores))\n\n### Evaluating model ###\nprint("Accuracy score:", accuracy_score(Y_test,Y_pred),"%")\n\n# from sklearn.metrics import classification_report\n# print(classification_report(Y_test, Y_pred))\n\n# cm = confusion_matrix(Y_test,Y_pred)\n# print(cm)\n'

### To save the current model

In [58]:
# pickle.dump(classifier,open("./KNN_model.sav",'wb'))
# pickle.dump(sc_X,open("./scaling_function.sav",'wb'))

### Removing noise and segmenting signal

<font color="#D5B60A"> Some chunks are missing, so this cannot be used in the project. </font>

In [59]:
# This cell is disabled: the code is wrapped in a string literal, so nothing
# below is executed (the cell only evaluates to that string).
# The wrapped code subtracts `nokey` from the LOGINMDP recording and walks it
# frame by frame: frames whose norm is below `threshold` count as "nokey";
# a run of more than `min_nokey_length` such frames opens a new chunk, and the
# following frames at or above the threshold are appended to the current chunk.
# Chunks with fewer than 5 frames are then removed as noise.
"""
def pics_distance(lline, rline):
    return np.linalg.norm(lline - rline)

guess, info = get_pics_from_file("../data/pics_LOGINMDP.bin")

guess = np.subtract(guess, nokey)
chunks = [] # contains the lines of each chunk
chunks_index = [] # location of the start of each chunk
empty = np.subtract(nokey, nokey)
i = 0
threshold = 1.4 # threshold to detect a nokey
min_nokey_length = 10 # consider there is a nokey based on this length
while i < len(guess):
    nokey_length = 0
    while i < len(guess) and pics_distance(guess[i], empty) < threshold:
        nokey_length += 1
        i += 1
    if nokey_length > min_nokey_length:
        chunks.append([])
        chunks_index.append(i)
    while i < len(guess) and pics_distance(guess[i], empty) >= threshold:
        if len(chunks) == 0:
            chunks.append([])
            chunks_index.append(i)
        chunks[-1].append(guess[i])
        i += 1

# cleaning noise
j = 0
while j < len(chunks):
    if len(chunks[j]) < 5:
        chunks.pop(j)
        chunks_index.pop(j)
    else:
        j += 1

print(len(chunks_index))
print(chunks_index)
# for chunk in chunks:
#     print(len(chunk))
"""

'\ndef pics_distance(lline, rline):\n    return np.linalg.norm(lline - rline)\n\nguess, info = get_pics_from_file("../data/pics_LOGINMDP.bin")\n\nguess = np.subtract(guess, nokey)\nchunks = [] # contains the lines of each chunk\nchunks_index = [] # location of the start of each chunk\nempty = np.subtract(nokey, nokey)\ni = 0\nthreshold = 1.4 # threshold to detect a nokey\nmin_nokey_length = 10 # consider there is a nokey based on this length\nwhile i < len(guess):\n    nokey_length = 0\n    while i < len(guess) and pics_distance(guess[i], empty) < threshold:\n        nokey_length += 1\n        i += 1\n    if nokey_length > min_nokey_length:\n        chunks.append([])\n        chunks_index.append(i)\n    while i < len(guess) and pics_distance(guess[i], empty) >= threshold:\n        if len(chunks) == 0:\n            chunks.append([])\n            chunks_index.append(i)\n        chunks[-1].append(guess[i])\n        i += 1\n\n# cleaning noise\nj = 0\nwhile j < len(chunks):\n    if len(chun

## Running the model on a signal

In [60]:
# Loading the model
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# model files that come from a trusted source.
# `with` closes each file as soon as its object has been loaded.
with open("./KNN_model.sav", 'rb') as model_file:
    classifier = pickle.load(model_file)
with open("./scaling_function.sav", 'rb') as scaler_file:
    sc_X = pickle.load(scaler_file)

# Loading the signal
# NOTE: `input` shadows the builtin of the same name; the name is kept so that
# any other cell referring to it keeps working.
input, info = get_pics_from_file("../data/pics_LOGINMDP.bin")

# Average value of NOKEY
pics_nokey, info = get_pics_from_file("../data/pics_NOKEY.bin")
nokey = np.mean(pics_nokey, axis=0)

step = 90  # number of consecutive frames classified together as one chunk
SHIFT_COLUMN = 5  # column zeroed to remove the meaningful value of SHIFT's signal


def _most_common_label(labels):
    """Return the label occurring most often in `labels`.

    On a tie, the label that comes first in sorted order wins
    (np.unique sorts, argmax keeps the first maximum).
    """
    unique, pos = np.unique(labels, return_inverse=True)
    return unique[np.bincount(pos).argmax()]


def _classify_chunk(frames):
    """Subtract the NOKEY average, scale, predict every frame and return the majority label."""
    scaled = sc_X.transform(np.subtract(frames, nokey))
    return _most_common_label(classifier.predict(scaled))


output = []

# A chunk is classified only when it ends strictly before the last frame, i.e.
# while step * (i + 1) < len(input).
# NOTE(review): this also skips a final chunk that is exactly `step` frames
# long — confirm whether that is intended.
nb_chunks = max(0, (len(input) - 1) // step)

for i in range(nb_chunks):
    chunk = input[step * i:step * (i + 1)]  # chunking
    max_value = _classify_chunk(chunk)  # best guess for this chunk

    if max_value == 'NOKEY':
        continue

    # Guessing key pressed with the SHIFT key (if there is)
    if max_value == 'SHIFT':
        # Work on a copy: writing into the frames of `chunk` would modify the
        # arrays stored in `input` and corrupt the loaded signal.
        masked = np.array(chunk)
        masked[:, SHIFT_COLUMN] = 0
        max_value = _classify_chunk(masked)

        if max_value == 'NOKEY' or max_value == 'SHIFT':
            continue  # Nothing added to output
        max_value = "SHIFT " + max_value

    # Consecutive chunks giving the same result count as a single key press.
    if not output or max_value != output[-1]:
        output.append(max_value)

In [61]:
# Report how many key presses were decoded, then list them one per line.
print("The input's length is:", len(output))
if output:
    print("\n".join(str(pressed) for pressed in output))

The input's length is: 20
CTRL
SUPPR
SHIFT C
SHIFT G
SHIFT W
SHIFT P
SHIFT I
H
A
C
K
A
G
O
N
2
0
2
1
ENTER
