In [159]:
import numpy as np
import pandas as pd
import glob
import os
from pathlib import Path
from itertools import product

DATA = '/Users/caldera/Documents/EPFL/GAP/Project/Projet-Gameplay/wip/kevin/data'

#right, left, jump, fire
REMAP = {'Key.right': 'R', 
         'Key.left': 'L', 
         '\'d\'': 'J', 
         'Key.space': 'J',
         'Key.up': 'J'}

FPS = 60
N_FRAME = [2*FPS, 10*FPS, 60*FPS, 300*FPS]
BLOCK_TIME = ['2s', '10s', '1min', '5min']
MIN_OCCUR = [1, 2, 5, 20]
p3 = [''.join(p) for n in range(2,4) for p in product('RLJ', repeat=n)]
p4 = [''.join(p) for n in range(2,5) for p in product('RLJ', repeat=n)]
p10 = [''.join(p) for n in range(2,11) for p in product('RLJ', repeat=n)]
p15 = [''.join(p) for n in range(2,16) for p in product('RLJ', repeat=n)]
PATTERN_MAX_LENGTH = [3, 4, 10, 15]
PATTERNS = [p3, p4, p10, p15]


def slice_keylog_file(frames_and_inputs_list, predicted=True):

    for i, run in enumerate(frames_and_inputs_list):
        for block_time, block_size in zip(BLOCK_TIME, N_FRAME):
            block = list()
            n_block = int(run.FRAME.max()/block_size)

            for period in range(n_block):
                slice = run[(run.FRAME >= period*block_size) & (run.FRAME < (period+1)*block_size)]
                block.append(slice)

                if predicted:
                    save_path = DATA + '/sliced_logs/predicted/run_{}/{}/slice{}.csv'.format(i, block_time, period) 
                else:
                    save_path = DATA + '/sliced_logs/ground_truth/run_{}/{}/slice{}.csv'.format(i, block_time, period) 

                filepath2 = Path(save_path)  
                filepath2.parent.mkdir(parents=True, exist_ok=True)  
                slice.to_csv(filepath2) 


#read csv file (inputs), return list of pressed keys (down only and standardized as LRJ)
# slices the file au passage
def read_and_standardize_logs(csv_file, predicted=True):

    data = pd.read_csv(csv_file)
    #data['run_number'] = i

    main_keys = data.KEY.value_counts().keys()[:4].tolist() #might not be necessary for predicted inputs
    data =  data[data.KEY.isin(main_keys)]


    keylogs = data[(data.KEY != '\'a\'') & (data.KEY != 'Key.enter') & (data.KEY != '\'s\'')] # A modifier quand on saura ce que predit exactement le modele
    keylogs = keylogs.replace({"KEY": REMAP}) #idem
    keylogs = keylogs[keylogs.STATUS == 'DOWN'] #on garde que les touches pressées atm

    #slice_keylog_file(frames_and_inputs_list, predicted)

    return keylogs



#inputs_log : must contain a "KEY" column with only letter L,R,J
#return Series of pattern count in decreasing order; the pattern is the index
def get_pattern_count(standardized_inputs, max_pattern_size, min_pattern_occur=10):
    d=dict()
    keys_as_str = standardized_inputs.KEY.str.cat()
    
    for pattern in PATTERNS[max_pattern_size]:
        d[pattern]=keys_as_str.count(pattern)

    frequencies = pd.Series(dict(sorted(d.items(), key=lambda x: x[1], reverse=True)))
    
    return (frequencies[frequencies > min_pattern_occur]).to_dict()

In [163]:
frequency_dataset = list()

files = sorted(Path(DATA + '/keylogs/ground_truth').glob('*.csv')) #ne preserve pas l'ordre

#liste des keylogs de la run i à la position i 
frames_and_inputs_list = list()

for i, f in enumerate(files):
    data = pd.read_csv(f)
    data['run_number'] = i

    main_keys = data.KEY.value_counts().keys()[:4].tolist()
    data =  data[data.KEY.isin(main_keys)]

    keylogs = data[(data.KEY != '\'a\'') & (data.KEY != 'Key.enter') & (data.KEY != '\'s\'')] # A modifier quand on saura ce que predit exactement le modele
    keylogs = keylogs.replace({"KEY": REMAP}) #idem
    keylogs = keylogs[keylogs.STATUS == 'DOWN'] #on garde que les touches pressées atm

    frames_and_inputs_list.append(keylogs)

for i, run in enumerate(frames_and_inputs_list):
    for block_time, block_size, max_pattern_size, min_occur in zip(BLOCK_TIME, N_FRAME, PATTERN_MAX_LENGTH, MIN_OCCUR):
        n_block = int(run.FRAME.max()/block_size)

        for period in range(n_block):
            slice = run[(run.FRAME >= period*block_size) & (run.FRAME < (period+1)*block_size)]

            freqs = get_pattern_count(slice, max_pattern_size, min_occur)

            for pattern in freqs.keys():
                frequency_dataset.append([i, block_time, period, pattern, freqs[pattern]])
            
            save_path = DATA + '/test/sliced_logs/ground_truth/run_{}/{}/slice{}.csv'.format(i, block_time, period) 
            filepath2 = Path(save_path)  
            filepath2.parent.mkdir(parents=True, exist_ok=True)  
            slice.to_csv(filepath2) 

KeyboardInterrupt: 

In [156]:
df = pd.DataFrame(frequency_dataset)

df.to_csv('/Users/caldera/Documents/EPFL/GAP/Project/Projet-Gameplay/wip/kevin/data/test/freq_dataset.csv')

In [137]:
runs

[      FRAME        KEY STATUS  run_number
 2       405  Key.right   DOWN           0
 3       428  Key.right     UP           0
 4       437        'd'   DOWN           0
 5       457        'd'     UP           0
 6       472  Key.right   DOWN           0
 ...     ...        ...    ...         ...
 1945  41712        's'   DOWN           0
 1946  41738        's'     UP           0
 1947  41744        'd'   DOWN           0
 1948  41760        'd'     UP           0
 1949  41763  Key.right     UP           0
 
 [1920 rows x 4 columns],
        FRAME        KEY STATUS  run_number
 4        186  Key.right   DOWN           1
 5        191        's'   DOWN           1
 6        302  Key.space   DOWN           1
 7        311  Key.space     UP           1
 8        330  Key.right     UP           1
 ...      ...        ...    ...         ...
 6729  130284   Key.left     UP           1
 6730  130295  Key.right   DOWN           1
 6731  130323  Key.right     UP           1
 6732  130323  K

In [101]:
#all inputs
'''
all_inputs = pd.concat(runs, ignore_index=True)
all_inputs

all_inputs = all_inputs[(all_inputs.KEY != '\'a\'') & (all_inputs.KEY != 'Key.enter') & (all_inputs.KEY != '\'s\'')]

only_R_L_J = all_inputs.replace({"KEY": REMAP})

filepath = Path(DATA + '/all_inputs_concat.csv')  
filepath.parent.mkdir(parents=True, exist_ok=True)  
only_R_L_J.to_csv(filepath) 

pressed_keys = only_R_L_J[only_R_L_J.STATUS == 'DOWN']

all_pressed_keys = pressed_keys.KEY 

all_pressed_keys

'''

'\nall_inputs = pd.concat(runs, ignore_index=True)\nall_inputs\n\nall_inputs = all_inputs[(all_inputs.KEY != \'\'a\'\') & (all_inputs.KEY != \'Key.enter\') & (all_inputs.KEY != \'\'s\'\')]\n\nonly_R_L_J = all_inputs.replace({"KEY": REMAP})\n\nfilepath = Path(DATA + \'/all_inputs_concat.csv\')  \nfilepath.parent.mkdir(parents=True, exist_ok=True)  \nonly_R_L_J.to_csv(filepath) \n\npressed_keys = only_R_L_J[only_R_L_J.STATUS == \'DOWN\']\n\nall_pressed_keys = pressed_keys.KEY \n\nall_pressed_keys\n\n'

In [112]:
#TOTAL FREQUENCIES OF PATTERNS OVER ALL GROUND TRUTH
pattern_freq = list()

for run in frames_and_inputs_list:
    keys_as_str = run.KEY.str.cat()

    d=dict()
    for i in PATTERNS:
        d[i]=keys_as_str.count(i)

    frequencies = dict(sorted(d.items(), key=lambda x: x[1], reverse=True))
    frequencies = pd.Series(frequencies)
    frequencies = frequencies[frequencies > 10]
    
    pattern_freq.append(frequencies)



In [164]:
aggreg = pd.Series({'RJ': 0})

for s in pattern_freq:
    aggreg = aggreg.add(s, fill_value=0)

aggreg.sort_values(ascending=False).astype(int).head(20)



RJ      2606
JR      2192
JJ      1679
JL      1245
JRJ     1033
LR      1017
RR       947
RJJ      920
LJ       835
RJR      761
JJJ      735
RRJ      677
JJR      667
RJL      642
RL       611
JRR      576
LRJ      553
JLR      479
LJR      458
RJJJ     455
dtype: int64

In [128]:
aggreg.describe()

count     249.000000
mean      135.349398
std       301.631549
min        11.000000
25%        14.000000
50%        29.000000
75%       108.000000
max      2606.000000
dtype: float64

In [139]:
for i, run in enumerate(frames_and_inputs_list):
    for block_time, block_size in zip(BLOCK_TIME, N_FRAME):
        block = list()
        n_block = int(run.FRAME.max()/block_size)

        for period in range(n_block):
            slice = run[(run.FRAME >= period*block_size) & (run.FRAME < (period+1)*block_size)]
            block.append(slice)

        
            filepath2 = Path(save_path)  
            filepath2.parent.mkdir(parents=True, exist_ok=True)  
            slice.to_csv(filepath2) 

NameError: name 'predicted' is not defined

In [None]:
path1 = '/Users/caldera/Documents/EPFL/GAP/Project/Projet-Gameplay/wip/kevin/data/test/sliced_logs/ground_truth'
for i in range(23):
    dir = '/run_{}'.format(i)
    
    for length in BLOCK_TIME:
        if os.path.exists(path1 + dir + '/' + length):