### Imports and parameters

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
import re
%matplotlib notebook

In [None]:
# Relative path of the input CSV; the next cell reads it with pd.read_csv
file = "inputs/2022-11-23-12_13_44.csv"

In [None]:
# Load the CSV into a DataFrame and preview its first rows
df = pd.read_csv(file)
df.head()

### Keep only the keys of interest

In [None]:
# Keep only the events of the three tracked keys. The explicit `.copy()` makes
# `df_trim` an independent frame, so the `.loc` assignments below add the new
# `action` column without writing into a slice of `df` (SettingWithCopyWarning).
df_trim = df[df['KEY'].isin(['Key.left', 'Key.right', "'d'"])].copy()

# Encode every event as one character: uppercase letter when the key goes DOWN,
# the matching lowercase letter when it goes UP.
for key_name, letter in (('Key.right', 'R'), ('Key.left', 'L'), ("'d'", 'J')):
    is_key = df_trim['KEY'] == key_name
    df_trim.loc[is_key & (df_trim['STATUS'] == 'DOWN'), 'action'] = letter
    df_trim.loc[is_key & (df_trim['STATUS'] == 'UP'), 'action'] = letter.lower()

In [None]:
# Narrow the frame to the frame number and the one-letter action code,
# discarding the old index in favour of a fresh default one.
kept_columns = ["FRAME", "action"]
df_trim = df_trim.loc[:, kept_columns].reset_index(drop=True)
df_trim.head()

### Extract timeline

In [None]:
# First frame present in the trimmed log (the timeline below keeps absolute frame numbers).
start = df_trim.FRAME.min()

# For each tracked key, collect [press_frame, held_duration] pairs, i.e. the
# (xmin, xwidth) entries that `Axes.broken_barh` expects.
data = {"R": [], "L": [], "J": []}
pressed_at = {}  # key letter -> frame of the press that has not been released yet
for frame, action in zip(df_trim["FRAME"], df_trim["action"]):
    if not isinstance(action, str):
        # Rows without an action code (NaN) carry no press/release information.
        continue
    key = action.upper()
    if key not in data:
        continue
    if action == key:
        # Uppercase code: key pressed. `setdefault` keeps the first press, so
        # repeated DOWN events while the key is already held do not open new bars.
        pressed_at.setdefault(key, frame)
    elif key in pressed_at:
        # Lowercase code: key released; close the bar opened by the matching press.
        press_frame = pressed_at.pop(key)
        data[key].append([press_frame, frame - press_frame])
    # A release without a preceding press is skipped instead of raising IndexError.
# Presses still open at the end of the log are dropped: they have no width to draw.

In [None]:
# Draw one horizontal lane per key; every [press_frame, duration] pair in `data`
# becomes one bar in that key's lane.
fig, ax = plt.subplots()
lanes = (
    ("R", 10, 'tab:red'),
    ("L", 20, 'tab:green'),
    ("J", 30, 'tab:blue'),
)
for key_name, lane_bottom, lane_color in lanes:
    ax.broken_barh(data[key_name], (lane_bottom, 9), facecolors=lane_color)
plt.show()

### Extract patterns

In [None]:
all_keys = ''.join(list(df_trim["action"]))
all_keys

In [None]:
def getAllSubStrings(x, l=None, freq=False):
    """Return every contiguous substring of ``x`` that has exactly length ``l``.

    Parameters
    ----------
    x : str
        String to slide the window over.
    l : int, optional
        Window length. Defaults to ``len(x)``, i.e. the whole string.
    freq : bool, default False
        If True, return a list with one entry per window position (duplicates
        kept, in order of appearance) so occurrences can be counted.
        If False, return the set of distinct substrings.

    Returns
    -------
    list of str or set of str
    """
    if l is None:
        l = len(x)

    # Slice each window once; windows running past the end of `x` come out
    # shorter than `l` and are filtered away.
    windows = (x[i:i + l] for i in range(len(x)))
    allSubStrings = [w for w in windows if len(w) == l]

    return allSubStrings if freq else set(allSubStrings)

In [None]:
def extract_freq(x, l=3, sort='value'):
    """Count how often each substring of length ``l`` occurs in ``x``.

    Overlapping occurrences are counted, because every window position
    contributes one substring.

    Parameters
    ----------
    x : str
        String to scan.
    l : int, default 3
        Substring (window) length.
    sort : {'value', 'key'}, default 'value'
        'value' orders the result by descending count, 'key' orders it
        alphabetically by substring.

    Returns
    -------
    dict
        Mapping substring -> number of occurrences, in the requested order.

    Raises
    ------
    ValueError
        If ``sort`` is neither 'key' nor 'value'.
    """
    if sort not in ('key', 'value'):
        raise ValueError(f"sort must be 'key' or 'value', got {sort!r}")

    res = {}
    # Scan the argument `x`, not the notebook-level `all_keys`, so the function
    # counts whatever string the caller passes in.
    for substring in getAllSubStrings(x, l=l, freq=True):
        res[substring] = res.get(substring, 0) + 1

    if sort == 'key':
        return dict(sorted(res.items(), key=lambda item: item[0]))
    return dict(sorted(res.items(), key=lambda item: item[1], reverse=True))

In [None]:
# Gather the occurrence counts of every substring of length 4 through 10 into
# one dict, then order the merged result from most to least frequent.
merged_counts = {}
for length in range(4, 11):
    for pattern, count in extract_freq(all_keys, l=length, sort='value').items():
        merged_counts[pattern] = count
by_count_desc = sorted(merged_counts.items(), key=lambda pair: pair[1], reverse=True)
all_patterns = dict(by_count_desc)
all_patterns

### Top 10 most frequent patterns

In [None]:
from itertools import islice
# `all_patterns` is ordered by descending count, so the first 10 items are the most frequent.
n_items = list(islice(all_patterns.items(), 10))
print(n_items)

# Readable labels: a key press becomes "<KEY>(" and any key release becomes ")",
# e.g. "JjJj" -> "J()J()".
label_map = str.maketrans({
    "J": "J(", "j": ")",
    "R": "R(", "r": ")",
    "L": "L(", "l": ")",
})
keys = [pattern.translate(label_map) for pattern, _ in n_items]
values = [count for _, count in n_items]

fig, ax = plt.subplots()
# Plot by position rather than by label: all releases share the ")" symbol, so
# different patterns (e.g. "RJjr" and "RJrj") can produce the same label, and
# matplotlib would draw bars with equal string x values on top of each other.
positions = list(range(len(keys)))
ax.bar(positions, values)
ax.set_xticks(positions)
ax.set_xticklabels(keys)
plt.show()

### Where the patterns occur

In [None]:
# `all_keys` holds one character per row of `df_trim`, so a match offset in the
# string is also the row position in the FRAME column. Pull the column out once
# instead of materialising a row Series for every match.
frames = df_trim["FRAME"].to_numpy()

place_patterns = {}
for pattern in all_patterns:
    # Zero-width lookahead reports overlapping occurrences too; `re.escape`
    # guarantees the pattern is matched literally.
    finder = re.compile(f'(?={re.escape(pattern)})')
    place_patterns[pattern] = [frames[m.start()] for m in finder.finditer(all_keys)]
place_patterns

### Example: finding the "double jump" pattern

In [None]:
fig, ax = plt.subplots()
ax.broken_barh(data["R"], (10, 9), facecolors='tab:red')
ax.broken_barh(data["L"], (20, 9), facecolors='tab:green')
ax.broken_barh(data["J"], (30, 9), facecolors='tab:blue')
# "JjJj" = jump pressed, released, pressed, released. Fall back to an empty list
# so a recording without any double jump still plots instead of raising KeyError.
double_jump_frames = place_patterns.get("JjJj", [])
# Mark the starting frame of every occurrence above the three key lanes.
ax.scatter(double_jump_frames, len(double_jump_frames) * [40], c='red')
plt.show()