In [2]:
import pandas as pd
import numpy as np

In [39]:
# import loggingfile

# import key - mouse
data = pd.read_csv("./key_mouse.log", encoding = "ISO-8859-1", sep = ",",index_col="time")
data.index = pd.to_datetime(data.index)
# define testdata
### columnname
col_time = "timestamp"
col_perif = "perif"
col_loc = "location"
col_event = "event"

print(data.loc[data["perif"] == "keyboard"])

                               perif   location     event
time                                                     
2023-08-24 09:39:26.285740  keyboard        'a'   pressed
2023-08-24 09:39:26.384886  keyboard        'a'  released
2023-08-24 09:39:26.724887  keyboard  Key.space   pressed
2023-08-24 09:39:26.830887  keyboard  Key.space  released
2023-08-24 09:39:27.062916  keyboard        'g'   pressed
...                              ...        ...       ...
2023-08-24 09:40:35.036598  keyboard        'i'  released
2023-08-24 09:40:35.112595  keyboard        'o'   pressed
2023-08-24 09:40:35.210594  keyboard        'o'  released
2023-08-24 09:40:35.267595  keyboard        'n'   pressed
2023-08-24 09:40:35.374595  keyboard        'n'  released

[342 rows x 3 columns]


In [2]:
# define testdata
### columnname
col_time = "timestamp"
col_perif = "perif"
col_loc = "location"
col_event = "event"

data_keyboard = pd.DataFrame(
    data={
        col_time: pd.date_range("2018-01-01", periods=10, freq="H").tolist(),
        col_perif: ["keyboard","keyboard","keyboard","keyboard","keyboard","keyboard","keyboard","keyboard","keyboard","keyboard"],
        col_loc:["e","f","e","e","e","e","f","e","e","e"],
        col_event:["pressed","pressed", "released","pressed","pressed", "released", "released","pressed","pressed", "released",],
    }
)

data_mouse = pd.DataFrame(
    data={
        col_time: pd.date_range("2018-01-01 00:00:01", periods=10, freq="H").tolist(),
        col_perif: ["mouse"]*10,
        col_loc:["loc","loc","loc","loc","loc","loc","lmb","lmb", "lmb","lmb",],
        col_event:[ (0,0),(1,3),(0,0),(1,3),(0,0),(1,3),"pressed", "released","pressed", "released",],
    }
)

data_total = pd.concat([data_keyboard,data_mouse]).set_index(col_time).sort_index()
print(data_total)

                        perif location     event
timestamp                                       
2018-01-01 00:00:00  keyboard        e   pressed
2018-01-01 00:00:01     mouse      loc    (0, 0)
2018-01-01 01:00:00  keyboard        f   pressed
2018-01-01 01:00:01     mouse      loc    (1, 3)
2018-01-01 02:00:00  keyboard        e  released
2018-01-01 02:00:01     mouse      loc    (0, 0)
2018-01-01 03:00:00  keyboard        e   pressed
2018-01-01 03:00:01     mouse      loc    (1, 3)
2018-01-01 04:00:00  keyboard        e   pressed
2018-01-01 04:00:01     mouse      loc    (0, 0)
2018-01-01 05:00:00  keyboard        e  released
2018-01-01 05:00:01     mouse      loc    (1, 3)
2018-01-01 06:00:00  keyboard        f  released
2018-01-01 06:00:01     mouse      lmb   pressed
2018-01-01 07:00:00  keyboard        e   pressed
2018-01-01 07:00:01     mouse      lmb  released
2018-01-01 08:00:00  keyboard        e   pressed
2018-01-01 08:00:01     mouse      lmb   pressed
2018-01-01 09:00:00 

In [40]:
# define Dataobj

class DataPerif:
    # input has to be sorted
    def __init__(self, all_data, tf_sec) -> None:
        # split data
        self.keyboard_data = all_data.loc[all_data[col_perif] == "keyboard"]
        self.analog_data = all_data.loc[all_data[col_perif] == "analog"]
        self.mouse_data =  all_data.loc[all_data[col_perif] == "mouse"]
        
        # get generell info
        self.time_frame = tf_sec
        self.max_time = all_data.index.max()

        # calculate info
        # keyboard
        self.nr_key_strokes = len(self.keyboard_data.loc[self.keyboard_data[col_event] == "pressed"])
        self.key_presses = self.get_full_press()
        self.key_press_time  = pd.Series([tup[1] for tup in self.key_presses]).mean()
        self.key_dead_times = self.get_dead_times()
        self.key_dead_time_avg = pd.Series([tup[0] for tup in self.key_dead_times]).mean()
        self.key_backspaces = 33 # TODO: Is this best way to measure mistakes

    def get_full_press(self):
        full_presses = []
        keys_grouped = list(self.keyboard_data.groupby(col_loc))
        
        for key_list in keys_grouped:
            key = key_list[0]
            key_data = key_list[1]
            if len(key_data)>1:

                print(key)
                print(key_data)
                # remove incomplete presses 
                key_data_full_presses = key_data.loc[(key_data[col_event] == "released").idxmin() :(key_data[col_event] == "released")[::-1].idxmax() ]
                
                # remove double press event  TODO maybe add number 
                key_data_rem_duplicate = list(key_data_full_presses.loc[key_data_full_presses[col_event].shift() != key_data_full_presses[col_event]].groupby(col_event))
                
                full_presses += [(key, 
                                key_data_rem_duplicate[1][1].index[num] - key_data_rem_duplicate[0][1].index[num], 
                                key_data_rem_duplicate[0][1].index[num], 
                                key_data_rem_duplicate[1][1].index[num] ) for num, _ in enumerate(key_data_rem_duplicate[1][1].values)]
        return full_presses

    def get_avg_time(self):
        # not used, because its one line
        times = pd.Series([tup[1] for tup in self.key_presses]).mean()
        return times
    
    def get_dead_times(self):
        # TODO define edgecases
        # return max size if no complete presses
        # get complete timeframe
        out_col = "off_time"
        all_times = self.keyboard_data
        all_times = pd.concat([all_times.iloc[[0]], all_times, all_times.iloc[[-1]]])
        all_times[out_col] = True

        # filter the keypresses
        for on_time in self.key_presses:
            all_times.loc[(all_times.index > on_time[2]) & (all_times.index < on_time[3]), out_col] = False
        #print(all_times)

        press_edges = all_times.loc[all_times[out_col].shift() != all_times[out_col]]
        press_edges = press_edges.loc[(press_edges[out_col] == False).idxmin():(press_edges[out_col] == False)[::-1].idxmax() ] # TODO: change when edgecases defined
        print(press_edges)
        
        dead_times=list((press_edges.iloc[2*i+1].name-press_edges.iloc[2*i].name, press_edges.iloc[2*i].name,press_edges.iloc[2*i+1].name,)  for i in range(int(len(press_edges)/2)))
        #print(all_times.loc[out_col == True])
        return dead_times

    def output_string(self):
        # returns console output to test
        print(f'TIME:')
        print(f'Timeframe: {self.time_frame}\nEndtime: {self.max_time}\n')
        print(f'Keyboard:\nN.o. Activations: {self.nr_key_strokes}\nAvg. Presstime: {self.key_press_time}\nAvg. Deadtime: {self.key_dead_time_avg}')

class KeyPress:
    # one Keypress
    def __init__(self) -> None:
        pass
class KeyPressLetter:
    # all completed presses of one letter in timeframe
    def __init__(self) -> None:
        self.start_time = True
    


dat_obj = DataPerif(data,30)
#print(dat_obj.max_time)

'a'
                               perif location     event
time                                                   
2023-08-24 09:39:26.285740  keyboard      'a'   pressed
2023-08-24 09:39:26.384886  keyboard      'a'  released
2023-08-24 09:39:27.359729  keyboard      'a'   pressed
2023-08-24 09:39:27.461428  keyboard      'a'  released
2023-08-24 09:39:49.434372  keyboard      'a'   pressed
2023-08-24 09:39:49.518213  keyboard      'a'  released
2023-08-24 09:39:53.073171  keyboard      'a'   pressed
2023-08-24 09:39:53.267170  keyboard      'a'  released
2023-08-24 09:40:02.103006  keyboard      'a'   pressed
2023-08-24 09:40:02.304553  keyboard      'a'  released
2023-08-24 09:40:08.303870  keyboard      'a'   pressed
2023-08-24 09:40:08.451395  keyboard      'a'  released
2023-08-24 09:40:26.683362  keyboard      'a'   pressed
2023-08-24 09:40:26.781394  keyboard      'a'  released
2023-08-24 09:40:31.070595  keyboard      'a'   pressed
2023-08-24 09:40:31.217595  keyboard      'a

In [41]:
print("Keyboard data:\nraw:\n")
print(dat_obj.keyboard_data)

print(f'Anzahl eingegebener Buchstaben {dat_obj.nr_key_strokes}')
dat_obj.output_string()




Keyboard data:
raw:

                               perif   location     event
time                                                     
2023-08-24 09:39:26.285740  keyboard        'a'   pressed
2023-08-24 09:39:26.384886  keyboard        'a'  released
2023-08-24 09:39:26.724887  keyboard  Key.space   pressed
2023-08-24 09:39:26.830887  keyboard  Key.space  released
2023-08-24 09:39:27.062916  keyboard        'g'   pressed
...                              ...        ...       ...
2023-08-24 09:40:35.036598  keyboard        'i'  released
2023-08-24 09:40:35.112595  keyboard        'o'   pressed
2023-08-24 09:40:35.210594  keyboard        'o'  released
2023-08-24 09:40:35.267595  keyboard        'n'   pressed
2023-08-24 09:40:35.374595  keyboard        'n'  released

[342 rows x 3 columns]
Anzahl eingegebener Buchstaben 171
TIME:
Timeframe: 30
Starttime: 2023-08-24 09:40:38.509640

Keyboard:
N.o. Activations: 171
Avg. Presstime: 0 days 00:00:00.113884347
Avg. Deadtime: 0 days 00:00:02.95