In [1]:
import pandas as pd
from datetime import datetime, timedelta
import time

In [2]:
# Load the voice-command log from CSV; read_csv leaves the datetime column as plain strings
vcp_log_df = pd.read_csv('../data/logs/vcp_log.csv')

In [3]:
# Preview the last rows of the raw log
vcp_log_df.tail()

Unnamed: 0,datetime,input,mode
1407,2020-02-08 14:08:22.826858,right,sequence
1408,2020-02-08 14:08:29.921605,except,sequence
1409,2020-02-08 14:08:46.339166,confirm,sequence
1410,2020-02-08 14:08:46.692154,confirm,sequence
1411,2020-02-08 14:10:32.699952,yes,sequence


In [4]:
# Check column dtypes and non-null counts before any conversion
vcp_log_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1412 entries, 0 to 1411
Data columns (total 3 columns):
datetime    1412 non-null object
input       1412 non-null object
mode        1412 non-null object
dtypes: object(3)
memory usage: 33.2+ KB


In [5]:
# Pull the first datetime value and show which Python type read_csv produced for it
dt_ob = vcp_log_df.at[0, 'datetime']
print(f"{type(dt_ob)} {dt_ob}")

<class 'str'> 2020-02-07 21:22:43.411133


In [6]:
# Confirm that the sample timestamp string parses with this format before applying it to the whole column
datetime.strptime(dt_ob, '%Y-%m-%d %H:%M:%S.%f')

datetime.datetime(2020, 2, 7, 21, 22, 43, 411133)

In [7]:
# Work on a copy so the datetime conversion below leaves vcp_log_df untouched
vcp_dt_df = vcp_log_df.copy()

In [8]:
# Parse every string in the datetime column into a pandas datetime, using the format checked above
vcp_dt_df['datetime'] = pd.to_datetime(vcp_dt_df['datetime'], format='%Y-%m-%d %H:%M:%S.%f')

In [9]:
# Preview the last rows after the datetime conversion
vcp_dt_df.tail()

Unnamed: 0,datetime,input,mode
1407,2020-02-08 14:08:22.826858,right,sequence
1408,2020-02-08 14:08:29.921605,except,sequence
1409,2020-02-08 14:08:46.339166,confirm,sequence
1410,2020-02-08 14:08:46.692154,confirm,sequence
1411,2020-02-08 14:10:32.699952,yes,sequence


In [10]:
# Verify that the datetime column now has a datetime64 dtype instead of object
vcp_dt_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1412 entries, 0 to 1411
Data columns (total 3 columns):
datetime    1412 non-null datetime64[ns]
input       1412 non-null object
mode        1412 non-null object
dtypes: datetime64[ns](1), object(2)
memory usage: 33.2+ KB


In [11]:
# Number of log rows the session analysis below will iterate over
len(vcp_dt_df)

1412

In [12]:
def playsession_analysis(interval, log_df=None):
    """Group consecutive voice commands into play sessions and summarise each one.

    Rows are walked in their stored order. A command belongs to the current
    session when it arrives strictly less than ``interval`` seconds after the
    previous command; otherwise the current session is closed and a new one
    starts with that command.

    Parameters
    ----------
    interval : int or float
        Maximum gap, in seconds, between two consecutive commands of the same
        session (exclusive: a gap equal to ``interval`` starts a new session).
    log_df : pandas.DataFrame, optional
        Command log with a ``datetime`` column of timestamps and a ``mode``
        column. Defaults to the notebook-level ``vcp_dt_df``. Rows are read
        positionally, so the frame does not need a 0..n-1 index.

    Returns
    -------
    pandas.DataFrame
        One row per session with the columns ``session_start``,
        ``session_end``, ``session_seconds``, ``sequence``, ``special``,
        ``incorrect`` and ``total_commands``. An empty log yields an empty
        frame with those columns.

    Notes
    -----
    A command whose mode is not ``sequence``, ``special`` or ``incorrect`` is
    still counted in ``total_commands`` but in none of the per-mode columns,
    regardless of whether it opens a session or extends one.
    """
    if log_df is None:
        log_df = vcp_dt_df  # fall back to the frame prepared earlier in the notebook

    mode_names = ('sequence', 'special', 'incorrect')
    column_order = ['session_start', 'session_end', 'session_seconds',
                    *mode_names, 'total_commands']

    if len(log_df) == 0:
        return pd.DataFrame(columns=column_order)

    # Pull both columns out once; per-row .loc lookups inside the loop are slow
    # and would require the index labels to be exactly 0..n-1.
    timestamps = log_df['datetime'].tolist()
    modes = log_df['mode'].tolist()

    playsession_list = []

    def start_session(timestamp, mode):
        # A new session initially consists of the single command that opened it.
        entry = {
            'session_start': timestamp,
            'session_end': timestamp,
            'session_seconds': 0.0,
        }
        for name in mode_names:
            entry[name] = 1 if mode == name else 0
        entry['total_commands'] = 1
        return entry

    def close_session(entry):
        # The duration is only known once the session's last command has been seen.
        entry['session_seconds'] = (entry['session_end'] - entry['session_start']).total_seconds()
        playsession_list.append(entry)

    playsession_entry = start_session(timestamps[0], modes[0])
    previous_timestamp = timestamps[0]
    for timestamp, mode in zip(timestamps[1:], modes[1:]):
        if (timestamp - previous_timestamp).total_seconds() < interval:
            # Same session: extend it and count the command.
            playsession_entry['session_end'] = timestamp
            if mode in mode_names:
                playsession_entry[mode] += 1
            playsession_entry['total_commands'] += 1
        else:
            # Gap too long: save the finished session and open a new one.
            close_session(playsession_entry)
            playsession_entry = start_session(timestamp, mode)
        previous_timestamp = timestamp

    # After every row has been processed, the session still open is completed and saved.
    close_session(playsession_entry)

    # The list of dictionaries becomes a dataframe for further analysis and/or writing to csv.
    return pd.DataFrame(playsession_list, columns=column_order)

In [13]:
# A gap of 120 s (2 min) or more between consecutive commands starts a new session
playsession_analysis(120)

Unnamed: 0,session_start,session_end,session_seconds,sequence,special,incorrect,total_commands
0,2020-02-07 21:22:43.411133,2020-02-07 21:25:21.606925,158.195792,7,1,0,8
1,2020-02-07 21:27:46.906810,2020-02-07 21:31:16.524228,209.617418,16,4,0,20
2,2020-02-07 22:08:45.819384,2020-02-07 22:09:13.219582,27.400198,4,1,0,5
3,2020-02-07 22:17:18.292342,2020-02-07 22:18:14.771496,56.479154,3,3,0,6
4,2020-02-07 22:22:58.597474,2020-02-07 22:23:58.466970,59.869496,5,5,0,10
5,2020-02-08 10:12:22.110039,2020-02-08 10:16:36.657616,254.547577,8,4,0,12
6,2020-02-08 10:20:28.654799,2020-02-08 10:21:55.373558,86.718759,1,0,3,4
7,2020-02-08 10:29:00.471188,2020-02-08 10:40:55.113595,714.642407,43,11,6,60
8,2020-02-08 10:46:49.793644,2020-02-08 10:48:00.941144,71.1475,0,10,1,11
9,2020-02-08 10:52:45.102471,2020-02-08 10:54:03.265708,78.163237,6,23,0,29


In [14]:
# A gap of 300 s (5 min) or more between consecutive commands starts a new session
playsession_analysis(300)

Unnamed: 0,session_start,session_end,session_seconds,sequence,special,incorrect,total_commands
0,2020-02-07 21:22:43.411133,2020-02-07 21:31:16.524228,513.113095,23,5,0,28
1,2020-02-07 22:08:45.819384,2020-02-07 22:09:13.219582,27.400198,4,1,0,5
2,2020-02-07 22:17:18.292342,2020-02-07 22:23:58.466970,400.174628,8,8,0,16
3,2020-02-08 10:12:22.110039,2020-02-08 10:21:55.373558,573.263519,9,4,3,16
4,2020-02-08 10:29:00.471188,2020-02-08 10:40:55.113595,714.642407,43,11,6,60
5,2020-02-08 10:46:49.793644,2020-02-08 10:59:12.634329,742.840685,6,46,3,55
6,2020-02-08 11:07:40.036686,2020-02-08 11:10:14.003028,153.966342,18,48,1,67
7,2020-02-08 11:51:43.995670,2020-02-08 12:40:41.753487,2937.757817,244,184,17,445
8,2020-02-08 12:45:49.563188,2020-02-08 13:23:33.799195,2264.236007,193,350,16,559
9,2020-02-08 13:41:25.099714,2020-02-08 14:10:32.699952,1747.600238,119,29,13,161


In [15]:
# A gap of 600 s (10 min) or more between consecutive commands starts a new session
playsession_analysis(600)

Unnamed: 0,session_start,session_end,session_seconds,sequence,special,incorrect,total_commands
0,2020-02-07 21:22:43.411133,2020-02-07 21:31:16.524228,513.113095,23,5,0,28
1,2020-02-07 22:08:45.819384,2020-02-07 22:23:58.466970,912.647586,12,9,0,21
2,2020-02-08 10:12:22.110039,2020-02-08 11:10:14.003028,3471.892989,76,109,13,198
3,2020-02-08 11:51:43.995670,2020-02-08 13:23:33.799195,5509.803525,437,534,33,1004
4,2020-02-08 13:41:25.099714,2020-02-08 14:10:32.699952,1747.600238,119,29,13,161
