In [1]:
import pandas as pd
import os
import glob

### Count operations in forced and voting commits

In [2]:
# forced commit
forced_tasks_names= [
    ["Log-16-01-2023_17-25-09", "1C"], 
    ["Log-16-01-2023_17-21-57", "1S"],
    ["Log-17-01-2023_16-57-25", "3C"],
    ["Log-17-01-2023_16-57-09", "3S"],
    ["Log-18-01-2023_16-11-16", "5C"],
    ["Log-18-01-2023_16-11-08", "5S"],
    ["Log-19-01-2023_12-45-29", "7C"],
    ["Log-19-01-2023_12-45-20", "7S"],
    ["Log-20-01-2023_14-08-04", "9C"],
    ["Log-20-01-2023_14-08-26", "9S"],
    ["Log-17-01-2023_16-06-17", "2C"],
    ["Log-17-01-2023_16-06-26", "2S"],
    ["Log-18-01-2023_15-32-39", "4C"],
    ["Log-18-01-2023_15-33-03", "4S"],
    ["Log-19-01-2023_11-31-36", "6C"],
    ["Log-19-01-2023_11-31-25", "6S"],
    ["Log-19-01-2023_14-56-36", "8C"],
    ["Log-19-01-2023_14-56-33", "8S"],
    ["Log-20-01-2023_17-38-14", "10C"],
    ["Log-20-01-2023_17-38-11", "10S"]
]

# voting commit
voting_tasks_names=[
    ["Log-17-01-2023_15-52-12", "2C"],
    ["Log-17-01-2023_15-52-19", "2S"],
    ["Log-18-01-2023_15-19-49", "4C"],
    ["Log-18-01-2023_15-20-04", "4S"],
    ["Log-18-01-2023_17-43-43", "6C"],
    ["Log-18-01-2023_17-43-21", "6S"],
    ["Log-19-01-2023_14-40-23", "8C"],
    ["Log-19-01-2023_14-40-43", "8S"],
    ["Log-20-01-2023_17-21-15", "10C"],
    ["Log-20-01-2023_17-20-28", "10S"],
    ["Log-16-01-2023_17-41-21", "1C"],
    ["Log-16-01-2023_17-40-47", "1S"],
    ["Log-17-01-2023_17-13-31", "3C"],
    ["Log-17-01-2023_17-13-19", "3S"],
    ["Log-18-01-2023_16-25-03", "5C"],
    ["Log-18-01-2023_16-24-56", "5S"],
    ["Log-19-01-2023_13-07-10", "7C"],
    ["Log-19-01-2023_13-07-01", "7S"],
    ["Log-20-01-2023_14-25-38", "9C"],
    ["Log-20-01-2023_14-25-44", "9S"]
]

In [3]:
forced_tasks_operations_df = []
voting_tasks_operations_df = []

forced_tasks_operations_df.clear()
voting_tasks_operations_df.clear()

# use glob to get all the csv files in the folder
path = "UserLogs/CleanedLogs/Operations"
csv_files = glob.glob(os.path.join(path, "*.txt"))
  
# loop over the list of csv files
for f in csv_files:
      
    # read the csv file
    columns_names= ['DateTime', 'OperationType', 'ObjGuid', 'ObjName']
    df = pd.read_csv(f, sep=";", header=None, names = columns_names)
    
    # print the location and filename
    filename= f.split("\\")[-1]
    '''
    print('Location:', f)    
    print('File Name:', filename)
      
    # print the content
    
    print('Content:')    
    display(df)
    '''
    
    # fill lists
    if any(x[0] in filename for x in forced_tasks_names): #if current file is a forced task log
        forced_tasks_operations_df.append(df);
    if any(x[0] in filename for x in voting_tasks_names): #if current file is a voting task log
        voting_tasks_operations_df.append(df);

#### Forced tasks operations

In [4]:
# forced tasks - count
'''
* forced commits 
& forced deletion 
'''

forced_counts_series = pd.Series(dtype = 'object')
forced_counts_series = forced_counts_series[0:0]

for df in forced_tasks_operations_df:
    forced_counts_series = forced_counts_series.add(df["OperationType"].value_counts(), fill_value=0)
    #display(df)
    
print(forced_counts_series)

AcceptDeletion            1.0
ForcedCommit            131.0
ForcedGlobalDeletion      3.0
RequestCommit             7.0
dtype: object


#### Voting tasks operations

In [5]:
# voting tasks - count
'''
% request commits
! request commit accepted
$ request commit rejected

? request deletion
+ request deletion accepted
= request deletion rejected
'''

voting_counts_series = pd.Series(dtype = 'object')
voting_counts_series = voting_counts_series[0:0]

for df in voting_tasks_operations_df:
    voting_counts_series = voting_counts_series.add(df["OperationType"].value_counts(), fill_value=0)
    #display(df)
    
print(voting_counts_series)

AcceptCommit              66.0
AcceptDeletion             1.0
DeclineCommit              3.0
ForcedCommit               3.0
RequestCommit            114.0
RequestGlobalDeletion      1.0
dtype: object


### Time in Global or Local layer

In [6]:
forced_tasks_logs_df = []
voting_tasks_logs_df = []

forced_tasks_logs_df.clear()
voting_tasks_logs_df.clear()

# use glob to get all the csv files in the folder
path = "UserLogs/CleanedLogs/UserLogs"
csv_files = glob.glob(os.path.join(path, "*.txt"))

# loop over the list of csv files
for f in csv_files:
      
    # read the csv file
    columns_names= ['DateTime', 'SequenceNum', 'CurrentLayer', 'GazeDirection', 'GazeOrigin',
                'GazeTargetObjGuid', 'GazeTargetObjName', 'HeadPosition', 'Headdirection',
                'HeadRotation', 'CurrentManipObjGuid', 'CurrentManipObjName'];

    df = pd.read_csv(f, sep=";", header=None, names = columns_names)
    
    # print the location and filename
    filename= f.split("\\")[-1]
    '''
    print('Location:', f)    
    print('File Name:', filename)
      
    # print the content
    print('Content:')    
    display(df)
    '''
    
    # fill lists        
    if any((match := substring)[0] in filename for substring in forced_tasks_names):
        df["FileName"] = match[0]
        df["UserId"] = match[1]
        forced_tasks_logs_df.append(df);
        
    if any((match := substring)[0] in filename for substring in voting_tasks_names): #if current file is a voting task log
        df["FileName"] = match[0]
        df["UserId"] = match[1]
        voting_tasks_logs_df.append(df);

#### Forced tasks times in total

In [7]:
# forced tasks - count total time

forced_count_time_series = pd.Series(dtype = 'object')
forced_count_time_series = forced_count_time_series[0:0]

for df in forced_tasks_logs_df:
    forced_count_time_series = forced_count_time_series.add(df["CurrentLayer"].value_counts(), fill_value=0)
    #display(df)
    
print(forced_count_time_series)

print("\nPercentage:")
global_time_forced = forced_count_time_series["GlobalLayer"]
local_time_forced = forced_count_time_series["LocalLayer"]

global_perc = (global_time_forced * 100)/(global_time_forced + local_time_forced)
print("In global layer:", global_perc)

local_perc = (local_time_forced * 100)/(global_time_forced + local_time_forced)
print("In local layer:", local_perc)

LocalLayer     6594
GlobalLayer    2168
dtype: object

Percentage:
In global layer: 24.743209312942252
In local layer: 75.25679068705774


#### Voting tasks times in total

In [8]:
# voting tasks - count total time

voting_count_time_series = pd.Series(dtype = 'object')
voting_count_time_series = voting_count_time_series[0:0]

for df in voting_tasks_logs_df:
    voting_count_time_series = voting_count_time_series.add(df["CurrentLayer"].value_counts(), fill_value=0)
    #display(df)
    
print(voting_count_time_series)

print("\nPercentage:")
global_time_voting = voting_count_time_series["GlobalLayer"]
local_time_voting = voting_count_time_series["LocalLayer"]

global_perc = (global_time_voting * 100)/(global_time_voting + local_time_voting)
print("In global layer:", global_perc)

local_perc = (local_time_voting * 100)/(global_time_voting + local_time_voting)
print("In local layer:", local_perc)

GlobalLayer    3446
LocalLayer     6424
dtype: object

Percentage:
In global layer: 34.91388044579534
In local layer: 65.08611955420466


#### Forced tasks times by user

In [9]:
# forced tasks - count user time

forced_count_time_user_df_list = []
forced_count_time_user_df_list.clear()

forced_count_time_user_df = pd.DataFrame()

for df in forced_tasks_logs_df:
    forced_count_time_user_df = df["CurrentLayer"].value_counts().to_frame();
    forced_count_time_user_df.rename(columns = {"CurrentLayer": "Count"})
    forced_count_time_user_df["UserId"] = df["UserId"][0]
    forced_count_time_user_df["FileName"] = df["FileName"][0]
    forced_count_time_user_df["Percentage"] = 0
    
    global_time_user_forced = forced_count_time_user_df.loc["GlobalLayer", "CurrentLayer"]
    local_time_user_forced = forced_count_time_user_df.loc["LocalLayer", "CurrentLayer"]
    
    forced_count_time_user_df.loc["GlobalLayer", "Percentage"] = (global_time_user_forced * 100)/(global_time_user_forced + local_time_user_forced);
    forced_count_time_user_df.loc["LocalLayer", "Percentage"] = (local_time_user_forced * 100)/(global_time_user_forced + local_time_user_forced);
    #display(forced_count_time_user_df)
    
    forced_count_time_user_df_list.append(forced_count_time_user_df)
    forced_count_time_user_df = pd.DataFrame

In [10]:
# voting tasks - count user time

voting_count_time_user_df_list = []
voting_count_time_user_df_list.clear()

voting_count_time_user_df = pd.DataFrame()

for df in voting_tasks_logs_df:
    voting_count_time_user_df = df["CurrentLayer"].value_counts().to_frame();
    voting_count_time_user_df["UserId"] = df["UserId"][0]
    voting_count_time_user_df["FileName"] = df["FileName"][0]
    voting_count_time_user_df["Percentage"] = 0
    
    global_time_user_voting = voting_count_time_user_df.loc["GlobalLayer", "CurrentLayer"]
    local_time_user_voting = voting_count_time_user_df.loc["LocalLayer", "CurrentLayer"]
    
    voting_count_time_user_df.loc["GlobalLayer", "Percentage"] = (global_time_user_voting * 100)/(global_time_user_voting + local_time_user_voting);
    voting_count_time_user_df.loc["LocalLayer", "Percentage"] = (local_time_user_voting * 100)/(global_time_user_voting + local_time_user_voting);
    #display(voting_count_time_user_df)
    
    voting_count_time_user_df_list.append(voting_count_time_user_df)
    voting_count_time_user_df = pd.DataFrame

In [11]:
# create dataframes indexed by user with percentages

count_time_user_df = pd.DataFrame()

count_time_user_df["UserId"] = ''
count_time_user_df["GlobalTime"] = 0
count_time_user_df["GlobalTimePerc"] = 0
count_time_user_df["LocalTime"] = 0
count_time_user_df["LocalTimePerc"] = 0
count_time_user_df["LocalTime"] = 0
count_time_user_df["TaskType"] = ''
count_time_user_df["Filename"] = ''

for df in forced_count_time_user_df_list:
    toAdd = {"UserId": df.loc["GlobalLayer", "UserId"], 
            "GlobalTime": df.loc["GlobalLayer", "CurrentLayer"],
            "GlobalTimePerc": df.loc["GlobalLayer", "Percentage"],
            "LocalTime": df.loc["LocalLayer", "CurrentLayer"],
            "LocalTimePerc": df.loc["LocalLayer", "Percentage"],
            "TaskType": "Forced",
            "Filename": df.loc["GlobalLayer", "FileName"]
           }
    
    toAdd_df = pd.DataFrame(toAdd, index = [0])
    count_time_user_df = pd.concat([count_time_user_df, toAdd_df], ignore_index = True)

for df in voting_count_time_user_df_list:
    toAdd = {"UserId": df.loc["GlobalLayer", "UserId"], 
            "GlobalTime": df.loc["GlobalLayer", "CurrentLayer"],
            "GlobalTimePerc": df.loc["GlobalLayer", "Percentage"],
            "LocalTime": df.loc["LocalLayer", "CurrentLayer"],
            "LocalTimePerc": df.loc["LocalLayer", "Percentage"],
            "TaskType": "Voting",
            "Filename": df.loc["GlobalLayer", "FileName"]
           }
    
    toAdd_df = pd.DataFrame(toAdd, index = [0])    
    count_time_user_df = pd.concat([count_time_user_df, toAdd_df], ignore_index = True)

    
count_time_user_df.sort_values(by=['UserId'])
count_time_user_df

Unnamed: 0,UserId,GlobalTime,GlobalTimePerc,LocalTime,LocalTimePerc,TaskType,Filename
0,1S,213,35.5,387,64.5,Forced,Log-16-01-2023_17-21-57
1,1C,224,37.333333,376,62.666667,Forced,Log-16-01-2023_17-25-09
2,2C,57,18.811881,246,81.188119,Forced,Log-17-01-2023_16-06-17
3,2S,81,26.732673,222,73.267327,Forced,Log-17-01-2023_16-06-26
4,3S,128,27.947598,330,72.052402,Forced,Log-17-01-2023_16-57-09
5,3C,59,12.882096,399,87.117904,Forced,Log-17-01-2023_16-57-25
6,4C,140,42.813456,187,57.186544,Forced,Log-18-01-2023_15-32-39
7,4S,77,23.547401,250,76.452599,Forced,Log-18-01-2023_15-33-03
8,5S,69,24.468085,213,75.531915,Forced,Log-18-01-2023_16-11-08
9,5C,66,23.404255,216,76.595745,Forced,Log-18-01-2023_16-11-16


#### Group by user

In [12]:
count_time_user_df.groupby(["UserId"]).mean(numeric_only=True)

Unnamed: 0_level_0,GlobalTime,GlobalTimePerc,LocalTime,LocalTimePerc
UserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10C,88.0,17.05423,428.5,82.94577
10S,205.0,39.759027,311.5,60.240973
1C,201.0,36.466667,349.0,63.533333
1S,212.5,38.95,337.5,61.05
2C,84.5,19.624919,341.0,80.375081
2S,162.0,35.537869,263.5,64.462131
3C,90.0,19.855682,364.5,80.144318
3S,127.0,27.942757,327.5,72.057243
4C,140.0,38.690678,226.0,61.309322
4S,134.0,35.353947,232.0,64.646053
