In [1]:
import pandas as pd
import csv
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import math
import ntpath
import scipy.stats as stats
import os
import glob

    
def load_files():
    mypath = r'Teams{}'
    allfiles = glob.glob(mypath.format("\*\*.csv"))
    # filenames
    filenames = [path_leaf(name,'tail') for name in glob.glob(mypath.format("\*\*.csv"))]
    # team names in list, same length as filenames
    teams = [path_leaf(name,'head') for name in glob.glob(mypath.format("\*\*.csv"))]
    # combine team name with the trial number. trial number can only be taken from the ordered list
    # this dataset is a bit messy in its notation we try to make that a bit better now
    dfteams = pd.DataFrame(teams,columns=['values'])
    dfteams = dfteams.groupby([dfteams['values'].ne(dfteams['values'].shift()).cumsum(), 'values']).size().reset_index(level=0, drop=True)
    # create a counter for the trial number. not every team has the same amount of trials 
    teamslist = [list(range(0, i)) for i in dfteams]
    teamslist = [item for sublist in teamslist for item in sublist]
    trials = ["{}_Round {}".format(team,trial) for team, trial in zip(teams,teamslist)]
    #load files, but only specific columns are needed
    column_list = ["Step","Incrimental Time","A-Position","A-Buttonstate","A-Haptic 1","B-Position","B-Buttonstate","B-Haptic 1"]
    df_list = [pd.read_csv(str(file),skiprows=2,nrows = 6000, usecols=column_list) for file in allfiles]
    return filenames, trials, df_list

# helper function to extract the team name and filename from the filestructure
# since everybody is constantly reinventing a database with folder structures this is needed
def path_leaf(path, head_or_tail='tail'):
    head, tail = ntpath.split(path)
    if head_or_tail== 'tail':
        return tail
    else:
        return ntpath.basename(head)

def Kojima_dataset():
    # we load the whole dataset with its reduced amount if columns
    filenames,trial_names,df_list = load_files()
    # the first is the current naming of columns, the second the one we will transfer it to
    column_list = ["Step","Incrimental Time","A-Position","A-Buttonstate","A-Haptic 1","B-Position","B-Buttonstate","B-Haptic 1"]
    new_colnames = ["Step","Incrimental Time","User 0Position","User 0Button","User 0VIB1","User 1Position","User 1Button","User 1VIB1"]
    dataframe_list = []
    # preprocessing of all files of the dataset
    for i in range(len(df_list)):
        df = df_list[i]
        #rename columns for same preprocessing functionality
        df.rename(columns={i:j for i,j in zip(column_list,new_colnames)}, inplace=True)
        preprocessing(df)
        df["Trial_name"] = trial_names[i]
        df["File_name"] = filenames[i]
        dataframe_list.append(df)
        df.to_csv(os.path.join('Teams','preprocessed_{}.csv'.format(trial_names[i])))
    return dataframe_list
    
    
### loading the files
def combined_file(filename):
    #### extract information for individual trials ####
    with open('{}'.format(filename)) as csvfile:
            csvreader = csv.reader(csvfile)
            rows =list(csvreader)
            # extracting the beginnings of each trial from the merged file as indices and file names
            header_indices = [i for i,row in enumerate(rows) if "Round" in row[0]]
            trial_names = [row[0] for i,row in enumerate(rows) if "Round" in row[0]]
            starting_indices = [i for i,row in enumerate(rows) if "Step" in row[0]]            
    dataframe_list = [] 
    
    if not os.path.exists('individual_{}'.format(filename)):
        os.makedirs('individual_{}'.format(filename))

    ### break big file into smaller files and do preprocessing
    for i in range(len(trial_names)):
        column_list=["Step","Incrimental Time","User 0Position","User 0Button","User 0VIB1","User 1Position","User 1Button","User 1VIB1"]
        df = pd.read_csv(str(filename),skiprows=starting_indices[i],nrows = 6000, usecols=column_list)
        preprocessing(df)
        df["Trial_name"] = trial_names[i]
        df["File_name"] = filename
        dataframe_list.append(df)
        df.to_csv(os.path.join('individual_{}'.format(filename),"preprocessed{}.csv".format(trial_names[i])))
    return dataframe_list

def unwrapped_distances(a,b):
    # to be fast we use numpy-only methods:dist = (max(a,b)-min(a,b)); (dist%(600*np.sign(300-dist)))*(np.sign(b-a))
    dist = (np.maximum(a,b)-np.minimum(a,b))
    # max - min will always be between 0 and 599, if it is <300 it is correct and %600 won't change it
    # if max - min >300 we need the opposite to get the smaller value, we can do that by using %-600
    # sign(300 - (max - min)) will do that job, but to avoid Nan we need to avoid 0, hence 300.1
    # the defined direction of walk is still needed,hence the last term. a is the User position and b the object
    # this defines it as "how has the user to walk to reach the object", minus is left, plus is right
    how_to_walk= ((dist%(600*np.sign(300.1-dist)))*(np.sign(b-a))).astype(int)
    return how_to_walk

def preprocessing(df):
    # we cold loop over users, but for readibility we will make it separately for now
    # delete rows that have Nan values and name them with warning
    print(df[df.isnull().any(axis=1)])
    df.dropna(axis=0,how='any',inplace=True)
    # bring values to positive values (some starting values were negative)
    df["User 0Position"] = df["User 0Position"]%600
    df["User 1Position"] = df["User 1Position"]%600
    
    ### VELOCITIES ###
    # velocities with direction
    df["User 0Velocity"] = unwrapped_distances(0,df["User 0Position"].diff().fillna(0).astype(int))
    df["User 1Velocity"] = unwrapped_distances(0,df["User 1Position"].diff().fillna(0).astype(int))
    # binary velocities
    df["User 0BinaryVelocity"] = np.sign(df["User 0Velocity"])
    df["User 1BinaryVelocity"] = np.sign(df["User 1Velocity"])
    
    ### ACCELERATIONS ###
    # accelerations (negative for break, postive for acceleration, independent of direction)
    df["User 0Acceleration"] = df["User 0Velocity"].abs().diff().fillna(0).astype(int)
    df["User 1Acceleration"] = df["User 1Velocity"].abs().diff().fillna(0).astype(int)
    # binary accelerations (-1 for break +1 for speeding up)
    df["User 0binaryAcceleration"] = np.sign(df["User 0Acceleration"])
    df["User 1binaryAcceleration"] = np.sign(df["User 1Acceleration"])
    # directed accelerations (overlapped by the travel direction -1 for left, +1 for right)
    df["User 0DirectedAcceleration"] = df["User 0Velocity"].diff().fillna(0).astype(int)
    df["User 1DirectedAcceleration"] = df["User 1Velocity"].diff().fillna(0).astype(int)
    
    ### COORDINATES ###
    # add static coordinate column
    df["User 0Static"] = 150
    df["User 1Static"] = 450
    # add shadow coordinate column and use modulo for unwrapping. 600 is the number of world size. 150 the lure offset
    df["User 0Shadow"] = (df["User 0Position"] + 150)%600
    df["User 1Shadow"] = (df["User 1Position"] + 150)%600
    
    ### differences to objects for each user to own static, shadow of other, avatar of other ###
    # think about that differences between -4 and 4 mean a vibration
    df["User 0Distance_Static"] = unwrapped_distances(df["User 0Position"], df["User 0Static"])
    df["User 0Distance_Shadow"] = unwrapped_distances(df["User 0Position"], df["User 1Shadow"])
    df["User 0Distance_Avatar"] = unwrapped_distances(df["User 0Position"], df["User 1Position"])
    df["User 1Distance_Static"] = unwrapped_distances(df["User 1Position"], df["User 1Static"])
    df["User 1Distance_Shadow"] = unwrapped_distances(df["User 1Position"], df["User 0Shadow"])
    df["User 1Distance_Avatar"] = unwrapped_distances(df["User 1Position"], df["User 0Position"])
    
    ### Vibrations ###
    # set initial vibrations to 0
    df.loc[:10, "User 0VIB1"] = 0
    df.loc[:10, "User 1VIB1"] = 0
    # set vibration values to 1 or 0
    df["User 0VIB1"] = df["User 0VIB1"].astype("bool").astype(int)
    df["User 1VIB1"] = df["User 1VIB1"].astype("bool").astype(int)
    # in case there is no vibrations we want to mark a failed trial
    try:
        # that line might be hard to digest, so in order of the line: df[[we select the 3 distance columns]][we apply a boolean mask where vibration is on].absolutevalues.columnNAMEofminumumvalue.splitString.takeTheLastPartOfTheString
        df["User 0VIB1Source"] = df[["User 0Distance_Static","User 0Distance_Shadow","User 0Distance_Avatar"]][list(df["User 0VIB1"].astype("bool"))].abs().idxmin(axis="columns").str.split("_").str[1]
    except:
        df["User 0VIB1Source"] = "no vibrations?!"
        print("no vibrations, probably a failed trial")
    try:
        df["User 1VIB1Source"] = df[["User 1Distance_Static","User 1Distance_Shadow","User 1Distance_Avatar"]][list(df["User 1VIB1"].astype("bool"))].abs().idxmin(axis="columns").str.split("_").str[1]
    except:
        df["User 1VIB1Source"] = "no vibrations?!"
        print("no vibrations, probably a failed trial")
        
    ### clicks ###
    # set only the first click value to 1 
    # first term will always be 0.5 during click and second will be 0.5 (first change) or -0.5 when button is released
    df["User 0Button"] = (0.5*(df["User 0Button"] + df["User 0Button"].diff()).fillna(0)).astype('int')  
    df["User 1Button"] = (0.5*(df["User 1Button"] + df["User 1Button"].diff()).fillna(0)).astype('int')
    # kill error clicks in the first 5 seconds
    df.loc[:500, "User 0Button"] = 0
    df.loc[:500, "User 1Button"] = 0
    # see if a player clicked: the sum over the column will be 1 for a made click and 0 if no click was made
    did_click_0 = df["User 0Button"].sum()
    did_click_1 = df["User 1Button"].sum()
    did_click = [did_click_0,did_click_1]
    print(did_click)

    ### loop over users 0 and 1
    for j in range(0,2):
        if did_click[j] == 1:
            # we only get one value but we want to write it in the whole column for overview
            df["User {}Click".format(j)] = df.loc[df["User {}Button".format(j)]== 1,"Incrimental Time"].values[0]
            # we might want to have a column that shows how much time till click happens
            df["User {}TimeUntilClick".format(j)] = df["Incrimental Time"] - df["User {}Click".format(j)]
            ### closest source approach
            target_columns = ["User {}Distance_Static".format(j),"User {}Distance_Shadow".format(j),"User {}Distance_Avatar".format(j)]
            within70 = (df[target_columns]<70).any(axis=1)
            buttonclick = list(df["User {}Button".format(j)].astype("bool"))
            try:
                df["User {}Click_ClosestAtClick".format(j)] = df[target_columns][buttonclick&within70].abs().idxmin(axis="columns").str.split("_").str[1].values[0]
            except:
                df["User {}Click_ClosestAtClick".format(j)] = "unknown"
            ### most stimulation in the lat 2 seconds approach
            # take the rows where time until click is between -2000(ms) and 0, group it by the vibration sources and [take only vibration source column]. count rows of each group. pick the name of the group with max counts
            try:
                stimulations_dict = dict(df[(df["User {}TimeUntilClick".format(j)]>-2000) & (df["User {}TimeUntilClick".format(j)] <0)].groupby("User {}VIB1Source".format(j))["User {}VIB1Source".format(j)].count())
                max_value = max(stimulations_dict.values())
                if max_value > 5:# maximum value with a small threshold to make sure that tehre was in deed interaction
                    max_keys = [k for k, v in stimulations_dict.items() if v == max_value][0]
                    df["User {}Click_mostStimulations".format(j)] = max_keys
                else:
                    df["User {}Click_mostStimulations".format(j)] = "tooFewStimWithin2Sec"
                df["User {}Click_mostStimulationsDictionary".format(j)] = str(stimulations_dict)
            except:
                df["User {}Click_mostStimulations".format(j)] = "noStimFor2Sec"
                df["User {}Click_mostStimulationsDictionary".format(j)] = "noStimFor2Sec"
            
        else:
            df["User {}Click".format(j)] = "NoClick"
            df["User {}TimeUntilClick".format(j)] = "NoClick"
            df["User {}Click_ClosestAtClick".format(j)] = "NoClick"
            df["User {}Click_mostStimulations".format(j)] = "NoClick"
            df["User {}Click_mostStimulationsDictionary".format(j)] = "NoClick"


In [2]:
def create_summary(lst,filename):
    df_summary = pd.DataFrame([])
    summary_columns=["Trial_name","File_name",
                     "User 0Click","User 0Click_ClosestAtClick","User 0Click_mostStimulations","User 0Click_mostStimulationsDictionary",
                     "User 1Click","User 1Click_ClosestAtClick","User 1Click_mostStimulations","User 1Click_mostStimulationsDictionary"]
    for column in summary_columns:
        df_summary[column] = [df[column][0] for df in lst]
    df_summary["User 0Points"] = [1 if ((i==j) & (i=="Avatar")) else 0 if (i=="NoClick") else -1 for i, j in zip(df_summary["User 0Click_ClosestAtClick"], df_summary["User 0Click_mostStimulations"])]
    df_summary["User 1Points"] = [1 if ((i==j) & (i=="Avatar")) else 0 if (i=="NoClick") else -1 for i, j in zip(df_summary["User 1Click_ClosestAtClick"], df_summary["User 1Click_mostStimulations"])]
    df_summary.to_csv(os.path.join(filename,"summary.csv"))

### for the current dataset, does preprocessing, saves preprocessed CSV and returns a list of dataframes for further work
filename = 'PCE_COMBINED_LOG 11,25,44 9-10-2020.csv'
df_list = combined_file(r'{}'.format(filename))
create_summary(df_list,'individual_{}'.format(filename))

### for the Kojima??? dataset. i dunno how the datset is called. you can change that if you want
#df_list = Kojima_dataset()
#create_summary(df_list,"Teams")

Empty DataFrame
Columns: [Step, Incrimental Time, User 0Position, User 0Button, User 0VIB1, User 1Position, User 1Button, User 1VIB1]
Index: []
[1, 0]
Empty DataFrame
Columns: [Step, Incrimental Time, User 0Position, User 0Button, User 0VIB1, User 1Position, User 1Button, User 1VIB1]
Index: []
[1, 0]
Empty DataFrame
Columns: [Step, Incrimental Time, User 0Position, User 0Button, User 0VIB1, User 1Position, User 1Button, User 1VIB1]
Index: []
[1, 1]
Empty DataFrame
Columns: [Step, Incrimental Time, User 0Position, User 0Button, User 0VIB1, User 1Position, User 1Button, User 1VIB1]
Index: []
[1, 1]
Empty DataFrame
Columns: [Step, Incrimental Time, User 0Position, User 0Button, User 0VIB1, User 1Position, User 1Button, User 1VIB1]
Index: []
[1, 1]
Empty DataFrame
Columns: [Step, Incrimental Time, User 0Position, User 0Button, User 0VIB1, User 1Position, User 1Button, User 1VIB1]
Index: []
[1, 1]
Empty DataFrame
Columns: [Step, Incrimental Time, User 0Position, User 0Button, User 0VIB1, Us

In [3]:
df1=df_list[0]
df1

Unnamed: 0,Step,Incrimental Time,User 0Position,User 0Button,User 0VIB1,User 1Position,User 1Button,User 1VIB1,User 0Velocity,User 1Velocity,...,User 0Click_ClosestAtClick,User 0Click_mostStimulations,User 0Click_mostStimulationsDictionary,User 1Click,User 1TimeUntilClick,User 1Click_ClosestAtClick,User 1Click_mostStimulations,User 1Click_mostStimulationsDictionary,Trial_name,File_name
0,0,0,301,0,0,211,0,0,0,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"
1,1,11,299,0,0,211,0,0,-2,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"
2,2,21,297,0,0,211,0,0,-2,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"
3,3,31,294,0,0,211,0,0,-3,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"
4,4,41,291,0,0,211,0,0,-3,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,5995,60862,570,0,0,565,0,0,0,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"
5996,5996,60872,570,0,0,565,0,0,0,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"
5997,5997,60882,570,0,0,565,0,0,0,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"
5998,5998,60892,570,0,0,565,0,0,0,0,...,Avatar,Avatar,{'Avatar': 78},NoClick,NoClick,NoClick,NoClick,NoClick,Trial 0 Round 0,"PCE_COMBINED_LOG 11,25,44 9-10-2020.csv"


In [24]:
from PyIF import te_compute as te
# source and target are strings after "User 0" (or 1), window of 0 means whole timeseries, which directions
def transfer_entropy(source, target="Velocity",window=5000, other_on_self = True, self_on_other= True, vibration_on_self=True,self_on_self=True,k=1,lag=1):
    desired = [other_on_self,self_on_other,vibration_on_self,self_on_self]
    interactions = [[],[],[],[]] # empty storage
    
    for df in df_list: # loop over all dataframes
        for user in range(0,2): # loop over users 0 and 1
            pairs = [(user,(user+1)%2),((user+1)%2,user),(user,user),(user,user)] # define user combination
            for i in range(len(interactions)): # loop over all types of interactions
                if desired[i]: # do only desired (True) interactions
                    if window > 0: # sliced timeseries
                        if df["User {}Click".format(pairs[i][0])][0]!= "NoClick": # only calculate TE for players with click
                            target_list = np.array(df[(df["User {}TimeUntilClick".format(pairs[i][0])]> -window) & (df["User {}TimeUntilClick".format(pairs[i][0])] <0)]["User {}{}".format(pairs[i][0],target)])
                            source_list = np.array(df[(df["User {}TimeUntilClick".format(pairs[i][0])]> -window) & (df["User {}TimeUntilClick".format(pairs[i][0])] <0)]["User {}{}".format(pairs[i][1],source)])
                            vibration_list = np.array(df[(df["User {}TimeUntilClick".format(pairs[i][0])]> -window) & (df["User {}TimeUntilClick".format(pairs[i][0])] <0)]["User {}VIB1".format(pairs[i][1])])
                        else:
                            continue
                    else: # full timeseries
                        target_list = np.array(df["User {}{}".format(pairs[i][0],target)])
                        source_list = np.array(df["User {}{}".format(pairs[i][1],source)])
                        vibration_list = np.array(df["User {}VIB1".format(pairs[i][1])])
                    # transfer entropy
                    if i == 2:
                        TE = te.te_compute(target_list,vibration_list, k=k, embedding=lag, safetyCheck=False, GPU=False)
                    else:
                        TE = te.te_compute(target_list,source_list, k=k, embedding=lag, safetyCheck=False, GPU=False)
                    interactions[i].append(TE)
    return interactions

In [36]:
#result = transfer_entropy("Velocity","Velocity",5000,True,True,True,True,4,12)
ks=[1,2,3,4,5,6,7,8,9,10]
lags=[1,2,3,4,5,6,7,8,9,10]
result=np.zeros(shape=(len(ks),len(lags)))

for i in range(len(ks)):
    for j in range(len(lags)):
        result[i,j] = np.mean(transfer_entropy("Velocity","BinaryVelocity",5000,False,False,False,True,ks[i],lags[j])[3])
result

array([[ 0.29114693,  0.26068091,  0.24891981,  0.2168376 ,  0.18994744,
         0.1638122 ,  0.1486382 ,  0.09829198,  0.0799726 ,  0.06584342],
       [ 0.28911456,  0.25868567,  0.22589235,  0.2171278 ,  0.15330631,
         0.11676925,  0.07319819,  0.00778699, -0.01565143, -0.04714796],
       [ 0.2817382 ,  0.25666119,  0.19764233,  0.1495422 ,  0.09272445,
         0.05262213,  0.03465832, -0.02493666, -0.06162147, -0.07465169],
       [ 0.27955862,  0.24138363,  0.18886637,  0.12944174,  0.05811485,
        -0.0097779 ,  0.01747853, -0.06188137, -0.01543518, -0.00525468],
       [ 0.28203988,  0.23007433,  0.17255182,  0.11609344,  0.07140941,
         0.01860046,  0.06396624,  0.08412545,  0.07629608,  0.12023635],
       [ 0.27684303,  0.21122324,  0.11332346,  0.12659459,  0.11857495,
         0.0875126 ,  0.12370693,  0.1277512 ,  0.12688022,  0.18727432],
       [ 0.25912994,  0.20746833,  0.1250771 ,  0.09862187,  0.15815087,
         0.16920713,  0.14259239,  0.2297746 

In [31]:
ind = np.unravel_index(np.argmax(result, axis=None), result.shape)
ind

(0, 0)

In [None]:
b = [True,True,False,True]
a = [[],[],[],[]]
for a 
for j in range(5):
    for i in range(4):
        if b[i]:
            a[i].append (j)

In [None]:
dataf = [1,2,3]
des = [True,False,True]
empty = [[],[],[]]
for d in dataf:
    for user in range(0,2):
        pairs =[(user,(user+1)%2),((user+1)%2,user),(user,user)]
        for i in range(3):
            if des[i]:
                a= pairs[i][0]
                b= pairs[i][1]
            else:
                continue
            empty[i].append("target {} source {}".format(a,b))
empty

In [None]:
t=np.array([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])
s=np.array([1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1])
TE = te.te_compute(t,s, k=5, embedding=5, safetyCheck=False, GPU=False)
TE