In [1]:
import numpy as np
import pandas as pd
#import itertools as it
#from sklearn.cluster import KMeans

In [2]:
def df_to_latex(df, filename):
    """Write ``df`` to ``filename`` as a LaTeX table.

    The row index is omitted and floats are rendered with three decimals.
    An existing file of the same name is overwritten.
    """
    with open(filename, "w") as out_file:
        latex_source = df.to_latex(index=False, float_format="%.3f")
        out_file.write(latex_source)

In [3]:
#returns a list of all traces corresponding to cases without the ID (first entry in trace)
def getCases(caseswithID):
    """Strip the leading case ID from every case.

    Each input case is a sequence whose first entry is the case ID; the
    result holds, per case, everything after that first entry.
    """
    return [full_case[1:] for full_case in caseswithID]

#returns a list of traces where each trace contains only the names of activities and no timestamp
def getTraces(cases):
    """Project every case onto the first component of each of its events.

    Events are indexable pairs; only element 0 (the activity name) is kept,
    so the second component (the timestamp) is dropped.
    """
    return [[event[0] for event in events] for events in cases]

#given original case (trace), returns the sequence of events
def singleTrace(case):
    """Return the activity sequence of one case given in original form.

    ``case[0]`` is the case ID and is skipped; from every remaining event
    only element 0 (the activity name) is kept.
    """
    return [event[0] for event in case[1:]]


In [4]:
#returns the list of all prefixes for a given case
def getPrefixesForCase(case):
    """Return every prefix of the activity sequence of ``case``.

    ``case`` is in original form (case ID first). The result starts with
    the empty prefix and ends with the complete trace, so it contains
    ``len(trace) + 1`` lists.
    """
    activities = singleTrace(case)
    return [activities[:end] for end in range(len(activities) + 1)]

#returns list with no duplicates of all prefixes of traces in the input
def getPrefixes(traces):
    """Collect the distinct prefixes of all given traces.

    Prefixes are produced trace by trace, from the empty prefix up to the
    full trace, and each one is kept only the first time it is seen, so the
    result is ordered by first appearance.
    """
    unique_prefixes = []
    for trace in traces:
        for end in range(len(trace) + 1):
            candidate = trace[:end]
            # Prefixes are lists (unhashable), hence the list membership test.
            if candidate not in unique_prefixes:
                unique_prefixes.append(candidate)
    return unique_prefixes

#returns a list of entries (prefix, caseIDs), where caseIDs lists the IDs of all cases whose trace contains this prefix
def getPrefixesIDs(caseswithIDs):
    """Map every distinct prefix to the IDs of the cases that start with it.

    Returns a list of ``(prefix, ids)`` tuples in the order produced by
    ``getPrefixes``; ``ids`` is the list of case IDs (``case[0]``) whose
    trace has ``prefix`` as one of its prefixes.
    """
    all_prefixes = getPrefixes(getTraces(getCases(caseswithIDs)))
    prefixesIDs = [(prefix, []) for prefix in all_prefixes]

    for case in caseswithIDs:
        case_id = case[0]
        for prefix in getPrefixesForCase(case):
            # Locate the slot of this prefix and record the case there.
            slot = all_prefixes.index(prefix)
            prefixesIDs[slot][1].append(case_id)

    return prefixesIDs

In [5]:
#computes the multiset abstraction for a given (partial) trace
def ms_abstraction(prefix):
    """Return the multiset abstraction of a (partial) trace.

    The result is a set of ``(activity, occurrences)`` pairs, one per
    distinct activity in ``prefix``; the order of events is discarded.
    """
    return {(activity, prefix.count(activity)) for activity in prefix}
    


#calculates states using multiset abstraction and yields the list of states
def getStates(prefixes):
    """Return the distinct multiset-abstraction states of the given prefixes.

    Parameters
    ----------
    prefixes : iterable of list
        Prefixes (lists of activity names) to abstract with
        ``ms_abstraction``.

    Returns
    -------
    list of set
        One entry per distinct state, ordered by first appearance.
    """
    states_noduplicates = []
    for prefix in prefixes:
        state = ms_abstraction(prefix)
        # States are sets (unhashable), so de-duplicate by list membership.
        # Only the newly computed state needs checking; rescanning all
        # previously seen states on every iteration yields the same list.
        if state not in states_noduplicates:
            states_noduplicates.append(state)
    return states_noduplicates

#returns list of CaseIDs for the cases whose traces are mapped to the given state
def getStateIDs(state, prefixesIDs):
    """Return the case IDs of all prefixes that are mapped to ``state``.

    Parameters
    ----------
    state : set
        A multiset abstraction as produced by ``ms_abstraction``.
    prefixesIDs : list of (prefix, ids)
        Entries as produced by ``getPrefixesIDs``.

    Returns
    -------
    list
        Concatenation of the ID lists of every prefix whose abstraction
        equals ``state``, in the order of ``prefixesIDs``.
    """
    IDs = []
    # Iterate over the argument, not over a module-level variable, so the
    # function works with whatever mapping the caller passes in.
    for prefix, ids in prefixesIDs:
        if ms_abstraction(prefix) == state:
            IDs.extend(ids)
    return IDs

#returns list containing only the cases with the given IDs
def filterIDs(IDs,caseswithIDs):
    """Keep only the cases whose ID (``case[0]``) occurs in ``IDs``.

    The relative order of ``caseswithIDs`` is preserved.
    """
    return [case for case in caseswithIDs if case[0] in IDs]

In [6]:
#computes prediction (average rt, average grade) for given prefix
def pred_prefix(prefix,prefixIDs,caseswithIDs):
    """Predict (average remaining time, average final grade) for a prefix.

    Parameters
    ----------
    prefix : list
        Prefix to look up; it must be one of the prefixes in ``prefixIDs``.
    prefixIDs : list of (prefix, ids)
        Entries as produced by ``getPrefixesIDs``.
    caseswithIDs : list
        Cases in original form (case ID first, then ``(activity, time)``
        events).

    Returns
    -------
    tuple
        ``(avg_RT, avg_grade)`` over all cases containing ``prefix``. The
        remaining time of a case is the time of its last event minus the
        time of the last event of the prefix (or the time of the last event
        itself for the empty prefix). Grades are read from the
        ``'Final Grade'`` column of the module-level ``data`` frame.

    Raises
    ------
    ValueError
        If ``prefix`` is not contained in ``prefixIDs``.
    """
    known_prefixes = [entry[0] for entry in prefixIDs]
    IDs = prefixIDs[known_prefixes.index(prefix)][1]
    cases = filterIDs(IDs,caseswithIDs)
    length = len(prefix)  # loop-invariant, computed once

    RTs = []
    for case in cases:
        end_time = case[-1][1]
        if length > 0:
            # case[0] is the case ID, so case[length] is the last event
            # that belongs to the prefix.
            RTs.append(end_time - case[length][1])
        else:
            RTs.append(end_time)

    grades = []
    for ID in IDs:
        # Select by boolean mask and column name; this does not depend on
        # the frame having a default RangeIndex or a fixed column order.
        grades.append(data.loc[data['CaseID'] == ID, 'Final Grade'].iloc[0])

    avg_RT = sum(RTs)/len(RTs)
    avg_grade = sum(grades)/len(grades)
    return (avg_RT,avg_grade)

In [7]:
#returns list containing all prefixes that are mapped to a certain state
def prefixes_for_state(state,prefixesIDs):
    """Return all prefixes whose multiset abstraction equals ``state``.

    Parameters
    ----------
    state : set
        A multiset abstraction as produced by ``ms_abstraction``.
    prefixesIDs : list of (prefix, ids)
        Entries as produced by ``getPrefixesIDs``; only the prefix part
        (element 0) of each entry is used.

    Returns
    -------
    list
        Matching prefixes in the order of ``prefixesIDs``.
    """
    # Read the prefixes from the argument rather than from a module-level
    # variable, so the result reflects the mapping the caller supplied.
    return [entry[0] for entry in prefixesIDs if ms_abstraction(entry[0]) == state]

#returns annotation (average rt, average grade) for given state
def annotation_state(state,prefixesIDs,caseswithIDs):
    """Return the annotation (average RT, average grade) of ``state``.

    The annotation is the unweighted mean of the per-prefix predictions
    (``pred_prefix``) of all prefixes that are mapped to ``state``.
    """
    predictions = [
        pred_prefix(prefix, prefixesIDs, caseswithIDs)
        for prefix in prefixes_for_state(state, prefixesIDs)
    ]
    count = len(predictions)

    avg_RT = sum(pred[0] for pred in predictions)/count
    avg_grade = sum(pred[1] for pred in predictions)/count

    return (avg_RT,avg_grade)


In [8]:
#computes value for state and trace
def value(state,trace,prefixesIDs,caseswithIDs):
    """Compute the value of ``state`` for one case.

    Parameters
    ----------
    state : set
        A multiset abstraction as produced by ``ms_abstraction``.
    trace : list
        A case in original form (case ID first); it is passed to
        ``getPrefixesForCase``.
    prefixesIDs : list of (prefix, ids)
        Entries as produced by ``getPrefixesIDs``.
    caseswithIDs : list
        All cases in original form.

    Returns
    -------
    tuple or None
        ``(avg_RT, avg_grade)`` averaged over the predictions of those
        prefixes of the case that are mapped to ``state``; ``None`` when the
        case has no such prefix.
    """
    prefixes_state = prefixes_for_state(state,prefixesIDs)
    common_prefixes = [p for p in getPrefixesForCase(trace) if p in prefixes_state]
    if not common_prefixes:
        return None

    predictions = [pred_prefix(cp,prefixesIDs,caseswithIDs) for cp in common_prefixes]
    # Average once, after all predictions are collected, instead of
    # recomputing both means on every loop iteration.
    avg1 = sum(pred[0] for pred in predictions)/len(predictions)
    avg2 = sum(pred[1] for pred in predictions)/len(predictions)
    return (avg1,avg2)

In [9]:
# Example event log. Every case is a list whose first entry is the case ID
# (a string) followed by (activity, timestamp) events in order of occurrence.
case0 = ["0",("I",1),("M",40),("M",60),("F",85),("M",95),("S",100)]
case1 = ["1",("I",1),("M",30),("M",55),("F",80),("R",90),("S",95)]
case2 = ["2",("I",1),("M",30),("M",50),("F",75),("R",85),("S",90)]
case3 = ["3",("I",1),("M",40),("M",60),("R",80),("F",90),("S",100)]
case4 = ["4",("I",1),("M",45),("R",60),("F",85),("S",105)]
case5 = ["5",("I",1),("M",45),("R",70),("F",90),("S",100)]
case6 = ["6",("I",1),("M",40),("M",60),("F",80),("M",90),("S",100)]
case7 = ["7",("I",1),("M",35),("R",60),("M",90),("S",100)]
case8 = ["8",("I",1),("M",50),("F",70),("M",90),("M",95),("S",110)]
case9 = ["9",("I",1),("M",40),("F",60),("R",80),("S",95)]

In [10]:
# Derive the working structures from the raw cases; later cells read these
# module-level names.
casesWithID = [case0, case1, case2, case3, case4, case5, case6, case7, case8, case9]
cases = getCases(casesWithID)  # event lists without the leading case ID
traces = getTraces(cases)  # activity names only, timestamps dropped
prefixes = getPrefixes(traces)  # distinct prefixes, ordered by first appearance
prefixIDs = getPrefixesIDs(casesWithID)  # (prefix, [case IDs]) entries
states = getStates(prefixes)  # distinct multiset abstractions of the prefixes

In [11]:
# Case-level attribute table: one row per case, in the order of casesWithID.
# pred_prefix reads the final grade of a case from this frame.
data = pd.DataFrame(columns=['CaseID','Trace','PK','Field','Supervisor','Final Grade'])
data['CaseID'] = [case[0] for case in casesWithID]
data['Trace'] = traces
# The attribute values below are entered by hand, one per case.
data['PK'] = ['average','high','high','high','average','high','average','high','average','high']
data['Field'] = ['SE','DS','DS','DS','SE','SE','DS','SE','DS','SE']
data['Supervisor'] = ['John','John','John','Brown','Brown','Brown','John','John','John','Brown']
data['Final Grade'] = [0.7, 0.85, 0.95, 0.9, 0.8, 0.75, 0.7, 0.7, 0.6, 0.8]


In [12]:
# Overview table with one row per distinct prefix. It is filled in three
# passes, each working on a copy of the previous stage (table1 -> table3).
table = pd.DataFrame(columns=['Prefix','Prediction for prefix', 'CaseIDs containing prefix', 'Corresponding state', 'CaseIDs containing state', 'Annotation for state'])
table['Prefix'] = prefixes
table['Corresponding state'] = [ms_abstraction(prefix) for prefix in prefixes]
# NOTE(review): relies on prefixIDs listing the prefixes in the same order as
# `prefixes` (both come from getPrefixes over the same traces) - confirm.
table['CaseIDs containing prefix'] = [p[1] for p in prefixIDs]

# Pass 1: column 4 ('CaseIDs containing state').
table1 = table.copy()
for i in range(len(table)):
    state = ms_abstraction(table.iat[i, 0])
    IDs = getStateIDs(state,prefixIDs)
    table1.iloc[i,4] = IDs

# Pass 2: column 1 ('Prediction for prefix').
table2 = table1.copy()
for i in range(len(table)):
    prefix = table.iat[i,0]
    pair = pred_prefix(prefix,prefixIDs,casesWithID)
    table2.iloc[i,1] = pair

# Pass 3: column 5 ('Annotation for state').
table3 = table2.copy()    
for i in range(len(table)):
    prefix = table.iat[i,0]
    state = ms_abstraction(prefix)
    pair = annotation_state(state,prefixIDs,casesWithID)
    table3.iloc[i,5] = pair
    

In [13]:
data

Unnamed: 0,CaseID,Trace,PK,Field,Supervisor,Final Grade
0,0,"[I, M, M, F, M, S]",average,SE,John,0.7
1,1,"[I, M, M, F, R, S]",high,DS,John,0.85
2,2,"[I, M, M, F, R, S]",high,DS,John,0.95
3,3,"[I, M, M, R, F, S]",high,DS,Brown,0.9
4,4,"[I, M, R, F, S]",average,SE,Brown,0.8
5,5,"[I, M, R, F, S]",high,SE,Brown,0.75
6,6,"[I, M, M, F, M, S]",average,DS,John,0.7
7,7,"[I, M, R, M, S]",high,SE,John,0.7
8,8,"[I, M, F, M, M, S]",average,DS,John,0.6
9,9,"[I, M, F, R, S]",high,SE,Brown,0.8


In [14]:
df_to_latex(data, "data.tex")

In [15]:
table3

Unnamed: 0,Prefix,Prediction for prefix,CaseIDs containing prefix,Corresponding state,CaseIDs containing state,Annotation for state
0,[],"(99.5, 0.775)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]",{},"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]","(99.5, 0.775)"
1,[I],"(98.5, 0.775)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]","{(I, 1)}","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]","(98.5, 0.775)"
2,"[I, M]","(60.0, 0.775)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]","{(M, 1), (I, 1)}","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]","(60.0, 0.775)"
3,"[I, M, M]","(40.0, 0.82)","[0, 1, 2, 3, 6]","{(M, 2), (I, 1)}","[0, 1, 2, 3, 6]","(40.0, 0.82)"
4,"[I, M, M, F]","(16.25, 0.8)","[0, 1, 2, 6]","{(F, 1), (M, 2), (I, 1)}","[0, 1, 2, 6, 8]","(18.125, 0.7)"
5,"[I, M, M, F, M]","(7.5, 0.7)","[0, 6]","{(F, 1), (M, 3), (I, 1)}","[0, 6, 8]","(11.25, 0.6499999999999999)"
6,"[I, M, M, F, M, S]","(0.0, 0.7)","[0, 6]","{(F, 1), (S, 1), (M, 3), (I, 1)}","[0, 6, 8]","(0.0, 0.6499999999999999)"
7,"[I, M, M, F, R]","(5.0, 0.8999999999999999)","[1, 2]","{(F, 1), (M, 2), (R, 1), (I, 1)}","[1, 2, 3]","(7.5, 0.8999999999999999)"
8,"[I, M, M, F, R, S]","(0.0, 0.8999999999999999)","[1, 2]","{(F, 1), (R, 1), (S, 1), (M, 2), (I, 1)}","[1, 2, 3]","(0.0, 0.8999999999999999)"
9,"[I, M, M, R]","(20.0, 0.9)",[3],"{(M, 2), (R, 1), (I, 1)}","[3, 7]","(15.0, 0.8)"


In [16]:
df_to_latex(table3, "table3.tex")

In [17]:
# Prefix-by-case table: cell (prefix, case) holds the prediction for the
# prefix (taken from table3) when the case contains that prefix, else None.
cluster_table = pd.DataFrame(columns=['Prefix','Case0','Case1','Case2','Case3','Case4','Case5','Case6','Case7','Case8','Case9'])
cluster_table['Prefix'] = prefixes
cols = cluster_table.columns[1:]

for c in range(len(cols)):
    case = casesWithID[c]
    # Use a dedicated name here: assigning to `prefixes` would overwrite the
    # module-level list of all distinct prefixes that other cells (and the
    # 'Prefix' column above) depend on, breaking any re-run.
    case_prefixes = getPrefixesForCase(case)
    for i in range(len(cluster_table)):
        p = cluster_table.iat[i,0]
        if p in case_prefixes:
            cluster_table.iloc[i,c+1] = table3.iat[i,1]
        else:
            cluster_table.iloc[i,c+1] = None
    

In [18]:
#cluster_table

In [19]:
# State-by-case table: cell (state, case) holds value(state, case, ...), i.e.
# an (avg RT, avg grade) tuple, or None when the case never visits the state.
cluster_table2 = pd.DataFrame(columns=['States','Case0','Case1','Case2','Case3','Case4','Case5','Case6','Case7','Case8','Case9'])
cluster_table2['States'] = states
cols = cluster_table2.columns[1:]

# Column c+1 belongs to casesWithID[c].
for c in range(len(cols)):
    case = casesWithID[c]
    #prefixes = getPrefixesForCase(case)
    for i in range(len(cluster_table2)):
        s = cluster_table2.iat[i,0]
        cluster_table2.iloc[i,c+1] = value(s,case,prefixIDs,casesWithID)
            

In [20]:
cluster_table2

Unnamed: 0,States,Case0,Case1,Case2,Case3,Case4,Case5,Case6,Case7,Case8,Case9
0,{},"(99.5, 0.775)","(99.5, 0.775)","(99.5, 0.775)","(99.5, 0.775)","(99.5, 0.775)","(99.5, 0.775)","(99.5, 0.775)","(99.5, 0.775)","(99.5, 0.775)","(99.5, 0.775)"
1,"{(I, 1)}","(98.5, 0.775)","(98.5, 0.775)","(98.5, 0.775)","(98.5, 0.775)","(98.5, 0.775)","(98.5, 0.775)","(98.5, 0.775)","(98.5, 0.775)","(98.5, 0.775)","(98.5, 0.775)"
2,"{(M, 1), (I, 1)}","(60.0, 0.775)","(60.0, 0.775)","(60.0, 0.775)","(60.0, 0.775)","(60.0, 0.775)","(60.0, 0.775)","(60.0, 0.775)","(60.0, 0.775)","(60.0, 0.775)","(60.0, 0.775)"
3,"{(M, 2), (I, 1)}","(40.0, 0.82)","(40.0, 0.82)","(40.0, 0.82)","(40.0, 0.82)",,,"(40.0, 0.82)",,,
4,"{(F, 1), (M, 2), (I, 1)}","(16.25, 0.8)","(16.25, 0.8)","(16.25, 0.8)",,,,"(16.25, 0.8)",,"(20.0, 0.6)",
5,"{(F, 1), (M, 3), (I, 1)}","(7.5, 0.7)",,,,,,"(7.5, 0.7)",,"(15.0, 0.6)",
6,"{(F, 1), (S, 1), (M, 3), (I, 1)}","(0.0, 0.7)",,,,,,"(0.0, 0.7)",,"(0.0, 0.6)",
7,"{(F, 1), (M, 2), (R, 1), (I, 1)}",,"(5.0, 0.8999999999999999)","(5.0, 0.8999999999999999)","(10.0, 0.9)",,,,,,
8,"{(F, 1), (R, 1), (S, 1), (M, 2), (I, 1)}",,"(0.0, 0.8999999999999999)","(0.0, 0.8999999999999999)","(0.0, 0.9)",,,,,,
9,"{(M, 2), (R, 1), (I, 1)}",,,,"(20.0, 0.9)",,,,"(10.0, 0.7)",,


In [21]:
df_to_latex(cluster_table2, "cluster_table2.tex")

In [22]:
#returns positions of None in a vector 
def getNones(vector):
    """Return the positions of missing entries in ``vector``.

    An entry counts as missing when it is ``None`` or when it does not
    compare equal to itself (e.g. a float NaN).

    Parameters
    ----------
    vector : sequence
        Entries of any type; they do not need to be hashable.

    Returns
    -------
    set of int
        Indices of the missing entries (empty set if there are none).
    """
    # Single pass: no rescanning per distinct value and no hashing of entries.
    return {
        position
        for position, item in enumerate(vector)
        if item is None or not (item == item)
    }

#given vector [(i1,j1),...,(in,jn)] returns [i1,..,in,...,j1,...,jn]
def vector(vector_of_tuples):
    """Flatten a list of pairs into one vector of twice the length.

    ``[(i1, j1), ..., (in, jn)]`` becomes ``[i1, ..., in, j1, ..., jn]``.
    A missing entry (as reported by ``getNones``) yields ``None`` in both
    halves at the corresponding positions.
    """
    missing = getNones(vector_of_tuples)
    first_parts = []
    second_parts = []
    for position, pair in enumerate(vector_of_tuples):
        if position in missing:
            first_parts.append(None)
            second_parts.append(None)
        else:
            first_parts.append(pair[0])
            second_parts.append(pair[1])
    return first_parts + second_parts

In [23]:
#def getMaxValues():    
    
# Turn every case column of cluster_table2 into one flat vector (see
# `vector`): first the first components for all states, then the second ones.
all_vectors = []
no_cols = len(cluster_table2.columns[1:])
for i in range(1,no_cols+1):
    column = cluster_table2.iloc[:,i].tolist()
    v = vector(column)
    all_vectors.append(v)

# Per-vector extrema of the first half (entries1*) and second half (entries2*).
entries1max = []
entries2max = []
entries1min = []
entries2min = []

for v in all_vectors:
    # Missing entries appear as None at matching positions in both halves, so
    # after dropping them the remaining list still splits evenly in two.
    no_nones = [item for item in v if item is not None]
    l = int(len(no_nones)/2)
    v1 = no_nones[:l]
    v2 = no_nones[l:]
    entries1max.append(max(v1))
    entries1min.append(min(v1))
    entries2max.append(max(v2))
    entries2min.append(min(v2))

# Global extrema over all vectors; these match the max0/max1/min0/min1
# parameters of `normalize`.
max0 = max(entries1max)
min0 = min(entries1min)
max1 = max(entries2max)
min1 = min(entries2min)

    #return (max0,max1)


#knowing the maximal values, we normalize each input vector of length 2*no_of_states
def normalize(vector,max0,max1,min0,min1): 
    """Min-max scale a flat vector of length ``2 * no_of_states``.

    Entries in the first half are scaled with ``min0``/``max0``, entries in
    the second half with ``min1``/``max1``. Missing entries (as reported by
    ``getNones``) stay ``None``.
    """
    missing = getNones(vector)
    half = int(len(vector)/2)

    scaled = []
    for position, raw in enumerate(vector):
        if position in missing:
            scaled.append(None)
        elif position < half:
            scaled.append((raw-min0)/(max0-min0))
        else:
            scaled.append((raw-min1)/(max1-min1))
    return scaled

#distance between two vectors of length 2*no_states
def distance(vector1,vector2):
    """Distance between two flat vectors of length ``2 * no_states``.

    For each half of the vectors a Euclidean distance is computed from
    per-position terms: 0.0 where both vectors are missing, 2 where exactly
    one is missing, and the squared difference otherwise. The result is the
    equally weighted average of the two half distances.

    If the vectors differ in length, the message string
    "Vectors do not have the same length!" is returned instead of a number.
    """
    length = len(vector1)
    if length != len(vector2):
        return "Vectors do not have the same length!"

    missing1 = set(getNones(vector1))
    missing2 = set(getNones(vector2))
    missing_in_both = missing1 & missing2
    missing_in_one = missing1 ^ missing2  # missing in exactly one vector

    half = int(length/2)
    first_half_terms = []
    second_half_terms = []
    for position in range(length):
        if position in missing_in_both:
            term = 0.0
        elif position in missing_in_one:
            term = 2
        else:
            term = (vector1[position]-vector2[position])**2

        if position < half:
            first_half_terms.append(term)
        else:
            second_half_terms.append(term)

    # Euclidean distance per half (remaining time / final grade), then the
    # equally weighted average of the two.
    return 0.5*np.sqrt(sum(first_half_terms))+0.5*np.sqrt(sum(second_half_terms))

In [24]:
#returns list of vectors with index in selected indices
def selectVectors(selected_indices,all_vectors):
    """Return ``all_vectors[i]`` for every ``i`` in ``selected_indices``.

    The result follows the order of ``selected_indices``.
    """
    return [all_vectors[index] for index in selected_indices]

#find mean of a list of vectors
#def find_mean(vectors):
    
    #assuming all have same length
    #length = len(vectors[0])
    #no_vectors = len(vectors)
    #nones = []
    #mean = []
    #for v in vectors:
        #nones.extend(getNones(v))
    #nones_set = set(nones)
    
    #for i in range(length):
        #if i in nones_set:
            #mean.insert(i,None)
        #else:
            #pos_i = [v[i] for v in vectors]
            #print(pos_i)
            #avg = sum(pos_i)/no_vectors
            #print(avg)
            #mean.insert(i,avg)
    #return mean


def find_mean(vectors):
    """Position-wise mean of a list of equally long vectors.

    At every position only the vectors that have a value there (i.e. the
    position is not reported by ``getNones``) contribute; if no vector has
    a value at a position, the mean at that position is ``None``.
    """
    length = len(vectors[0])
    # missing_positions[k] is the set of missing positions of vectors[k]
    missing_positions = [getNones(candidate) for candidate in vectors]

    mean = []
    for position in range(length):
        lacking = [k for k, gaps in enumerate(missing_positions) if position in gaps]
        contributing = list(set(range(len(missing_positions)))-set(lacking))
        if contributing != []:
            chosen = selectVectors(contributing,vectors)
            total = sum([member[position] for member in chosen])
            mean.append(total/len(contributing))
        else:
            mean.append(None)

    return mean
    
    
    
#measures the variance of a set of vectors around their mean (returns the variance, not the standard deviation)
def variance_cluster(indices,all_vectors):
    """Variance of the cluster formed by ``all_vectors[i]`` for ``i`` in ``indices``.

    This is the mean of the squared ``distance`` between each member and
    the cluster mean computed by ``find_mean``. The variance itself is
    returned (no square root is taken).
    """
    members = selectVectors(indices,all_vectors)
    centre = find_mean(members)
    size = len(indices)
    squared_distances = [distance(members[k],centre)**2 for k in range(size)]
    return sum(squared_distances)/size


#returns weighted mean of the variances in the given two partitions
def mean_variance(partition_pair,all_vectors):
    """Size-weighted mean of the variances of the two parts of a partition.

    ``partition_pair`` is a pair of index lists; each part's
    ``variance_cluster`` is weighted by its share of the total number of
    indices.
    """
    first_part = partition_pair[0]
    second_part = partition_pair[1]
    total = len(first_part + second_part)

    var_first = variance_cluster(first_part,all_vectors)
    var_second = variance_cluster(second_part,all_vectors)

    return (len(first_part)/total)*var_first + (len(second_part)/total)*var_second

In [25]:
#partition vectors with index in indices on single condition (string1=column name, string2 = value)
def divide_condition(indices,condition):
    """Partition ``indices`` by a single condition on the ``data`` frame.

    Parameters
    ----------
    indices : iterable of int
        Row labels of ``data`` to partition.
    condition : tuple
        ``(column name, value)``; a row satisfies the condition when
        ``data[column name] == value``.

    Returns
    -------
    tuple of list
        ``(indices_true, indices_false)``: the members of ``indices`` that
        satisfy the condition (in the order of ``indices``) and those that
        do not.
    """
    col_name = condition[0]
    wanted = condition[1]

    matching = set(data.index[data[col_name] == wanted].tolist())
    # Restrict the satisfying rows to the requested indices; without this,
    # rows outside `indices` would leak into the "true" part.
    indices_true = [i for i in indices if i in matching]
    indices_false = list(set(indices)-matching)

    return(indices_true,indices_false)

#partition vectors with index in indices based on a list of conditions
def divide(indices,conditions):
    """Partition ``indices`` by a conjunction of conditions.

    ``conditions`` is a list of ``(column name, value)`` pairs. The first
    part of the result holds the indices satisfying every condition, the
    second part all remaining members of ``indices``. As soon as no
    candidate is left, ``([], indices)`` is returned.
    """
    remaining = indices

    for condition in conditions:
        if remaining != []:
            rejected = divide_condition(remaining,condition)[1]
            remaining = list(set(remaining)-set(rejected))
        else:
            return ([],indices)

    indices_true = remaining
    indices_false = list((set(indices)-set(indices_true)))

    return(indices_true,indices_false)

In [26]:
variance_cluster([1,2,3],all_vectors)

4.272751582596036

In [27]:
variance_cluster([0,1,2,3],all_vectors)

7.872518767553991

In [28]:
variance_cluster([0,6,8],all_vectors)

7.756869201747773

In [29]:
mean_variance(([1,2,3],[0,4,5,6,7,8,9]),all_vectors)

12.537428301380316

In [30]:
mean_variance(([0,1,2,3],[4,5,6,7,8,9]),all_vectors)

12.71386104542499

In [31]:
mean_variance(([0,6,8],[3,4,5,1,7,2,9]),all_vectors)

14.396572609137284

In [32]:
mean_variance(([6,8],[0,3,4,5,1,7,2,9]),all_vectors)

18.2439303924999

In [33]:
mean_variance(([4,5,9],[1,2,3,7]),all_vectors) 

8.430392056256746

In [34]:
variance_cluster([4,5,9],all_vectors)

2.000138885272451

In [35]:
variance_cluster([0,1,2,3,6,7,8],all_vectors)

19.419585140231813

In [36]:
mean_variance(([1,2,3],[0,6,7,8]),all_vectors)

9.524812712289766

In [37]:
mean_variance(([0,1,2,3,6,8],[7]),all_vectors)

10.878134711914976

In [38]:
mean_variance(([1,2,3,7],[0,6,8]),all_vectors)

10.897562191889028

In [39]:
mean_variance(([7],[0,6,8]),all_vectors)

5.81765190131083

In [40]:
variance_cluster([0,6,8,7],all_vectors)

13.463858559560062

In [41]:
# First split over all cases: Field == "SE" and Supervisor == "Brown" versus the rest.
partition1 = divide(range(len(all_vectors)),[("Field","SE"),("Supervisor","Brown")])
print("First partition:",partition1)

First partition: ([9, 4, 5], [0, 1, 2, 3, 6, 7, 8])


In [42]:
# Second split, applied to the cases rejected by the first one:
# Field == "DS" and PK == "high" versus the rest.
partition2 = divide(partition1[1],[("Field","DS"),("PK","high")])
print("Second partition:",partition2)

Second partition: ([1, 2, 3], [0, 8, 6, 7])


In [43]:
# Third split, applied to the cases rejected by the second one: PK == "high" versus the rest.
partition3 = divide(partition2[1],[("PK","high")])
print("Third partition:",partition3)

Third partition: ([7], [0, 8, 6])


In [44]:
# Size-weighted mean variance of the two parts produced by each split.
var1 = mean_variance(partition1,all_vectors)
var2 = mean_variance(partition2,all_vectors)
var3 = mean_variance(partition3,all_vectors)

print("Variance after applying condition 1:", var1)
print("Variance after applying condition 2:", var2)
print("Variance after applying condition 3:", var3)

Variance after applying condition 1: 14.193751263744003
Variance after applying condition 2: 9.524812712289766
Variance after applying condition 3: 5.81765190131083


In [45]:
# Hand-entered state table. The states are picked by position from the
# `states` list, so the indices depend on the order produced by getStates.
# NOTE(review): the predictions look like those of cases 4, 5 and 9
# (partition1[0]); they are typed in, not computed - verify after data changes.
states1 = pd.DataFrame(columns=['State','Multiset of predictions', 'Annotation'])
states1['State'] = [states[0],states[1],states[2],states[10],states[14],states[11],states[12]]
states1['Multiset of predictions'] = [[(105,0.8),(100,0.75),(95,0.8)], [(104,0.8),(99,0.75),(94,0.8)], [(60,0.8),(55,0.75),(55,0.8)], [(45,0.8),(30,0.75)], [(35,0.8)], [(20,0.8),(10,0.75),(15,0.8)],[(0,0.8),(0,0.75),(0,0.8)]]
states1['Annotation'] = [(100,0.78),(99,0.78),(56.67,0.78),(37.5,0.775),(35,0.8),(15,0.78),(0,0.78)]
states1

Unnamed: 0,State,Multiset of predictions,Annotation
0,{},"[(105, 0.8), (100, 0.75), (95, 0.8)]","(100, 0.78)"
1,"{(I, 1)}","[(104, 0.8), (99, 0.75), (94, 0.8)]","(99, 0.78)"
2,"{(M, 1), (I, 1)}","[(60, 0.8), (55, 0.75), (55, 0.8)]","(56.67, 0.78)"
3,"{(R, 1), (M, 1), (I, 1)}","[(45, 0.8), (30, 0.75)]","(37.5, 0.775)"
4,"{(F, 1), (M, 1), (I, 1)}","[(35, 0.8)]","(35, 0.8)"
5,"{(F, 1), (R, 1), (M, 1), (I, 1)}","[(20, 0.8), (10, 0.75), (15, 0.8)]","(15, 0.78)"
6,"{(F, 1), (R, 1), (S, 1), (M, 1), (I, 1)}","[(0, 0.8), (0, 0.75), (0, 0.8)]","(0, 0.78)"


In [46]:
df_to_latex(states1, "states1.tex")

In [47]:
# Hand-entered state table. The states are picked by position from the
# `states` list, so the indices depend on the order produced by getStates.
# NOTE(review): the predictions look like those of cases 1, 2 and 3
# (partition2[0]); they are typed in, not computed - verify after data changes.
states2 = pd.DataFrame(columns=['State','Multiset of predictions', 'Annotation'])
states2['State'] = [states[0],states[1],states[2],states[3],states[4],states[9],states[7],states[8]]
states2['Multiset of predictions'] = [[(95,0.85),(90,0.95),(100,0.9)], [(94,0.85),(89,0.95),(99,0.9)], [(65,0.85),(60,0.95),(60,0.9)], [(40,0.85),(40,0.95),(40,0.9)], [(15,0.85),(15,0.95)], [(20,0.9)], [(5,0.85),(5,0.95),(10,0.9)],[(0,0.85),(0,0.95),(0,0.9)]]
states2['Annotation'] = [(95,0.9),(94,0.9),(61.67,0.9),(40,0.9),(15,0.9),(20,0.9),(6.67,0.9),(0,0.9)]
states2

Unnamed: 0,State,Multiset of predictions,Annotation
0,{},"[(95, 0.85), (90, 0.95), (100, 0.9)]","(95, 0.9)"
1,"{(I, 1)}","[(94, 0.85), (89, 0.95), (99, 0.9)]","(94, 0.9)"
2,"{(M, 1), (I, 1)}","[(65, 0.85), (60, 0.95), (60, 0.9)]","(61.67, 0.9)"
3,"{(M, 2), (I, 1)}","[(40, 0.85), (40, 0.95), (40, 0.9)]","(40, 0.9)"
4,"{(F, 1), (M, 2), (I, 1)}","[(15, 0.85), (15, 0.95)]","(15, 0.9)"
5,"{(M, 2), (R, 1), (I, 1)}","[(20, 0.9)]","(20, 0.9)"
6,"{(F, 1), (M, 2), (R, 1), (I, 1)}","[(5, 0.85), (5, 0.95), (10, 0.9)]","(6.67, 0.9)"
7,"{(F, 1), (R, 1), (S, 1), (M, 2), (I, 1)}","[(0, 0.85), (0, 0.95), (0, 0.9)]","(0, 0.9)"


In [48]:
# Export the second state table to its own LaTeX file.
df_to_latex(states2, "states2.tex")

In [49]:
# Hand-entered state table. The states are picked by position from the
# `states` list, so the indices depend on the order produced by getStates.
# NOTE(review): the predictions look like those of case 7 (partition3[0]);
# they are typed in, not computed - verify after data changes.
states3 = pd.DataFrame(columns=['State','Multiset of predictions', 'Annotation'])
states3['State'] = [states[0],states[1],states[2],states[10],states[9],states[13]]
states3['Multiset of predictions'] = [[(100,0.7)], [(99,0.7)], [(65,0.7)], [(40,0.7)], [(10,0.7)], [(0,0.7)]]
states3['Annotation'] = [(100,0.7), (99,0.7), (65,0.7), (40,0.7), (10,0.7), (0,0.7)]
states3

Unnamed: 0,State,Multiset of predictions,Annotation
0,{},"[(100, 0.7)]","(100, 0.7)"
1,"{(I, 1)}","[(99, 0.7)]","(99, 0.7)"
2,"{(M, 1), (I, 1)}","[(65, 0.7)]","(65, 0.7)"
3,"{(R, 1), (M, 1), (I, 1)}","[(40, 0.7)]","(40, 0.7)"
4,"{(M, 2), (R, 1), (I, 1)}","[(10, 0.7)]","(10, 0.7)"
5,"{(M, 2), (R, 1), (S, 1), (I, 1)}","[(0, 0.7)]","(0, 0.7)"


In [50]:
# Export the third state table to its own LaTeX file.
df_to_latex(states3, "states3.tex")

In [51]:
# Hand-entered state table. The states are picked by position from the
# `states` list, so the indices depend on the order produced by getStates.
# NOTE(review): the predictions look like those of cases 0, 6 and 8
# (partition3[1]); they are typed in, not computed - verify after data changes.
states4 = pd.DataFrame(columns=['State','Multiset of predictions', 'Annotation'])
states4['State'] = [states[0],states[1],states[2],states[3],states[14],states[4],states[5],states[6]]
states4['Multiset of predictions'] = [[(100,0.7),(100,0.7),(110,0.6)], [(99,0.7),(99,0.7),(109,0.6)], [(60,0.7),(60,0.7),(60,0.6)], [(40,0.7),(40,0.7)], [(40,0.6)], [(15,0.7),(20,0.7),(20,0.6)], [(5,0.7),(10,0.7),(15,0.6)], [(0,0.7),(0,0.7),(0,0.6)]]
states4['Annotation'] = [(103.33,0.67), (102.33,0.67), (60,0.67), (40,0.7), (40,0.6), (18.33,0.67), (10,0.67), (0,0.67)]
states4

Unnamed: 0,State,Multiset of predictions,Annotation
0,{},"[(100, 0.7), (100, 0.7), (110, 0.6)]","(103.33, 0.67)"
1,"{(I, 1)}","[(99, 0.7), (99, 0.7), (109, 0.6)]","(102.33, 0.67)"
2,"{(M, 1), (I, 1)}","[(60, 0.7), (60, 0.7), (60, 0.6)]","(60, 0.67)"
3,"{(M, 2), (I, 1)}","[(40, 0.7), (40, 0.7)]","(40, 0.7)"
4,"{(F, 1), (M, 1), (I, 1)}","[(40, 0.6)]","(40, 0.6)"
5,"{(F, 1), (M, 2), (I, 1)}","[(15, 0.7), (20, 0.7), (20, 0.6)]","(18.33, 0.67)"
6,"{(F, 1), (M, 3), (I, 1)}","[(5, 0.7), (10, 0.7), (15, 0.6)]","(10, 0.67)"
7,"{(F, 1), (S, 1), (M, 3), (I, 1)}","[(0, 0.7), (0, 0.7), (0, 0.6)]","(0, 0.67)"


In [52]:
# Export the fourth state table to its own LaTeX file.
df_to_latex(states4, "states4.tex")

In [53]:
states

[set(),
 {('I', 1)},
 {('I', 1), ('M', 1)},
 {('I', 1), ('M', 2)},
 {('F', 1), ('I', 1), ('M', 2)},
 {('F', 1), ('I', 1), ('M', 3)},
 {('F', 1), ('I', 1), ('M', 3), ('S', 1)},
 {('F', 1), ('I', 1), ('M', 2), ('R', 1)},
 {('F', 1), ('I', 1), ('M', 2), ('R', 1), ('S', 1)},
 {('I', 1), ('M', 2), ('R', 1)},
 {('I', 1), ('M', 1), ('R', 1)},
 {('F', 1), ('I', 1), ('M', 1), ('R', 1)},
 {('F', 1), ('I', 1), ('M', 1), ('R', 1), ('S', 1)},
 {('I', 1), ('M', 2), ('R', 1), ('S', 1)},
 {('F', 1), ('I', 1), ('M', 1)}]

In [54]:
states[6]

{('F', 1), ('I', 1), ('M', 3), ('S', 1)}

In [55]:
(15+40)/3

18.333333333333332

In [56]:
(0.7+0.7+0.6)/3

0.6666666666666666

In [57]:
20/3

6.666666666666667