# Scanpaths

In [None]:
import pandas as pd
from pattern_mining.SequentialPatternMiner import SequentialPatternMiner 
import json
from copy import deepcopy
# Load the raw fixation table from disk.
df = pd.read_csv("/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/final_datasets/final_experiment_fixations.csv")

# Squeezing collapses runs of identical consecutive values: (a, b, b, c) -> (a, b, c).
squeeze_scanpaths = True

# Discard rows labelled 'non_aoi', then gather the remaining AOI values of each
# (Condition, Subject, Trial) group into one list (the scanpath), keeping the
# first StrategyLabel seen in the group.
df = (
    df.loc[df['AOI'] != 'non_aoi']
    .groupby(['Condition', 'Subject', 'Trial'])
    .agg({'AOI': list, 'StrategyLabel': 'first'})
    .reset_index()
    .rename(columns={'AOI': 'Scanpath'})
)

if squeeze_scanpaths:
    # Keep the first element, then every element that differs from its predecessor.
    df['Scanpath'] = df['Scanpath'].apply(
        lambda x: x[:1] + [cur for prev, cur in zip(x, x[1:]) if cur != prev]
    )

# Mine Patterns (squeezed results)

In [None]:
# Configure the miner on the 'Scanpath' column; split_by, min_support and method
# are passed straight to SequentialPatternMiner (see that class for their semantics).
spm = SequentialPatternMiner(
    df,
    'Scanpath',
    split_by=['Condition', 'StrategyLabel'],
    min_support=0.1,
    method='search',
)
patterns = spm.mine_patterns()

# Work on an independent copy so `patterns` keeps its un-serialised objects,
# and JSON-encode each entry of the 'patterns' column for CSV storage.
patterns_df = deepcopy(patterns)
patterns_df['patterns'] = [json.dumps(entry) for entry in patterns_df['patterns']]

# The file name gets a '_squeezed' suffix when the scanpaths were squeezed.
patterns_df.to_csv(
    f"/home/gatemrou/uds/thesis/Thesis-Project/analysis/scanpath_classification/pattern_mining/mined_patterns/patterns{'_squeezed' if squeeze_scanpaths else ''}.csv",
    index=False,
)

Reading and building sequence
dim 5, sequences 1313, events 19864
cost 55509.697622
0 candidates, cost: 55509.697622
cost 55509.697622
[]
Reading and building sequence
dim 5, sequences 1313, events 18454
cost 51806.540945
0 candidates, cost: 51806.540945
cost 51806.540945
[]
Reading and building sequence
dim 5, sequences 1212, events 8410
cost 24960.771467
0 candidates, cost: 24960.771467
cost 24960.771467
[]


## Look at the mined patterns (unsqueezed)

In [4]:
import ast
import json

import pandas as pd


def _parse_patterns(raw):
    """Turn one cell of the 'patterns' column back into Python objects.

    The mining cell stores this column with ``json.dumps``, so JSON is tried
    first; ``ast.literal_eval`` is a fallback for files saved as a Python repr.
    Neither executes code, unlike ``eval``. Values that are not strings
    (e.g. already-parsed lists) are returned unchanged.
    """
    if not isinstance(raw, str):
        return raw
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return ast.literal_eval(raw)


# Prefer the patterns saved on disk; fall back to the in-memory miner from the
# mining cell when the file has not been written yet.
try:
    patterns = pd.read_csv("/home/gatemrou/uds/thesis/Thesis-Project/analysis/scanpath_classification/pattern_mining/mined_patterns/patterns.csv")
except FileNotFoundError:
    patterns = spm.get_patterns()
patterns['patterns'] = patterns['patterns'].apply(_parse_patterns)

# Patterns of the first row matching Condition == 'complex' and StrategyLabel == 0.
complexL0 = patterns[(patterns['Condition'] == 'complex') & (patterns['StrategyLabel'] == 0)]['patterns'].iloc[0]
# Average number of elements per pattern; printed by a later cell.
mean_length = sum([len(pattern['pattern']) for pattern in complexL0]) / len(complexL0)
for pattern in complexL0:
    print(pattern)



{'pattern': ['trgt', 'trgt', 'trgt', 'trgt'], 'other_info': '0.000000 220.679999 120'}
{'pattern': ['dist', 'dist', 'dist', 'dist'], 'other_info': '0.000000 185.819922 103'}
{'pattern': ['comp', 'comp', 'comp'], 'other_info': '0.000000 176.080341 106'}
{'pattern': ['trgt', 'trgt', 'trgt'], 'other_info': '0.000000 171.817586 102'}
{'pattern': ['comp', 'comp', 'comp', 'comp'], 'other_info': '0.000000 164.595521 91'}
{'pattern': ['sent_msg', 'sent_msg'], 'other_info': '0.000000 146.861742 91'}
{'pattern': ['comp', 'comp', 'comp', 'comp', 'comp'], 'other_info': '0.000000 146.580231 77'}
{'pattern': ['trgt', 'trgt', 'trgt', 'trgt', 'trgt'], 'other_info': '0.000000 144.963692 91'}
{'pattern': ['dist', 'dist', 'dist'], 'other_info': '0.000000 133.164868 80'}
{'pattern': ['trgt', 'trgt', 'trgt', 'trgt', 'trgt', 'trgt'], 'other_info': '0.000000 132.468111 76'}
{'pattern': ['trgt', 'trgt'], 'other_info': '0.000000 131.452469 116'}
{'pattern': ['dist', 'dist', 'dist', 'dist', 'dist', 'dist'], 'ot

## Most are uninteresting, as they just repeat the same area of interest :(
## So look only at the patterns that contain at least two distinct AOIs

### class L0

In [5]:
# Report the mean pattern length computed earlier, then list only the patterns
# that contain more than one distinct element.
print(f"Mean length of patterns in complexL0: {mean_length}")
for pattern in complexL0:
    distinct_elements = set(pattern['pattern'])
    if len(distinct_elements) <= 1:
        # Pure repetition of a single element: skip.
        continue
    print(pattern)

Mean length of patterns in complexL0: 6.857142857142857
{'pattern': ['comp', 'trgt', 'comp', 'comp', 'comp', 'dist', 'comp', 'comp'], 'other_info': '0.282051 0.448525 4'}


### class L1

In [6]:
# Patterns of the first row matching Condition == 'complex' and StrategyLabel == 1.
complexL1 = patterns.loc[
    (patterns['Condition'] == 'complex') & (patterns['StrategyLabel'] == 1),
    'patterns',
].iloc[0]

# Average number of elements per pattern.
mean_length = sum(len(pattern['pattern']) for pattern in complexL1) / len(complexL1)
print(f"Mean length of patterns in complexL1: {mean_length}")

# Show only the patterns that contain more than one distinct element.
for pattern in complexL1:
    if len(set(pattern['pattern'])) <= 1:
        continue
    print(pattern)

Mean length of patterns in complexL1: 7.042553191489362
{'pattern': ['comp', 'comp', 'comp', 'comp', 'comp', 'comp', 'comp', 'trgt', 'comp', 'comp'], 'other_info': '0.000000 23.464133 11'}


### class L2

In [7]:
# Patterns of the first row matching Condition == 'complex' and StrategyLabel == 2.
complexL2 = patterns.loc[
    (patterns['Condition'] == 'complex') & (patterns['StrategyLabel'] == 2),
    'patterns',
].iloc[0]

# Average number of elements per pattern.
mean_length = sum(len(pattern['pattern']) for pattern in complexL2) / len(complexL2)
print(f"Mean length of patterns in complexL2: {mean_length}")

# Show only the patterns that contain more than one distinct element.
for pattern in complexL2:
    if len(set(pattern['pattern'])) <= 1:
        continue
    print(pattern)

Mean length of patterns in complexL2: 8.205479452054794
{'pattern': ['dist', 'dist', 'dist', 'trgt', 'dist'], 'other_info': '0.284553 53.321024 22'}
{'pattern': ['dist', 'dist', 'av_msgs', 'dist', 'av_msgs'], 'other_info': '0.073684 43.866654 22'}
{'pattern': ['dist', 'comp', 'dist', 'comp', 'comp', 'comp', 'dist', 'comp', 'dist', 'comp', 'comp', 'comp'], 'other_info': '0.300353 33.384663 18'}
{'pattern': ['dist', 'comp', 'dist', 'comp', 'comp', 'comp'], 'other_info': '0.174757 32.975822 17'}
{'pattern': ['av_msgs', 'av_msgs', 'av_msgs', 'trgt', 'av_msgs', 'av_msgs'], 'other_info': '0.318182 18.433975 9'}
{'pattern': ['comp', 'comp', 'comp', 'trgt', 'comp', 'comp', 'comp', 'comp', 'comp'], 'other_info': '0.316239 6.499309 10'}
{'pattern': ['trgt', 'trgt', 'trgt', 'trgt', 'trgt', 'av_msgs', 'trgt'], 'other_info': '0.000000 -3.129117 28'}


In [41]:
# Summary per (condition, strategy) group: number of patterns, mean pattern
# length, the k patterns with the highest support, and every pattern that
# contains more than one distinct element.
conditions = ['unambiguous', 'simple', 'complex']
strategies = [0, 1, 2]
k = 3  # how many top-support patterns to list per group
for condition in conditions:
    for strategy in strategies:
        print(f"Condition: {condition}, Strategy: L{strategy}")
        print("-----------------------------------------------")
        group_rows = patterns.loc[
            (patterns['Condition'] == condition) & (patterns['StrategyLabel'] == strategy),
            'patterns',
        ]
        # A group may have no row at all; treat that like an empty pattern list
        # instead of failing with IndexError on .iloc[0].
        cur_patterns = group_rows.iloc[0] if len(group_rows) else []
        print(f"Patterns found: {len(cur_patterns)}")
        if len(cur_patterns) == 0:
            # Nothing to average or rank (avoids ZeroDivisionError below).
            continue
        mean_length = sum(len(pattern['pattern']) for pattern in cur_patterns) / len(cur_patterns)
        print(f"Mean length of patterns in {condition}L{strategy}: {round(mean_length, 2)}")
        print(f"Top-{k} patterns:")
        # The last whitespace-separated field of 'other_info' is used as the support.
        ordered_patterns = [(pattern['pattern'], int(pattern['other_info'].split()[-1])) for pattern in cur_patterns]
        ordered_patterns = sorted(ordered_patterns, key=lambda a: a[1], reverse=True)
        for pattern in ordered_patterns[:k]:
            if len(set(pattern[0])) == 1:
                # Single repeated element: print it compactly as "<element> * <count>".
                compressed_pattern = f"{pattern[0][0]} * {len(pattern[0])}"
                print(f"    {compressed_pattern} \n        support: {pattern[1]}")
            else:
                # Fixed: print the pattern itself (index 0), not the support twice.
                print(f"    pattern: {pattern[0]} \n        support: {pattern[1]}")

        print("Unique Patterns")
        for pattern in ordered_patterns:
            if len(set(pattern[0])) > 1:
                print(f"    pattern: {pattern[0]} \n        support: {pattern[1]}")


Condition: unambiguous, Strategy: L0
-----------------------------------------------
Patterns found: 32
Mean length of patterns in unambiguousL0: 6.31
Top-3 patterns:
    sent_msg * 10 
        support: 161
    trgt * 8 
        support: 143
    sent_msg * 3 
        support: 132
Unique Patterns
    pattern: ['dist', 'dist', 'dist', 'trgt', 'dist'] 
        support: 27
Condition: unambiguous, Strategy: L1
-----------------------------------------------
Patterns found: 30
Mean length of patterns in unambiguousL1: 4.77
Top-3 patterns:
    trgt * 7 
        support: 233
    sent_msg * 8 
        support: 134
    sent_msg * 9 
        support: 131
Unique Patterns
    pattern: ['dist', 'trgt', 'dist', 'dist', 'dist'] 
        support: 18
Condition: unambiguous, Strategy: L2
-----------------------------------------------
Patterns found: 42
Mean length of patterns in unambiguousL2: 6.52
Top-3 patterns:
    trgt * 10 
        support: 308
    sent_msg * 8 
        support: 256
    sent_msg * 