In [49]:
%load_ext autoreload
%autoreload 2

# Minimum `unique_values` a feature needs: construct_joint_qsda() zeroes the QSDA score
# of features below it, and validate() does not count them as selected.
UNIQUE_VALUES_THRESHOLD = 200

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
import numpy
import pandas

In [51]:
def analyze(scores: pandas.DataFrame, by, filters = (), agg = 'mean'):
    """Aggregate the summed per-subject score of the features by one name component.

    ``score`` is computed as ``subj1 + subj2 + subj3`` and the feature ``name`` is
    split on single spaces into the columns ``Source``, ``Algorithm``, ``Dimension``
    and ``Stat``. The filters are applied afterwards, so they may use any of these
    columns. The number of rows left after filtering is printed.

    Args:
        scores: frame with the columns ``name``, ``subj1``, ``subj2`` and ``subj3``.
            It is copied first, so the caller's frame is not modified.
        by: name of the column to group by (e.g. ``'Stat'``).
        filters: iterable of callables ``frame -> frame`` applied in order;
            ``None`` is treated as "no filters".
        agg: ``'mean'`` or ``'count'``.

    Returns:
        One row per group with the columns ``by`` and ``score``, sorted by
        ``score`` in descending order.

    Raises:
        ValueError: if ``agg`` is neither ``'mean'`` nor ``'count'``.
    """
    # Fail early and explicitly instead of hitting an unbound `stats` at the end.
    if agg not in ('mean', 'count'):
        raise ValueError(f"Unsupported agg {agg!r}; expected 'mean' or 'count'")

    # Work on a copy: the derived columns below must not leak into the caller's frame.
    scores = scores.copy()
    scores["score"] = scores["subj1"] + scores["subj2"] + scores["subj3"]
    scores[["Source", "Algorithm", "Dimension", "Stat"]] = scores['name'].str.split(' ', expand = True)
    for filt in (filters or ()):
        scores = filt(scores)

    scores = scores[[by, "score"]]
    print('Features: ', scores.shape[0])

    grouped = scores.groupby(by = by)
    if agg == 'mean':
        stats = grouped.mean().reset_index()
    else:
        stats = grouped.count().reset_index()
    return stats.sort_values(by = 'score', ascending = False)

### QSDA

In [52]:
def construct_joint_qsda():
    """Load the QSDA score tables of Subj1-Subj3 and put their scores side by side.

    Each table is read from ``<subject>/exp_full_flow/qsda/scores.csv``. Its
    ``score`` is multiplied by the boolean mask
    ``unique_values >= UNIQUE_VALUES_THRESHOLD``, so scores of features below the
    threshold are multiplied by zero.

    Returns:
        A frame with the columns ``name`` (taken from the Subj1 table), ``subj1``,
        ``subj2`` and ``subj3`` holding the masked scores.
    """
    tables = []
    for subj in ("Subj1", "Subj2", "Subj3"):
        table = pandas.read_csv(f"{subj}/exp_full_flow/qsda/scores.csv")
        table["score"] = table["score"] * (table["unique_values"] >= UNIQUE_VALUES_THRESHOLD)
        tables.append(table)
    scores1, scores2, scores3 = tables

    # NOTE(review): join() aligns on the default row index, i.e. by row position.
    # This assumes the three files list the same features in the same order - confirm.
    # After the first join every shared column carries a suffix, so the columns of the
    # third table keep their plain names ("score" below is the Subj3 score).
    scores = scores1.join(scores2, lsuffix = "_subj1", rsuffix = "_subj2").join(scores3, rsuffix = "_subj3")
    scores = scores[["name_subj1", "score_subj1", "score_subj2", "score" ]]
    scores = scores.rename(columns = { 'name_subj1': 'name', 'score_subj1': 'subj1', 'score_subj2': 'subj2', 'score': 'subj3' })

    return scores

In [53]:
def filter1(scores):
    """Drop the rows whose ``Stat`` is a percentile, the median, skew or kurtosis.

    Rows with any other ``Stat`` value, including a missing one, are kept.
    """
    excluded_stats = ["percentile-25", "median", "percentile-75", "skew", "kurtosis"]
    keep = ~scores["Stat"].isin(excluded_stats)
    return scores[keep]

# Mean joint QSDA score per Stat: first over all features, then with filter1 applied.
# The joint table is rebuilt from the CSV files for each call.
display(analyze(construct_joint_qsda(), by = 'Stat'))
display(analyze(construct_joint_qsda(), by = 'Stat', filters = [ filter1 ]))

Features:  19143


Unnamed: 0,Stat,score
2,mean,2.418383
4,norm-1,2.320092
10,sum,2.320092
5,norm-2,2.090892
9,std,2.003843
1,max,1.79858
0,kurtosis,1.329754
3,median,1.181211
7,percentile-75,1.043281
8,skew,0.960317


Features:  12543


Unnamed: 0,Stat,score
1,mean,2.418383
2,norm-1,2.320092
5,sum,2.320092
3,norm-2,2.090892
4,std,2.003843
0,max,1.79858


In [54]:
def filter2(scores):
    """Drop the rows whose ``Dimension`` is ``all``, ``dim-4`` or ``dim-5``.

    Rows with any other ``Dimension`` value, including a missing one, are kept.
    """
    excluded_dimensions = ["all", "dim-4", "dim-5"]
    is_excluded = scores["Dimension"].isin(excluded_dimensions)
    return scores[~is_excluded]

# Mean joint QSDA score per Dimension: with filter1 only, then with filter1 + filter2.
display(analyze(construct_joint_qsda(), by = 'Dimension', filters = [ filter1 ]))
display(analyze(construct_joint_qsda(), by = 'Dimension', filters = [ filter1, filter2 ]))

Features:  12543


Unnamed: 0,Dimension,score
7,norm-2,3.216626
6,norm-1,3.112098
1,dim-1,2.852267
0,all,2.79276
2,dim-2,2.210085
3,dim-3,1.739903
4,dim-4,1.299172
5,dim-5,0.922026


Features:  9339


Unnamed: 0,Dimension,score
4,norm-2,3.216626
3,norm-1,3.112098
0,dim-1,2.852267
1,dim-2,2.210085
2,dim-3,1.739903


In [55]:
def filter3(scores):
    """Drop the rows produced by the betti-based and point-count algorithms.

    Removed ``Algorithm`` values: ``betti``, ``numberofpoints``,
    ``amplitude-betti-1`` and ``amplitude-betti-2``. Every other row is kept.
    """
    excluded_algorithms = ["betti", "numberofpoints", "amplitude-betti-1", "amplitude-betti-2"]
    keep = ~scores["Algorithm"].isin(excluded_algorithms)
    return scores[keep]

# Mean joint QSDA score per Algorithm: with filter1 + filter2, then additionally filter3.
display(analyze(construct_joint_qsda(), by = 'Algorithm', filters = [ filter1, filter2 ]))
display(analyze(construct_joint_qsda(), by = 'Algorithm', filters = [ filter1, filter2, filter3 ]))

Features:  9339


Unnamed: 0,Algorithm,score
8,amplitude-silhouette-1-2,4.008034
7,amplitude-silhouette-1-1,3.961057
3,amplitude-landscape-1-1,3.495149
4,amplitude-landscape-1-2,3.345452
19,silhouette-1,3.33627
9,amplitude-silhouette-2-1,3.310503
20,silhouette-2,3.301615
10,amplitude-silhouette-2-2,3.284384
5,amplitude-landscape-2-1,2.937374
16,landscape,2.91571


Features:  7728


Unnamed: 0,Algorithm,score
6,amplitude-silhouette-1-2,4.008034
5,amplitude-silhouette-1-1,3.961057
1,amplitude-landscape-1-1,3.495149
2,amplitude-landscape-1-2,3.345452
15,silhouette-1,3.33627
7,amplitude-silhouette-2-1,3.310503
16,silhouette-2,3.301615
8,amplitude-silhouette-2-2,3.284384
3,amplitude-landscape-2-1,2.937374
13,landscape,2.91571


In [56]:
def filter4(scores):
    """Drop every row whose ``Source`` starts with ``dissim``.

    A single vectorised prefix test replaces the previous loop that re-filtered the
    whole frame once per matching source. Rows with a missing ``Source`` are kept
    (``na = False`` makes the prefix test False for them).
    """
    is_dissim = scores["Source"].str.startswith("dissim", na = False)
    return scores[~is_dissim]

# Mean joint QSDA score per Source after all four filters; analyze() sorts by score
# in descending order, so head(60) shows the 60 best-scoring sources.
display(analyze(construct_joint_qsda(), by = 'Source', filters = [ filter1, filter2, filter3, filter4 ]).head(60))

Features:  4176


Unnamed: 0,Source,score
3,channel-11,5.223644
35,channel-7,5.013056
25,channel-31,4.912045
18,channel-25,4.844955
37,channel-9,4.763065
0,channel-0,4.743569
15,channel-22,4.608109
17,channel-24,4.560234
1,channel-1,4.547576
11,channel-19,4.547441


In [57]:
# Build the notebook-level `scores` table that validate() looks feature names up in:
# the joint QSDA table with the summed score and the name split into its four
# components (the same preparation analyze() performs), reduced by filter1-filter4.
scores = construct_joint_qsda()
scores["score"] = scores["subj1"] + scores["subj2"] + scores["subj3"]
scores[["Source", "Algorithm", "Dimension", "Stat"]] = scores['name'].str.split(' ', expand = True)
for filt in [ filter1, filter2, filter3, filter4 ]:
    scores = filt(scores)
# Renumber the surviving rows from 0.
scores = scores.reset_index(drop = True)
display(scores)

Unnamed: 0,name,subj1,subj2,subj3,score,Source,Algorithm,Dimension,Stat
0,channel-0 landscape dim-1 max,2.048583,1.010298,1.687861,4.746743,channel-0,landscape,dim-1,max
1,channel-0 landscape dim-1 mean,2.457866,1.310240,2.395157,6.163264,channel-0,landscape,dim-1,mean
2,channel-0 landscape dim-1 std,2.235899,1.314854,2.078066,5.628818,channel-0,landscape,dim-1,std
3,channel-0 landscape dim-1 sum,2.457866,1.310240,2.395157,6.163264,channel-0,landscape,dim-1,sum
4,channel-0 landscape dim-1 norm-1,2.457866,1.310240,2.395157,6.163264,channel-0,landscape,dim-1,norm-1
...,...,...,...,...,...,...,...,...,...
4171,overall bd2 dim-3 mean,1.220762,1.471759,2.207955,4.900476,overall,bd2,dim-3,mean
4172,overall bd2 dim-3 std,1.708366,0.442976,0.700258,2.851600,overall,bd2,dim-3,std
4173,overall bd2 dim-3 sum,1.910437,0.264090,0.989711,3.164238,overall,bd2,dim-3,sum
4174,overall bd2 dim-3 norm-1,1.910437,0.264090,0.989711,3.164238,overall,bd2,dim-3,norm-1


In [79]:
def validate(subj):
    """Print the selected ``dissim`` features of one subject that are not in ``scores``.

    Reads ``<subj>/exp_full_flow/qsda/scores.csv``. A feature counts as selected when
    its ``unique_values`` is at least ``UNIQUE_VALUES_THRESHOLD`` and its
    ``normalized_score`` is at least the subject's threshold. A selected feature is
    printed when its name does not contain ``all``, does contain ``dissim`` and is
    absent from the ``name`` column of the notebook-level ``scores`` table. A final
    line reports how many names were printed.

    NOTE(review): if ``scores`` is the table built with filter4 applied, it holds no
    ``dissim*`` sources, so every selected dissim feature is reported - confirm that
    this is the intended check.

    Args:
        subj: ``'Subj1'``, ``'Subj2'`` or ``'Subj3'``.

    Raises:
        ValueError: if ``subj`` is not one of the known subjects.
    """
    # Per-subject lower bound on `normalized_score`.
    qsda_thresholds = { 'Subj1': 0.4, 'Subj2': 0.28, 'Subj3': 0.32 }
    if subj not in qsda_thresholds:
        raise ValueError(f"Unknown subject {subj!r}; expected one of {sorted(qsda_thresholds)}")
    qsda_threshold = qsda_thresholds[subj]

    src = pandas.read_csv(f"{subj}/exp_full_flow/qsda/scores.csv")
    src["selected"] = (src["unique_values"] >= UNIQUE_VALUES_THRESHOLD) & (src["normalized_score"] >= qsda_threshold)

    # Build the lookup once instead of scanning the whole name column for every row.
    known_names = set(scores["name"])

    cou = 0
    for name in src.loc[src["selected"], "name"]:
        if "all" in name:
            continue
        if "dissim" not in name:
            continue
        if name in known_names:
            continue
        print(name)
        cou += 1
    print(f"Missed {cou} features")

In [80]:
# Selected dissim features of Subj1 that validate() does not find in `scores`.
validate("Subj1")

dissim-0 landscape dim-1 kurtosis
dissim-0 silhouette-2 dim-1 kurtosis
dissim-0 entropy dim-1
dissim-0 life dim-1 percentile-25
dissim-0 life dim-1 median
dissim-0 bd2 dim-2 std
dissim-1 landscape dim-1 kurtosis
dissim-1 entropy dim-1
dissim-1 amplitude-landscape-1-2 dim-3
dissim-1 life dim-2 std
dissim-1 bd2 dim-2 std
dissim-2 entropy dim-1
dissim-2 amplitude-landscape-1-2 dim-3
dissim-3 landscape dim-3 mean
dissim-3 landscape dim-3 sum
dissim-3 landscape dim-3 norm-1
dissim-3 entropy dim-1
dissim-3 amplitude-landscape-1-1 dim-3
dissim-3 bd2 dim-2 std
dissim-4 landscape dim-1 kurtosis
dissim-4 entropy dim-1
dissim-4 amplitude-landscape-1-2 dim-3
dissim-4 amplitude-landscape-2-2 dim-3
dissim-4 bd2 dim-2 std
dissim-5 landscape dim-1 kurtosis
dissim-5 silhouette-1 dim-1 kurtosis
dissim-5 silhouette-2 dim-1 kurtosis
dissim-5 bd2 dim-2 std
dissim-6 landscape dim-2 mean
dissim-6 landscape dim-2 sum
dissim-6 landscape dim-2 norm-1
dissim-6 landscape dim-3 mean
dissim-6 landscape dim-3 sum
di

In [81]:
# Selected dissim features of Subj2 that validate() does not find in `scores`.
validate("Subj2")

dissim-0 landscape dim-5 mean
dissim-0 landscape dim-5 sum
dissim-0 landscape dim-5 norm-1
dissim-0 silhouette-1 dim-1 median
dissim-0 silhouette-1 dim-5 mean
dissim-0 silhouette-1 dim-5 sum
dissim-0 silhouette-1 dim-5 norm-1
dissim-0 silhouette-2 dim-1 median
dissim-0 silhouette-2 dim-5 mean
dissim-0 silhouette-2 dim-5 sum
dissim-0 silhouette-2 dim-5 norm-1
dissim-0 entropy dim-1
dissim-0 amplitude-landscape-1-1 dim-5
dissim-0 amplitude-landscape-1-2 dim-5
dissim-0 amplitude-silhouette-1-1 dim-5
dissim-0 amplitude-silhouette-1-2 dim-5
dissim-1 landscape dim-1 median
dissim-1 silhouette-1 dim-1 median
dissim-1 silhouette-2 dim-1 median
dissim-1 entropy dim-1
dissim-1 amplitude-landscape-1-2 dim-5
dissim-1 bd2 dim-4 std
dissim-2 landscape dim-5 max
dissim-2 silhouette-1 dim-5 mean
dissim-2 silhouette-1 dim-5 sum
dissim-2 silhouette-1 dim-5 norm-1
dissim-2 silhouette-2 dim-1 median
dissim-2 silhouette-2 dim-5 max
dissim-2 silhouette-2 dim-5 mean
dissim-2 silhouette-2 dim-5 sum
dissim-2 s

In [82]:
# Selected dissim features of Subj3 that validate() does not find in `scores`.
validate("Subj3")

dissim-0 landscape dim-1 median
dissim-0 silhouette-1 dim-1 median
dissim-0 silhouette-2 dim-1 median
dissim-0 entropy dim-1
dissim-0 bd2 dim-3 std
dissim-1 silhouette-1 dim-1 median
dissim-1 silhouette-2 dim-1 median
dissim-1 entropy dim-1
dissim-2 silhouette-1 dim-1 median
dissim-2 entropy dim-1
dissim-3 silhouette-1 dim-1 median
dissim-3 silhouette-2 dim-1 median
dissim-3 entropy dim-1
dissim-4 silhouette-1 dim-1 median
dissim-4 silhouette-2 dim-1 median
dissim-4 amplitude-landscape-1-2 dim-4
dissim-5 landscape dim-4 mean
dissim-5 landscape dim-4 sum
dissim-5 landscape dim-4 norm-1
dissim-5 silhouette-1 dim-1 median
dissim-5 silhouette-2 dim-1 median
dissim-5 entropy dim-1
dissim-5 amplitude-landscape-1-1 dim-4
dissim-5 bd2 dim-3 std
dissim-6 landscape dim-1 median
dissim-6 silhouette-1 dim-1 median
dissim-6 silhouette-2 dim-1 median
dissim-6 entropy dim-1
dissim-7 landscape dim-1 median
dissim-7 landscape dim-4 mean
dissim-7 landscape dim-4 sum
dissim-7 landscape dim-4 norm-1
dissi

### IV

In [62]:
def construct_joint_iv():
    """Load the IV tables of Subj1-Subj3 and put their ``IV`` columns side by side.

    Each table is read from ``<subject>/exp_full_flow/iv/iv.csv``.

    Returns:
        A frame with the columns ``name`` (the ``Feature`` column of the Subj1
        table), ``subj1``, ``subj2`` and ``subj3`` holding the ``IV`` values.
    """
    scores1, scores2, scores3 = (
        pandas.read_csv(f"{subj}/exp_full_flow/iv/iv.csv")
        for subj in ("Subj1", "Subj2", "Subj3")
    )

    # NOTE(review): join() aligns on the default row index, i.e. by row position.
    # This assumes the three files list the same features in the same order - confirm.
    # After the first join every shared column carries a suffix, so the columns of the
    # third table keep their plain names ("IV" below is the Subj3 value).
    scores = scores1.join(scores2, lsuffix = "_subj1", rsuffix = "_subj2").join(scores3, rsuffix = "_subj3")
    scores = scores[["Feature_subj1", "IV_subj1", "IV_subj2", "IV" ]]
    scores = scores.rename(columns = { 'Feature_subj1': 'name', 'IV_subj1': 'subj1', 'IV_subj2': 'subj2', 'IV': 'subj3' })

    return scores

In [None]:
def filter1(scores):
    """Drop the rows whose ``Stat`` is a percentile, the median, skew or kurtosis.

    Re-declares the ``filter1`` of the QSDA section with the same exclusion list.
    Rows with any other ``Stat`` value, including a missing one, are kept.
    """
    excluded_stats = ["percentile-25", "median", "percentile-75", "skew", "kurtosis"]
    keep = ~scores["Stat"].isin(excluded_stats)
    return scores[keep]

# Mean joint IV per Stat: first over all features, then with filter1 applied.
display(analyze(construct_joint_iv(), by = 'Stat'))
display(analyze(construct_joint_iv(), by = 'Stat', filters = [ filter1 ]))

In [None]:
def filter2(scores):
    """Drop the rows whose ``Dimension`` is ``all``, ``dim-4`` or ``dim-5``.

    Re-declares the ``filter2`` of the QSDA section with the same exclusion list.
    Rows with any other ``Dimension`` value, including a missing one, are kept.
    """
    excluded_dimensions = ["all", "dim-4", "dim-5"]
    is_excluded = scores["Dimension"].isin(excluded_dimensions)
    return scores[~is_excluded]

# Mean joint IV per Dimension: with filter1 only, then with filter1 + filter2.
display(analyze(construct_joint_iv(), by = 'Dimension', filters = [ filter1 ]))
display(analyze(construct_joint_iv(), by = 'Dimension', filters = [ filter1, filter2 ]))

In [None]:
def filter3(scores):
    """Drop the rows produced by the betti-based and point-count algorithms.

    Re-declares the ``filter3`` of the QSDA section with the same exclusion list:
    ``betti``, ``numberofpoints``, ``amplitude-betti-1`` and ``amplitude-betti-2``.
    """
    excluded_algorithms = ["betti", "numberofpoints", "amplitude-betti-1", "amplitude-betti-2"]
    keep = ~scores["Algorithm"].isin(excluded_algorithms)
    return scores[keep]

# Mean joint IV per Algorithm: with filter1 + filter2, then additionally filter3.
display(analyze(construct_joint_iv(), by = 'Algorithm', filters = [ filter1, filter2 ]))
display(analyze(construct_joint_iv(), by = 'Algorithm', filters = [ filter1, filter2, filter3 ]))

In [None]:
def filter4(scores):
    """Drop every row whose ``Source`` starts with ``dissim``.

    Re-declares the ``filter4`` of the QSDA section. A single vectorised prefix test
    replaces the previous loop that re-filtered the whole frame once per matching
    source. Rows with a missing ``Source`` are kept (``na = False`` makes the prefix
    test False for them).
    """
    is_dissim = scores["Source"].str.startswith("dissim", na = False)
    return scores[~is_dissim]

# Mean joint IV per Source after all four filters; analyze() sorts by score in
# descending order, so head(60) shows the 60 best-scoring sources.
display(analyze(construct_joint_iv(), by = 'Source', filters = [ filter1, filter2, filter3, filter4 ]).head(60))