In [57]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
from scipy.stats import zscore
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from scipy.stats import mannwhitneyu, ttest_ind

# Function to divide into two classes

In [58]:
def std_classification(df):
    """Split a 1-D collection of scores into two classes around its mean.

    The scores are standardised with ``scipy.stats.zscore``. Every value whose
    z-score is at or below zero is labelled 0 and every other value is
    labelled 1.

    Parameters
    ----------
    df : pandas.Series or 1-D array-like of numbers
        The scores to classify. In this notebook it is handed one column at a
        time.

    Returns
    -------
    list of int
        One 0/1 label per input value, in input order.

    Notes
    -----
    A NaN z-score fails the ``<= 0.0`` comparison and therefore ends up in
    class 1.
    """
    z_scores = zscore(df)
    # zscore does not always hand back a pandas object (a list or ndarray
    # input yields a plain ndarray), so unwrap ``.values`` only when it is
    # present instead of assuming it exists.
    z_scores = getattr(z_scores, "values", z_scores)
    return [0 if z <= 0.0 else 1 for z in z_scores]

In [59]:
# One-column frame with the uid of every row in our own feature table; the
# analysis cells inner-join against it to restrict themselves to these uids.
valid_uids = pd.read_csv("../out/our_tracking_features.csv").loc[:, ["uid"]]

# Cole 2011 Features (MCQ Scores)

In [60]:
# Join the Cole (2011) feature table with the MCQ scores on uid, then keep only
# the uids that are also listed in valid_uids.
features_lightning = pd.read_csv("../out/cole2011_tracking_features.csv")
mcq_scores = pd.read_csv("../data/mcq_scores.tsv", sep="\t")
features = pd.merge(features_lightning, mcq_scores, on="uid", how="inner")
features = pd.merge(features, valid_uids, on="uid", how="inner")
# Binary label from the z-scored "kg" column: 0 when z <= 0, 1 otherwise.
features["class"] = std_classification(features["kg"])

low = features[features["class"] == 0]
high = features[features["class"] == 1]
# Every column that is not an identifier, a score or the label is tested as a feature.
cols = features.drop(columns=["source", "task", "acode", "uid", "pre", "post", "kg", "class"], errors="ignore").columns

res = []
for col in cols:
    # Pin the alternative explicitly: SciPy releases before 1.7 used a
    # different (deprecated) default, so leaving it implicit makes the
    # reported p-value depend on the installed version.
    p_value = mannwhitneyu(low[col].dropna(), high[col].dropna(), alternative="two-sided").pvalue
    res.append([
        col,
        p_value,
        low[col].median(),
        high[col].median(),
        f"{low[col].mean().round(2)} +- {low[col].std().round(2)}",
        f"{high[col].mean().round(2)} +- {high[col].std().round(2)}",
    ])

df = pd.DataFrame(res, columns=["Feature Name", "M-W U (p-value)", "median(low)", "median(high)", "mean+-std(low)", "mean+-std(high)"])
df = df.round(3)
# Keep the p-values under a test-specific column name for the composed table.
cole_mcq_results = df[["Feature Name", "M-W U (p-value)"]].rename(columns={"M-W U (p-value)": "cole_mcq_M-W U (p-value)"})
df.to_markdown("../results/cole2011_mcq_scores_kg.md", index=False)
df

Unnamed: 0,Feature Name,M-W U (p-value),median(low),median(high),mean+-std(low),mean+-std(high)
0,n_CP_visited,0.821,6.0,6.5,7.24 +- 4.64,7.0 +- 3.12
1,sum_fix_dur,0.314,485008.0,414958.15,488957.4 +- 269601.68,436337.85 +- 214791.67
2,mean_fix_dur,0.031,453.011,424.109,467.11 +- 81.62,434.31 +- 63.14
3,n_fixs,0.865,963.0,923.5,1063.2 +- 621.83,1013.35 +- 508.86
4,max_sum_reading_dur_per_content-page,0.609,27810.25,24655.15,34920.57 +- 27930.4,33164.86 +- 27474.04
5,mean_sum_reading_dur_per_content-page,0.689,11387.786,11828.631,18272.22 +- 22316.64,15863.77 +- 10588.05
6,mean_dur_per_RS,0.025,2341.3,2090.722,2350.0 +- 558.0,2128.45 +- 361.47
7,n_RS,0.425,26.5,32.5,31.21 +- 22.94,34.42 +- 23.67
8,sum_RF_dur,0.966,63741.25,68304.2,71822.51 +- 47811.15,71611.25 +- 46130.09
9,mean_RF_dur_per_CP,0.689,11387.786,11828.631,18272.22 +- 22316.64,15863.77 +- 10588.05


# Cole 2011 Features (Essay Scores)

In [61]:
# Join the Cole (2011) feature table with the essay scores on uid, then keep
# only the uids that are also listed in valid_uids.
features_lightning = pd.read_csv("../out/cole2011_tracking_features.csv")
essay_scores = pd.read_csv("../data/essay_scores.csv")
features = pd.merge(features_lightning, essay_scores, on="uid", how="inner")
features = pd.merge(features, valid_uids, on="uid", how="inner")
# Binary label from the z-scored "kg" column: 0 when z <= 0, 1 otherwise.
features["class"] = std_classification(features["kg"])

low = features[features["class"] == 0]
high = features[features["class"] == 1]
# Every column that is not an identifier, a score or the label is tested as a feature.
cols = features.drop(columns=["source", "task", "acode", "uid", "pre", "post", "kg", "class"], errors="ignore").columns

res = []
for col in cols:
    # Pin the alternative explicitly: SciPy releases before 1.7 used a
    # different (deprecated) default, so leaving it implicit makes the
    # reported p-value depend on the installed version.
    p_value = mannwhitneyu(low[col].dropna(), high[col].dropna(), alternative="two-sided").pvalue
    res.append([
        col,
        p_value,
        low[col].median(),
        high[col].median(),
        f"{low[col].mean().round(2)} +- {low[col].std().round(2)}",
        f"{high[col].mean().round(2)} +- {high[col].std().round(2)}",
    ])

df = pd.DataFrame(res, columns=["Feature Name", "M-W U (p-value)", "median(low)", "median(high)", "mean+-std(low)", "mean+-std(high)"])
df = df.round(3)
# Keep the p-values under a test-specific column name for the composed table.
cole_essay_results = df[["Feature Name", "M-W U (p-value)"]].rename(columns={"M-W U (p-value)": "cole_essay_M-W U (p-value)"})
df.to_markdown("../results/cole2011_essay_scores_kg.md", index=False)
df

Unnamed: 0,Feature Name,M-W U (p-value),median(low),median(high),mean+-std(low),mean+-std(high)
0,n_CP_visited,0.159,6.0,7.0,6.78 +- 4.56,7.43 +- 3.76
1,sum_fix_dur,0.051,343315.8,475733.3,425061.94 +- 282614.44,502864.21 +- 219435.67
2,mean_fix_dur,0.158,446.937,435.239,471.0 +- 90.14,442.27 +- 62.19
3,n_fixs,0.012,859.5,1064.0,896.89 +- 588.72,1157.47 +- 551.27
4,max_sum_reading_dur_per_content-page,0.391,24455.7,27436.1,31506.91 +- 24890.26,36367.24 +- 29609.93
5,mean_sum_reading_dur_per_content-page,0.473,11532.581,11683.836,15443.63 +- 12871.94,18835.17 +- 22221.28
6,mean_dur_per_RS,0.303,2343.137,2178.509,2313.71 +- 561.55,2230.11 +- 454.43
7,n_RS,0.035,21.5,31.0,26.89 +- 20.17,36.67 +- 24.53
8,sum_RF_dur,0.074,54118.0,77032.05,63589.76 +- 49086.29,77993.45 +- 44673.7
9,mean_RF_dur_per_CP,0.473,11532.581,11683.836,15443.63 +- 12871.94,18835.17 +- 22221.28


# Our Features (MCQ Scores)

In [62]:
# Join our own feature table with the MCQ scores on uid, then inner-join
# against valid_uids like the other analysis cells do.
features_lightning = pd.read_csv("../out/our_tracking_features.csv")
mcq_scores = pd.read_csv("../data/mcq_scores.tsv", sep="\t")
features = pd.merge(features_lightning, mcq_scores, on="uid", how="inner")
features = pd.merge(features, valid_uids, on="uid", how="inner")
# Binary label from the z-scored "kg" column: 0 when z <= 0, 1 otherwise.
features["class"] = std_classification(features["kg"])

low = features[features["class"] == 0]
high = features[features["class"] == 1]
# Every column that is not an identifier, a score or the label is tested as a feature.
cols = features.drop(columns=["source", "task", "acode", "uid", "pre", "post", "kg", "class"], errors="ignore").columns

res = []
for col in cols:
    # Pin the alternative explicitly: SciPy releases before 1.7 used a
    # different (deprecated) default, so leaving it implicit makes the
    # reported p-value depend on the installed version.
    p_value = mannwhitneyu(low[col].dropna(), high[col].dropna(), alternative="two-sided").pvalue
    res.append([
        col,
        p_value,
        low[col].median(),
        high[col].median(),
        f"{low[col].mean().round(2)} +- {low[col].std().round(2)}",
        f"{high[col].mean().round(2)} +- {high[col].std().round(2)}",
    ])

df = pd.DataFrame(res, columns=["Feature Name", "M-W U (p-value)", "median(low)", "median(high)", "mean+-std(low)", "mean+-std(high)"])
df = df.round(3)
# This table is the left-hand base of the composed results, so it carries the
# descriptive columns as well as the renamed p-value column.
word_level_mcq_results = df[["Feature Name", "median(low)", "median(high)", "mean+-std(low)", "mean+-std(high)", "M-W U (p-value)"]].rename(columns={"M-W U (p-value)": "our_mcq_M-W U (p-value)"})
df.to_markdown("../results/our_mcq_scores_kg.md", index=False)
df

Unnamed: 0,Feature Name,M-W U (p-value),median(low),median(high),mean+-std(low),mean+-std(high)
0,n_CP_visited,0.768,6.0,6.0,6.59 +- 4.15,6.42 +- 2.88
1,sum_fix_dur,0.508,372980.8,316167.8,390998.15 +- 228877.14,356479.94 +- 194805.4
2,mean_fix_dur,0.015,468.292,419.192,471.14 +- 89.8,432.91 +- 63.86
3,n_fixs,0.92,747.0,798.5,844.68 +- 528.41,830.92 +- 459.95
4,mean_dur_per_RS,0.079,1860.48,1698.3,1940.58 +- 564.92,1774.36 +- 319.46
5,n_RS,0.611,89.5,97.5,99.05 +- 67.36,101.42 +- 58.53
6,sum_RF_dur,0.747,176885.5,168084.75,186911.25 +- 116363.45,176989.36 +- 99331.26
7,mean_RF_dur_per_CP,0.722,29934.271,28319.896,40904.72 +- 42270.5,34831.79 +- 20897.4
8,mean_n_RF_per_CP,0.582,65.5,66.042,84.58 +- 98.97,79.44 +- 48.27
9,mean_RF_dur,0.024,481.741,441.146,489.82 +- 108.28,447.26 +- 72.97


# Our Features (Essay Scores)

In [63]:
# Join our own feature table with the essay scores on uid, then inner-join
# against valid_uids like the other analysis cells do.
features_lightning = pd.read_csv("../out/our_tracking_features.csv")
essay_scores = pd.read_csv("../data/essay_scores.csv")
features = pd.merge(features_lightning, essay_scores, on="uid", how="inner")
features = pd.merge(features, valid_uids, on="uid", how="inner")
# Binary label from the z-scored "kg" column: 0 when z <= 0, 1 otherwise.
features["class"] = std_classification(features["kg"])

low = features[features["class"] == 0]
high = features[features["class"] == 1]
# Every column that is not an identifier, a score or the label is tested as a feature.
cols = features.drop(columns=["source", "task", "acode", "uid", "pre", "post", "kg", "class"], errors="ignore").columns

res = []
for col in cols:
    # Pin the alternative explicitly: SciPy releases before 1.7 used a
    # different (deprecated) default, so leaving it implicit makes the
    # reported p-value depend on the installed version.
    p_value = mannwhitneyu(low[col].dropna(), high[col].dropna(), alternative="two-sided").pvalue
    res.append([
        col,
        p_value,
        low[col].median(),
        high[col].median(),
        f"{low[col].mean().round(2)} +- {low[col].std().round(2)}",
        f"{high[col].mean().round(2)} +- {high[col].std().round(2)}",
    ])

df = pd.DataFrame(res, columns=["Feature Name", "M-W U (p-value)", "median(low)", "median(high)", "mean+-std(low)", "mean+-std(high)"])
df = df.round(3)
# Keep the p-values under a test-specific column name for the composed table.
word_level_essay_results = df[["Feature Name", "M-W U (p-value)"]].rename(columns={"M-W U (p-value)": "our_essay_M-W U (p-value)"})
df.to_markdown("../results/our_essay_scores_kg.md", index=False)
df

Unnamed: 0,Feature Name,M-W U (p-value),median(low),median(high),mean+-std(low),mean+-std(high)
0,n_CP_visited,0.17,6.0,6.0,6.15 +- 3.95,6.82 +- 3.52
1,sum_fix_dur,0.069,273214.8,381949.55,344375.68 +- 240804.37,403729.9 +- 193695.89
2,mean_fix_dur,0.059,464.945,435.942,476.32 +- 98.71,441.68 +- 65.08
3,n_fixs,0.017,654.5,831.5,718.33 +- 496.73,932.38 +- 489.0
4,mean_dur_per_RS,0.046,1891.7,1698.3,1969.48 +- 593.55,1807.6 +- 387.87
5,n_RS,0.006,70.5,102.5,82.83 +- 61.62,113.07 +- 62.98
6,sum_RF_dur,0.04,139253.25,188889.65,162084.85 +- 117506.12,199330.22 +- 101673.82
7,mean_RF_dur_per_CP,0.13,27149.313,31083.212,33002.75 +- 23605.94,42914.28 +- 42477.88
8,mean_n_RF_per_CP,0.045,58.7,69.438,65.35 +- 40.61,95.89 +- 103.31
9,mean_RF_dur,0.046,482.967,443.189,497.04 +- 116.6,455.91 +- 78.01


# Compose results

In [64]:
# Start from our MCQ table and attach the three remaining p-value tables one
# after another. Every join is a left join on "Feature Name", so only features
# present in word_level_mcq_results make it into the composed table.
results = word_level_mcq_results
for extra in (word_level_essay_results, cole_mcq_results, cole_essay_results):
    results = pd.merge(results, extra, on="Feature Name", how="left")
results

Unnamed: 0,Feature Name,median(low),median(high),mean+-std(low),mean+-std(high),our_mcq_M-W U (p-value),our_essay_M-W U (p-value),cole_mcq_M-W U (p-value),cole_essay_M-W U (p-value)
0,n_CP_visited,6.0,6.0,6.59 +- 4.15,6.42 +- 2.88,0.768,0.17,0.821,0.159
1,sum_fix_dur,372980.8,316167.8,390998.15 +- 228877.14,356479.94 +- 194805.4,0.508,0.069,0.314,0.051
2,mean_fix_dur,468.292,419.192,471.14 +- 89.8,432.91 +- 63.86,0.015,0.059,0.031,0.158
3,n_fixs,747.0,798.5,844.68 +- 528.41,830.92 +- 459.95,0.92,0.017,0.865,0.012
4,mean_dur_per_RS,1860.48,1698.3,1940.58 +- 564.92,1774.36 +- 319.46,0.079,0.046,0.025,0.303
5,n_RS,89.5,97.5,99.05 +- 67.36,101.42 +- 58.53,0.611,0.006,0.425,0.035
6,sum_RF_dur,176885.5,168084.75,186911.25 +- 116363.45,176989.36 +- 99331.26,0.747,0.04,0.966,0.074
7,mean_RF_dur_per_CP,29934.271,28319.896,40904.72 +- 42270.5,34831.79 +- 20897.4,0.722,0.13,0.689,0.473
8,mean_n_RF_per_CP,65.5,66.042,84.58 +- 98.97,79.44 +- 48.27,0.582,0.045,0.351,0.233
9,mean_RF_dur,481.741,441.146,489.82 +- 108.28,447.26 +- 72.97,0.024,0.046,0.068,0.154


# Our Features: mean +- std per class for pre, post and kg scores (MCQ and Essay)

In [65]:
tests = ["mcq", "essay"]
scores = ["pre", "post", "kg"]

features_lightning = pd.read_csv("../out/our_tracking_features.csv")

# Read each score file once up front instead of once per (score, test) pair;
# the loop below only derives labelled copies and never modifies these tables.
score_tables = {
    "mcq": pd.read_csv("../data/mcq_scores.tsv", sep="\t"),
    "essay": pd.read_csv("../data/essay_scores.csv"),
}

# One row per feature column (uid is the join key, not a feature); each pass of
# the loop appends two "mean +- std" columns to this frame.
df = pd.DataFrame(features_lightning.columns.values, columns=["Feature Name"])
df = df[df["Feature Name"] != "uid"]

for score in scores:
    for test in tests:
        raw_scores = score_tables[test]
        # NOTE(review): the labels here are derived from the whole score table
        # before it is joined to the features, whereas the per-test cells label
        # after the join. Both give the same split only if the two tables cover
        # the same uids - confirm this is intended.
        test_scores = raw_scores.assign(**{"class": std_classification(raw_scores[score])})

        features = pd.merge(features_lightning, test_scores, on="uid", how="inner")

        low = features[features["class"] == 0]
        high = features[features["class"] == 1]
        cols = features.drop(columns=["source", "task", "acode", "uid", "pre", "post", "kg", "class"], errors="ignore").columns

        res = []
        for col in cols:
            res.append([
                col,
                f"{low[col].mean().round(2)} +- {low[col].std().round(2)}",
                f"{high[col].mean().round(2)} +- {high[col].std().round(2)}",
            ])

        summary = pd.DataFrame(res, columns=["Feature Name", f"{test}_{score}_mean+-std(low)", f"{test}_{score}_mean+-std(high)"])
        df = pd.merge(df, summary, on="Feature Name", how="left")
df.to_markdown("../results/our_both_tests_feature_values_summary.md", index=False)
df

Unnamed: 0,Feature Name,mcq_pre_mean+-std(low),mcq_pre_mean+-std(high),essay_pre_mean+-std(low),essay_pre_mean+-std(high),mcq_post_mean+-std(low),mcq_post_mean+-std(high),essay_post_mean+-std(low),essay_post_mean+-std(high),mcq_kg_mean+-std(low),mcq_kg_mean+-std(high),essay_kg_mean+-std(low),essay_kg_mean+-std(high)
0,n_CP_visited,6.06 +- 3.34,7.21 +- 4.15,6.47 +- 3.61,6.59 +- 3.86,6.22 +- 3.76,6.85 +- 3.67,6.09 +- 3.72,6.98 +- 3.68,6.59 +- 4.15,6.42 +- 2.88,6.15 +- 3.95,6.82 +- 3.52
1,sum_fix_dur,342392.42 +- 207467.21,430101.22 +- 220899.34,354869.16 +- 199877.18,404847.61 +- 233257.49,359378.78 +- 221350.99,397281.17 +- 211430.35,331835.63 +- 243253.84,425883.68 +- 174009.89,390998.15 +- 228877.14,356479.94 +- 194805.4,344375.68 +- 240804.37,403729.9 +- 193695.89
2,mean_fix_dur,447.81 +- 80.27,469.75 +- 85.62,452.98 +- 73.64,461.05 +- 92.9,472.23 +- 85.21,440.6 +- 77.75,467.71 +- 92.17,445.29 +- 70.86,471.14 +- 89.8,432.91 +- 63.86,476.32 +- 98.71,441.68 +- 65.08
3,n_fixs,775.3 +- 474.26,933.53 +- 530.48,793.09 +- 465.98,893.47 +- 539.65,764.7 +- 476.65,917.15 +- 519.19,708.67 +- 528.65,975.35 +- 436.3,844.68 +- 528.41,830.92 +- 459.95,718.33 +- 496.73,932.38 +- 489.0
4,mean_dur_per_RS,1830.55 +- 452.24,1947.16 +- 543.01,1880.5 +- 519.72,1874.78 +- 462.89,1901.41 +- 469.52,1853.39 +- 517.67,1916.54 +- 561.89,1837.67 +- 408.58,1940.58 +- 564.92,1774.36 +- 319.46,1969.48 +- 593.55,1807.6 +- 387.87
5,n_RS,93.0 +- 60.38,110.12 +- 68.16,97.0 +- 60.44,103.37 +- 68.17,91.87 +- 60.93,108.33 +- 66.38,83.11 +- 66.8,117.42 +- 56.19,99.05 +- 67.36,101.42 +- 58.53,82.83 +- 61.62,113.07 +- 62.98
6,sum_RF_dur,168089.59 +- 105950.36,205257.49 +- 112978.88,177956.49 +- 104783.02,189228.5 +- 116295.56,173650.69 +- 111402.73,193049.6 +- 108433.89,158064.21 +- 121069.71,209235.56 +- 90872.61,186911.25 +- 116363.45,176989.36 +- 99331.26,162084.85 +- 117506.12,199330.22 +- 101673.82
7,mean_RF_dur_per_CP,40115.64 +- 42248.66,36411.58 +- 23438.92,40749.97 +- 43594.01,36127.24 +- 23762.71,40161.14 +- 44657.54,37005.42 +- 23485.53,32243.94 +- 21667.83,45227.13 +- 45318.7,40904.72 +- 42270.5,34831.79 +- 20897.4,33002.75 +- 23605.94,42914.28 +- 42477.88
8,mean_n_RF_per_CP,88.49 +- 101.73,74.07 +- 43.98,88.59 +- 104.98,75.71 +- 47.14,83.89 +- 107.11,81.34 +- 48.51,67.01 +- 45.43,98.86 +- 107.74,84.58 +- 98.97,79.44 +- 48.27,65.35 +- 40.61,95.89 +- 103.31
9,mean_RF_dur,464.92 +- 96.35,486.71 +- 100.86,467.56 +- 87.85,480.98 +- 109.74,492.44 +- 103.56,454.37 +- 89.45,485.43 +- 110.34,461.65 +- 83.4,489.82 +- 108.28,447.26 +- 72.97,497.04 +- 116.6,455.91 +- 78.01
