# Setup and global variables

In [None]:
from pathlib import Path

import os
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

from IPython.display import display, HTML
from tqdm import tqdm

from src.prioritization import *

# input data
VERSION = '0.0.0'  # becomes part of the dataset directory name
DATASET_PATH = Path('data') / 'datasets' / f'ipython_{VERSION}'
MODEL_PATH = DATASET_PATH / 'trained_heuristics'
SURVEY_PATH = DATASET_PATH / 'teacher_survey'
FEATURES_PATH = DATASET_PATH / 'hold_out_prioritization_scores'

# sub-directory of DATASET_PATH that holds the log files
DATA_PARTITION = 'hold_out'

# output data
OUTPUT_DIR = DATASET_PATH / 'benchmark_dataset'

# config
RESOLUTION = 300  # presumably the DPI of exported figures — confirm

# images
IMAGE_DIR = Path('images') / "survey_results"

# make sure the image directory (and its parents) exists; a no-op when it
# is already there
IMAGE_DIR.mkdir(parents=True, exist_ok=True)

***

# Loading data

In [None]:
# item and defect tables, indexed by their first column
items = pd.read_csv(DATASET_PATH / 'items.csv', index_col=0)
defects = pd.read_csv(DATASET_PATH / 'defects.csv', index_col=0)

# teacher survey exports (semicolon-separated, with a parsed timestamp)
responses = pd.read_csv(SURVEY_PATH / 'responses.csv', sep=';', parse_dates=['timestamp'])
feedback = pd.read_csv(SURVEY_PATH / 'feedback.csv', sep=';', parse_dates=['timestamp'])

# logs of the selected data partition
log = pd.read_csv(DATASET_PATH / DATA_PARTITION / 'log.csv', index_col=0, parse_dates=['time'])
defect_log = pd.read_csv(DATASET_PATH / DATA_PARTITION / 'defect_log.csv', index_col=0)
# the column labels are later melted into a 'defect id' column; store them as int
defect_log.columns = defect_log.columns.astype(int)

# keep only single response per user-item pair
# NOTE(review): the grouping key is ('submission id', 'item id') and contains
# no respondent column — confirm this matches the "user-item pair" intent.
# GroupBy.first() also picks the first non-null value per column, so a kept
# row can combine values from several original rows.
responses = responses.groupby(['submission id', 'item id']).first().reset_index()

# keep only survey submissions
survey_submissions = responses['submission id'].unique()
log = log.loc[survey_submissions]  # raises KeyError if a surveyed submission is missing from the log
defect_log = defect_log.loc[log.index]

In [None]:
# load models for metadata
# every heuristic is restored from '<display name>.pkl' inside MODEL_PATH
models = {
    label: heuristic_cls.load(MODEL_PATH / f'{label}.pkl')
    for label, heuristic_cls in (
        ("Task Common", TaskCommonModel),
        ("Task Characteristic", TaskCharacteristicModel),
        ("Student Frequency", StudentFrequencyModel),
        ("Student Characteristic", StudentCharacteristicModel),
        ("Student Encountered", StudentEncounteredBeforeModel),
        ("Defect Multiplicity", DefectMultiplicityModel),
        ("Severity Baseline", SeverityModel),
    )
}

In [None]:
# load heuristic scores as features and keep only the rows that belong to
# survey submissions
discrete_features = (
    pd.read_csv(FEATURES_PATH / 'discrete_scores.csv', index_col=0, sep=';')
    [lambda scores: scores['submission id'].isin(survey_submissions)]
)
continuous_features = (
    pd.read_csv(FEATURES_PATH / 'continuous_scores.csv', index_col=0, sep=';')
    [lambda scores: scores['submission id'].isin(survey_submissions)]
)

***
# Analysis

# Validation

In [None]:
# print a summary (columns, dtypes, non-null counts) of the survey responses
responses.info()

In [None]:
# print a summary (columns, dtypes, non-null counts) of the survey feedback
feedback.info()

In [None]:
# number of votes each answer received within a submission
vote_counts = responses.groupby(['submission id', 'answer']).size().reset_index(name='count')

# a submission is tied when more than one answer reaches its highest vote
# count; computed with vectorized group operations instead of a per-group
# Python callback
top_count = vote_counts.groupby('submission id')['count'].transform('max')
ties = (vote_counts['count'] == top_count).groupby(vote_counts['submission id']).sum() > 1

In [None]:
# headline statistics of the survey
answers_per_respondent = responses.groupby('respondent')['answer'].count()
answers_per_submission = responses.groupby('submission id')['answer'].count()
tied_percentage = np.round(ties.mean() * 100, 2)

print('Number of respondents:', responses['respondent'].nunique())
print('Average number of responses:', answers_per_respondent.mean())
print('Average number of answers per submission:', answers_per_submission.mean())
print('Percentage of tied results:', tied_percentage, '%')

***
# Benchmark Construction

## Defect pairs

In [None]:
# extract defect pairs
# long format: one row per (submission, defect) with a positive count
long_defects = (
    defect_log
    .melt(var_name='defect id', value_name='count', ignore_index=False)
    .reset_index(names=['submission id'])
)
long_defects = long_defects[long_defects['count'] > 0]

# pair every answer ("left") with each defect logged for the same
# submission ("right")
defect_pairs = (
    responses
    .merge(long_defects, on="submission id", how="left")
    .rename(columns={
        "answer": "left",
        "defect id": "right"
    })[["submission id", "left", "right"]]
)
# remove self-pairs; .copy() detaches the filtered frame so the column
# assignment below does not write into a slice of the merge result
# (avoids SettingWithCopyWarning)
defect_pairs = defect_pairs[defect_pairs["left"] != defect_pairs["right"]].copy()

# add negated pairs: the same pair with the sides swapped and the label flipped
defect_pairs['left won'] = True
negated_pairs = defect_pairs.rename(columns={"left": "right", "right": "left"})
negated_pairs['left won'] = False
defect_pairs = pd.concat([defect_pairs, negated_pairs])


## Heuristics as features

In [None]:
# add discrete features: attach the scores of the left defect first, then
# those of the right defect; the second merge suffixes the clashing score
# columns with " (Left)" / " (Right)"
defect_pairs_with_features = (
    defect_pairs
    .merge(
        discrete_features,
        how="left",
        left_on=["submission id", "left"],
        right_on=["submission id", "defect id"],
    )
    .drop(columns=["defect id"])  # key column brought in by discrete_features
    .merge(
        discrete_features,
        how="left",
        left_on=["submission id", "right"],
        right_on=["submission id", "defect id"],
        suffixes=(" (Left)", " (Right)"),
    )
    .drop(columns=["defect id"])  # key column brought in by discrete_features
)
discrete_columns = [
    column
    for column in defect_pairs_with_features.columns
    if column.endswith((" (Left)", " (Right)"))
]

# add continuous features
# NOTE(review): the row-wise subtraction below relies on both merges keeping
# one output row per pair, i.e. on a single score row per
# (submission id, defect id) — confirm.
left_continuous_features = (
    defect_pairs_with_features[["submission id", "left"]]
    .merge(
        continuous_features,
        how="left",
        left_on=["submission id", "left"],
        right_on=["submission id", "defect id"],
    )
    .drop(columns=["defect id", "submission id", "left"])
)

right_continuous_features = (
    defect_pairs_with_features[["submission id", "right"]]
    .merge(
        continuous_features,
        how="left",
        left_on=["submission id", "right"],
        right_on=["submission id", "defect id"],
    )
    .drop(columns=["defect id", "submission id", "right"])
)

# left score minus right score, one "<name>_diff" column per continuous score
differential_features = left_continuous_features.sub(right_continuous_features).add_suffix("_diff")

defect_pairs_with_features = pd.merge(
    defect_pairs_with_features,
    differential_features,
    left_index=True,
    right_index=True,
)
continuous_columns = differential_features.columns

In [None]:
# short alias for the feature frame; this binds a second name to the same
# object, it does not copy the data
df = defect_pairs_with_features

## Feature combinations

### Data mining

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

X = df[discrete_columns]
y = df['left won']

# convert into binary itemsets: each feature contributes "<name>>" when its
# value is positive and "<name><=" otherwise
itemsets = X.apply(
    lambda row: [f"{col}>" if value > 0 else f"{col}<=" for col, value in row.items()],
    axis=1,
)

# encode the itemsets as a boolean matrix with one column per distinct item
te = TransactionEncoder()
te.fit(itemsets)
te_ary = te.transform(itemsets)
encoded = pd.DataFrame(te_ary, columns=te.columns_)

# the outcome is added as one more item so rules can have it as consequent
encoded['winner'] = y.values.astype(bool)

# run apriori
frequent_itemsets = apriori(encoded, min_support=0.1, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

# filter rules
# predicting the output variable
predicts_winner = rules['consequents'].apply(lambda items: 'winner' in items)
rules = rules[predicts_winner]
# sufficient confidence and support
strong_enough = (rules['confidence'] > 0.7) & (rules['support'] > 0.15)
# sort by lift, then keep only one (the highest-lift) rule per antecedent
rules = (
    rules[strong_enough]
    .sort_values(by='lift', ascending=False)
    .drop_duplicates(subset=['antecedents'])
)

In [None]:
# display the per-row item lists for inspection
itemsets

In [None]:
# first five rules of the filtered rule table, limited to the columns shown
shown_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
best_rules = rules[shown_columns].head(5)
rules_table = best_rules.to_html(index=False, border=0, classes='rules-table')

# generated by chatGPT
# dark-themed wrapper around the rendered table; doubled braces are literal
# CSS braces inside the f-string
html = f"""
<div style="background-color: #121212; color: #f0f0f0; padding: 15px; font-family: 'Segoe UI', sans-serif; max-width: 900px; margin: 20px auto; border-radius: 6px;">
    <h2 style="text-align: center; margin-bottom: 15px;">Top Unique Association Rules Predicting Target=1</h2>
    {rules_table}
</div>
<style>
    .rules-table {{
        width: 100%;
        border-collapse: collapse;
        color: #dcdcdc;
    }}
    .rules-table th {{
        background-color: #2b2b2b;
        padding: 8px;
        text-align: left;
    }}
    .rules-table td {{
        background-color: #1e1e1e;
        padding: 8px;
    }}
    .rules-table tr:hover td {{
        background-color: #3a3a3a;
    }}
</style>
"""
display(HTML(html))

In [None]:
def match_rule(row, antecedents):
    """Return 1 if ``row`` satisfies every condition in ``antecedents``, else 0.

    Each condition is a feature name followed by ``>`` (the value must be
    positive) or ``<=`` (the value must not be positive). The operator is
    read from the end of the string, so the feature name itself is never
    split.
    """
    for cond in antecedents:
        if cond.endswith('>'):
            satisfied = row[cond[:-1]] > 0
        else:
            feat = cond[:-2] if cond.endswith('<=') else cond
            satisfied = row[feat] <= 0
        if not satisfied:
            return 0
    return 1


# one 0/1 indicator column per selected rule, named after its sorted conditions
# NOTE(review): pairwise_df is not defined in the visible part of the
# notebook; it must contain the feature columns the rules refer to
# (possibly this is meant to be `df`) — confirm.
derived_df = {
    " & ".join(sorted(antecedents)): pairwise_df.apply(match_rule, axis=1, antecedents=antecedents)
    for antecedents in best_rules['antecedents']
}

# materialize the rule names as a list: a dict view would stay tied to the
# dict object that is replaced by the DataFrame on the next line
derived_rules = list(derived_df)
derived_df = pd.DataFrame(derived_df)

### Decision tree

In [None]:
# pair every answer with the sampled defects of the same submission and drop
# the rows where the sampled defect is the answer itself
# NOTE(review): sampled_defects and features are not defined in the visible
# part of the notebook — confirm where they come from.
discretized_df = responses.merge(sampled_defects[['submission id', 'defect id']], on='submission id', how='left')
# .copy() detaches the filtered frame so the column assignments below do not
# write into a slice of the merge result
discretized_df = discretized_df[~(discretized_df['answer'] == discretized_df['defect id'])].copy()

defect1_features = []
defect2_features = []

# add categorical features
# 'defect1 <feature>' holds the value looked up for the answer,
# 'defect2 <feature>' the value looked up for the other defect of the row
for feature, discretized_values in features.items():
    defect1_name = f'defect1 {feature}'
    defect2_name = f'defect2 {feature}'

    defect1_features.append(defect1_name)
    defect2_features.append(defect2_name)

    discretized_df[defect1_name] = np.zeros(len(discretized_df))
    discretized_df[defect2_name] = np.zeros(len(discretized_df))

    for idx, row in discretized_df.iterrows():
        discretized_df.at[idx, defect1_name] = discretized_values.loc[row['submission id'], row['answer']]
        discretized_df.at[idx, defect2_name] = discretized_values.loc[row['submission id'], row['defect id']]

discretized_pairwise_df = []

# every (submission, respondent) group gets one running 'response id'
for index, (_, group) in enumerate(discretized_df.groupby(['submission id', 'respondent'])):
    for _, row in group.iterrows():
        # the answer is defect1, so each side keeps the features looked up for
        # it (previously the two feature sets were swapped, so the features
        # described the opposite defect)
        discretized_pairwise_df.append({
            'response id': index,
            'defect1': row['answer'],
            'defect2': row['defect id'],
            'first chosen': 1,
            **{name: row[name] for name in defect1_features},
            **{name: row[name] for name in defect2_features},
        })

        # also add the reverse: the answer is now defect2, so the feature
        # values swap sides together with the defects
        discretized_pairwise_df.append({
            'response id': index,
            'defect1': row['defect id'],
            'defect2': row['answer'],
            'first chosen': 0,
            **{d1: row[d2] for d1, d2 in zip(defect1_features, defect2_features)},
            **{d2: row[d1] for d1, d2 in zip(defect1_features, defect2_features)},
        })

discretized_pairwise_df = pd.DataFrame(discretized_pairwise_df)


In [None]:
from sklearn.tree import DecisionTreeClassifier, plot_tree

X = discretized_pairwise_df[defect1_features + defect2_features]
y = discretized_pairwise_df['first chosen']

# shallow tree so the learned feature interactions stay readable
tree = DecisionTreeClassifier(max_depth=3, random_state=42)
tree.fit(X, y)

# NOTE(review): big_figsize is not defined in the visible part of the
# notebook — confirm where it comes from.
plt.figure(figsize=big_figsize, layout="constrained")
# feature names are passed as a plain list: some scikit-learn releases
# reject a pandas Index for this parameter
plot_tree(tree, feature_names=list(X.columns), filled=True, rounded=True, class_names=['Chosen Second','Chosen First'])
# the accuracy in the title is measured on the training data itself
plt.title(f"Shallow Decision Tree for Feature Interactions (ACC: {tree.score(X, y):.2f})")
plt.show()

In [None]:
# rank the features by the importance reported by the fitted tree
importances = tree.feature_importances_
importance_df = pd.DataFrame({'feature': X.columns, 'importance': importances})
importance_df = importance_df.sort_values(by='importance', ascending=False)

# show the ten most important features
print(importance_df.head(10))

## Combined

In [None]:
# combine differential, discretized and rule-derived features column-wise
# NOTE(review): pairwise_df is not defined in the visible part of the
# notebook, and differential_features is assigned a DataFrame earlier, so
# indexing with it does not select columns by name — confirm the intended
# column list (possibly continuous_columns). The same two statements open
# the next cell.
X = pd.concat([pairwise_df[differential_features], discretized_pairwise_df[defect1_features + defect2_features], derived_df[derived_rules]], axis=1)
y = pairwise_df['first chosen'].astype(bool)

In [None]:
# combine differential, discretized and rule-derived features column-wise
# NOTE(review): pairwise_df, training, groups, train_test_split,
# LogisticRegression, GaussianNB and the two evaluation helpers are not
# defined in the visible part of the notebook — confirm where they come from.
# differential_features is assigned a DataFrame earlier, so indexing with it
# does not select columns by name — confirm the intended column list.
X = pd.concat([pairwise_df[differential_features], discretized_pairwise_df[defect1_features + defect2_features], derived_df[derived_rules]], axis=1)
y = pairwise_df['first chosen'].astype(bool)

# groups for the leave-one-group-out evaluation; they must stay row-aligned
# with X and y
eval_groups = pairwise_df['response id']

if training:
    X, _, y, _, groups, _ = train_test_split(
        X, y, groups, test_size=0.2, random_state=42, stratify=y
    )
    # after the split only the retained rows remain, so evaluate with the
    # matching subset of groups instead of the full-length column
    eval_groups = groups

# NOTE: this rebinds `models`, replacing the heuristic models loaded earlier
models = {
    "Decision Tree": DecisionTreeClassifier(max_depth=5, random_state=4444),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Naive Bayes": GaussianNB()
}

# one summary frame per classifier
results_df = []

for name, model in models.items():
    preds, truths = leave_one_group_out_for_model(model, X, y, groups=eval_groups)
    summary_df = summarize_model_performance(truths, preds, model_name=name)
    results_df.append(summary_df)

final_results_df = pd.concat(results_df).reset_index(drop=True)
print(final_results_df)