In [4]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text

from MarkovModels.MarkovModelConstructor import MarkovModelConstructor


In [None]:
# Build Markov models from the fixation table for every combination of
# `explode` (False/True) and aggregation level ('participant'/'trial'),
# calling create_markov_models with save=True and the given save_path.
path = "/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/final_datasets/final_experiment_fixations.csv"
df = pd.read_csv(path)
states = ['sent_msg', 'trgt', 'comp', 'dist', 'av_msgs']
# Single switch for the optional 'non_aoi' state: it drives both the state list
# and the include_non_aoi argument below so the two cannot drift apart.
# NOTE(review): assumes include_non_aoi is meant to mirror this flag — confirm
# against MarkovModelConstructor.create_markov_models.
keep_non_aoi = False
if keep_non_aoi:
    states.append('non_aoi')
MarkovModelConstr = MarkovModelConstructor(states)
save_path = '/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/markov_models'
for explode in [False, True]:
    for per in ['participant', 'trial']:
        MarkovModelConstr.create_markov_models(df, states, include_non_aoi=keep_non_aoi, save=True, explode=explode, per=per, save_path=save_path)


In [5]:

def fit_classifier(df: pd.DataFrame, model, target, param_grid=None):
    """Tune `model` with a 5-fold grid search and return the best estimator.

    Every column of `df` except `target` is used as a feature. The search is
    scored with weighted F1 and the best parameters / best mean CV score are
    printed.

    Parameters
    ----------
    df : pd.DataFrame
        Table holding the feature columns and the `target` column.
    model : estimator
        Unfitted scikit-learn style estimator exposing ``get_params()``.
    target : str
        Name of the label column in `df`.
    param_grid : dict or None, optional
        Grid handed to ``GridSearchCV`` unchanged. When None, a default grid
        of tree hyperparameters is used, restricted to the parameters that
        `model` actually exposes. Estimators without any of them (e.g. KNN or
        MLP) are then simply cross-validated with their current settings
        instead of failing with an "invalid parameter" error.

    Returns
    -------
    estimator
        ``best_estimator_`` of the fitted grid search.
    """
    from sklearn.model_selection import GridSearchCV

    features = [col for col in df.columns if col != target]
    X = df[features].to_numpy()
    y = df[target].to_numpy()

    if param_grid is None:
        default_grid = {
            "max_depth": [3, 4],
            "min_samples_split": [10, 15, 20],
            "min_samples_leaf": [5, 10, 15],
            "ccp_alpha": [0.0001, 0.005, 0.01],
            "max_features": [None]
        }
        # Keep only the hyperparameters this estimator knows about; passing
        # an unknown name to GridSearchCV raises during fitting.
        supported = model.get_params()
        param_grid = {name: values for name, values in default_grid.items() if name in supported}
        skipped = [name for name in default_grid if name not in supported]
        if skipped:
            print(f"Skipping grid parameters not supported by {type(model).__name__}: {skipped}")

    # Cross-validated search gives a less optimistic performance estimate
    # than a single train/test split.
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='f1_weighted', n_jobs=-1)
    grid_search.fit(X, y)
    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best score: {grid_search.best_score_:.2f}")

    return grid_search.best_estimator_

def visualize_tree_classifier(model, feature_names):
    """Export a fitted tree to DOT and wrap it in a ``graphviz.Source``.

    Parameters
    ----------
    model : fitted tree estimator
        Must expose ``classes_``; its entries are stringified for the
        class labels shown in the nodes.
    feature_names : list
        Feature labels, forwarded to ``export_graphviz``.

    Returns
    -------
    graphviz.Source
        Graph object built from the exported DOT data.
    """
    from sklearn.tree import export_graphviz
    import graphviz

    class_labels = [str(label) for label in model.classes_]
    export_options = dict(
        out_file=None,
        feature_names=feature_names,
        class_names=class_labels,
        filled=True,
        rounded=True,
        special_characters=True,
    )
    return graphviz.Source(export_graphviz(model, **export_options))


In [6]:
# Fit and render one decision tree per condition that predicts StrategyLabel
# from the frame returned by create_markov_models (explode=True, per='participant').
path = "/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/final_datasets/final_experiment_fixations.csv"
df = pd.read_csv(path)
states = ['sent_msg', 'trgt', 'comp', 'dist', 'av_msgs']
# Single switch for the optional 'non_aoi' state: it drives both the state list
# and the include_non_aoi argument below so the two cannot drift apart.
# NOTE(review): assumes include_non_aoi is meant to mirror this flag — confirm
# against MarkovModelConstructor.create_markov_models.
keep_non_aoi = False
if keep_non_aoi:
    states.append('non_aoi')
MarkovModelConstr = MarkovModelConstructor(states)
save_path = '/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/markov_models'
df = MarkovModelConstr.create_markov_models(df, states, include_non_aoi=keep_non_aoi, save=False, explode=True, per='participant')
assert df is not None, "DataFrame should not be None after Markov Model construction"
# Non-mutating drop: the frame returned by the constructor is left untouched,
# so the cell stays idempotent even if that object is shared elsewhere.
df = df.drop(columns=['Scanpath', 'TransitionMatrix', 'Subject', 'Correct'])
target = 'StrategyLabel'
print(df.columns.drop('Condition'))

for condition in ['simple', 'complex']:
    print(f"Processing condition: {condition}")
    df_condition = df[df['Condition'] == condition]
    # An empty subset would make the grid search fail; skip it with a message.
    if df_condition.empty:
        print(f"No data for condition: {condition}")
        continue
    df_condition = df_condition.drop(columns=['Condition'])
    df_condition = pd.get_dummies(df_condition, columns=['MsgType'], drop_first=True)

    scanp_model_tree = DecisionTreeClassifier(random_state=42, criterion='gini', class_weight='balanced', max_depth=3, ccp_alpha=0.001)
    scanp_fitted_model_tree = fit_classifier(df_condition, scanp_model_tree, target)

    graph = visualize_tree_classifier(scanp_fitted_model_tree, feature_names=list(df_condition.columns.drop(target)))
    graph.render(f"tree_visualizations/tree_classifier_scanp_{condition}", format='png', cleanup=True)



Index(['StrategyLabel', 'TrgtPos', 'MsgType', 'sent_msg_to_sent_msg',
       'sent_msg_to_trgt', 'sent_msg_to_comp', 'sent_msg_to_dist',
       'sent_msg_to_av_msgs', 'trgt_to_sent_msg', 'trgt_to_trgt',
       'trgt_to_comp', 'trgt_to_dist', 'trgt_to_av_msgs', 'comp_to_sent_msg',
       'comp_to_trgt', 'comp_to_comp', 'comp_to_dist', 'comp_to_av_msgs',
       'dist_to_sent_msg', 'dist_to_trgt', 'dist_to_comp', 'dist_to_dist',
       'dist_to_av_msgs', 'av_msgs_to_sent_msg', 'av_msgs_to_trgt',
       'av_msgs_to_comp', 'av_msgs_to_dist', 'av_msgs_to_av_msgs'],
      dtype='object')
Processing condition: simple
Best parameters: {'ccp_alpha': 0.0001, 'max_depth': 4, 'max_features': None, 'min_samples_leaf': 15, 'min_samples_split': 10}
Best score: 0.47
Processing condition: complex
Best parameters: {'ccp_alpha': 0.0001, 'max_depth': 4, 'max_features': None, 'min_samples_leaf': 10, 'min_samples_split': 10}
Best score: 0.51


In [8]:
# Fit and render one decision tree per condition that predicts StrategyLabel
# from the participant table.
path = "/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/final_datasets/final_experiment_participants.csv"
df = pd.read_csv(path)
# Discard rows whose Condition is 'all', then remove the columns that are not
# used as predictors in this cell.
df = df.loc[df['Condition'] != 'all']
df = df.drop(columns=['AnswerAccuracy', 'Subject', 'MeanAnswerTime'])
target = 'StrategyLabel'
print(df.columns.drop('Condition'))

for condition in ['simple', 'complex']:
    print(f"Processing condition: {condition}")
    df_condition = df[df['Condition'] == condition]
    if not df_condition.empty:
        # The condition column is no longer needed once the subset is taken.
        df_condition = df_condition.drop(columns=['Condition'])

        # Base estimator; fit_classifier runs the grid search on top of it.
        model_tree = DecisionTreeClassifier(
            criterion='gini',
            class_weight='balanced',
            max_depth=3,
            ccp_alpha=0.001,
            random_state=42,
        )
        prop_fitted_model_tree = fit_classifier(df_condition, model_tree, target)

        # Export the tuned tree as a PNG.
        graph = visualize_tree_classifier(
            prop_fitted_model_tree,
            feature_names=df_condition.columns.drop(target).tolist(),
        )
        graph.render(f"tree_visualizations/tree_classifier_prop_{condition}", format='png', cleanup=True)
    else:
        print(f"No data for condition: {condition}")


Index(['PropTimeOnSentMsg', 'PropTimeOnAvailableMsgs', 'PropTimeOnTrgt',
       'PropTimeOnDist', 'PropTimeOnComp', 'PropTimeOnNonAOI',
       'RateTogglingAvailableMsgs', 'NumTogglesAvailableMsgs',
       'StrategyLabel'],
      dtype='object')
Processing condition: simple
Best parameters: {'ccp_alpha': 0.0001, 'max_depth': 3, 'max_features': None, 'min_samples_leaf': 15, 'min_samples_split': 10}
Best score: 0.50
Processing condition: complex
Best parameters: {'ccp_alpha': 0.0001, 'max_depth': 3, 'max_features': None, 'min_samples_leaf': 15, 'min_samples_split': 10}
Best score: 0.56


In [None]:
# Predict Condition from the frame returned by create_markov_models
# (explode=True, per='participant') using a single-tree random forest.
# RandomForestClassifier comes from the notebook's top import cell.
path = "/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/final_datasets/final_experiment_fixations.csv"
df = pd.read_csv(path)
states = ['sent_msg', 'trgt', 'comp', 'dist', 'av_msgs']
# Single switch for the optional 'non_aoi' state: it drives both the state list
# and the include_non_aoi argument below so the two cannot drift apart.
# NOTE(review): assumes include_non_aoi is meant to mirror this flag — confirm
# against MarkovModelConstructor.create_markov_models.
keep_non_aoi = False
if keep_non_aoi:
    states.append('non_aoi')
MarkovModelConstr = MarkovModelConstructor(states)
save_path = '/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/markov_models'
df = MarkovModelConstr.create_markov_models(df, states, include_non_aoi=keep_non_aoi, save=False, explode=True, per='participant')
assert df is not None, "DataFrame should not be None after Markov Model construction"
df = pd.get_dummies(df, columns=['MsgType'], drop_first=True)
target = 'Condition'
# Non-mutating drop keeps the cell idempotent on re-run.
df = df.drop(columns=['Scanpath', 'TransitionMatrix', 'Subject', 'Correct'])

model_forest = RandomForestClassifier(n_estimators=1, random_state=42, criterion='gini')
fitted_model_forest = fit_classifier(df, model_forest, target)
# Alternative estimators, currently disabled:
# model_knn = KNeighborsClassifier(n_neighbors=20)
# fitted_model_knn = fit_classifier(df, model_knn, target)
# model_nn = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
# fitted_model_nn = fit_classifier(df, model_nn, target)

Model: RandomForestClassifier, Accuracy: 0.44
Confusion Matrix:
Predicted    complex  simple  unambiguous
Actual                                   
complex            7       9            4
simple             4       9            6
unambiguous        3       8           11
Model: KNeighborsClassifier, Accuracy: 0.36
Confusion Matrix:
Predicted    complex  simple  unambiguous
Actual                                   
complex           11       9            0
simple            12       7            0
unambiguous       10       8            4
Model: MLPClassifier, Accuracy: 0.56
Confusion Matrix:
Predicted    complex  simple  unambiguous
Actual                                   
complex            8      11            1
simple             6      10            3
unambiguous        1       5           16




In [7]:
# Predict Condition from the participant table with three different estimators.
path = "/home/gatemrou/uds/thesis/Thesis-Project/analysis/data/final_datasets/final_experiment_participants.csv"
df = pd.read_csv(path)
df = df.loc[df['Condition'] != 'all']
target = 'Condition'
df = df.drop(columns=['Subject'])
# One-hot encode StrategyLabel, dropping the first level.
df = pd.get_dummies(df, columns=['StrategyLabel'], drop_first=True)
print('Features: ', list(df.columns.drop(target)))

# Each estimator is built and then passed straight to fit_classifier.
model_forest = RandomForestClassifier(criterion='gini', n_estimators=1, random_state=42)
fitted_model_forest = fit_classifier(df, model_forest, target)

model_knn = KNeighborsClassifier(n_neighbors=15)
fitted_model_knn = fit_classifier(df, model_knn, target)

model_nn = MLPClassifier(
    hidden_layer_sizes=(10, 5),
    solver='adam',
    max_iter=500,
    random_state=42,
)
fitted_model_nn = fit_classifier(df, model_nn, target)

Features:  ['PropTimeOnSentMsg', 'PropTimeOnAvailableMsgs', 'PropTimeOnTrgt', 'PropTimeOnDist', 'PropTimeOnComp', 'PropTimeOnNonAOI', 'RateTogglingAvailableMsgs', 'MeanAnswerTime', 'AnswerAccuracy', 'StrategyLabel_1.0', 'StrategyLabel_2.0']
Model: RandomForestClassifier, Accuracy: 0.61
Confusion Matrix:
Predicted    complex  simple  unambiguous
Actual                                   
complex           10       4            3
simple             9      13            2
unambiguous        3       3           14
Model: KNeighborsClassifier, Accuracy: 0.61
Confusion Matrix:
Predicted    complex  simple  unambiguous
Actual                                   
complex           12       3            2
simple             9       9            6
unambiguous        2       2           16
Model: MLPClassifier, Accuracy: 0.39
Confusion Matrix:
Predicted    complex  simple
Actual                      
complex            0      17
simple             0      24
unambiguous        1      19
