In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from MarkovModels.MarkovModelConstructor import MarkovModelConstructor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier


In [38]:

def fit_classifier(df: pd.DataFrame, model, target):
    features = [col for col in df.columns if col != target]
    X = df[features].to_numpy()
    y = df[target].to_numpy()
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model.fit(X_train, y_train)
    
    # Evaluate the model
    conf_matrix = pd.crosstab(y_test, model.predict(X_test), rownames=['Actual'], colnames=['Predicted'])
    f1 = f1_score(model.predict(X_test), y_test, average='weighted')
    print(f"Model: {model.__class__.__name__}, Accuracy: {f1:.2f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    
    return model


In [39]:
path = "C:\\Users\\timur\\uds\\Thesis-Project\\analysis\\data\\final_datasets\\final_experiment_fixations.csv"
df = pd.read_csv(path)
states = ['sent_msg', 'trgt', 'comp', 'dist', 'av_msgs']
keep_non_aoi = False
if keep_non_aoi:
    states.append('non_aoi')
MarkovModelConstr = MarkovModelConstructor(states)
df = MarkovModelConstr.create_markov_models(df, states, include_non_aoi=False, save=False, explode=True, per='participant')
# print(df.iloc[0]['TransitionMatrix'])
assert df is not None, "DataFrame should not be None after Markov Model construction"
# only keep simple condition
df = df[df['Condition'] == 'simple']
df.drop(columns=['Condition'], inplace=True)
df = pd.get_dummies(df, columns=['MsgType'], drop_first=True)
from sklearn.ensemble import RandomForestClassifier
target = 'StrategyLabel'
df.drop(columns=['Scanpath', 'TransitionMatrix', 'Subject', 'Correct'], inplace=True)

model_forest = RandomForestClassifier(n_estimators=1, random_state=42, criterion='gini', class_weight='balanced')
fitted_model_forest = fit_classifier(df, model_forest, target)
model_knn = KNeighborsClassifier(n_neighbors=20)
fitted_model_knn = fit_classifier(df, model_knn, target)
model_nn = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
fitted_model_nn = fit_classifier(df, model_nn, target)

  self.transition_matrix = self.joint_probabilities / self.prior_probabilities[:, np.newaxis]


Model: RandomForestClassifier, Accuracy: 0.63
Confusion Matrix:
Predicted  0  1  2
Actual            
0          2  1  3
1          1  2  1
2          0  2  9
Model: KNeighborsClassifier, Accuracy: 0.69
Confusion Matrix:
Predicted   2
Actual       
0           6
1           4
2          11
Model: MLPClassifier, Accuracy: 0.65
Confusion Matrix:
Predicted  0  1   2
Actual             
0          2  2   2
1          1  1   2
2          1  0  10




In [43]:
path = "C:\\Users\\timur\\uds\\Thesis-Project\\analysis\\data\\final_datasets\\final_experiment_participants.csv"
df = pd.read_csv(path)
df = df[df['Condition'] != 'all']
target = 'StrategyLabel'
df = pd.get_dummies(df, columns=['Condition'], drop_first=True)
df = df.drop(columns=['Subject'])
print('Features: ', df.columns.drop(target).tolist())

class_weight = df[target].value_counts(normalize=True).to_dict()
print('Class weights: ', class_weight   )

model_forest = RandomForestClassifier(n_estimators=1, random_state=42, criterion='gini', class_weight='balanced')
fitted_model_forest = fit_classifier(df, model_forest, target)
model_knn = KNeighborsClassifier(n_neighbors=20)
fitted_model_knn = fit_classifier(df, model_knn, target)
model_nn = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42, solver='adam')
fitted_model_nn = fit_classifier(df, model_nn, target)

Features:  ['PropTimeOnSentMsg', 'PropTimeOnAvailableMsgs', 'PropTimeOnTrgt', 'PropTimeOnDist', 'PropTimeOnComp', 'PropTimeOnNonAOI', 'RateTogglingAvailableMsgs', 'MeanAnswerTime', 'AnswerAccuracy', 'Condition_simple', 'Condition_unambiguous']
Class weights:  {2.0: 0.5544554455445545, 0.0: 0.24752475247524752, 1.0: 0.19801980198019803}
Model: RandomForestClassifier, Accuracy: 0.45
Confusion Matrix:
Predicted  0.0  1.0  2.0
Actual                  
0.0          4    4    8
1.0          2    4    8
2.0          7    5   19
Model: KNeighborsClassifier, Accuracy: 0.58
Confusion Matrix:
Predicted  0.0  1.0  2.0
Actual                  
0.0          0    3   13
1.0          1    0   13
2.0          1    2   28
Model: MLPClassifier, Accuracy: 0.37
Confusion Matrix:
Predicted  1.0
Actual        
0.0         16
1.0         14
2.0         31


In [52]:
path = "C:\\Users\\timur\\uds\\Thesis-Project\\analysis\\data\\final_datasets\\final_experiment_participants.csv"
df = pd.read_csv(path)
df = df[df['Condition'] != 'all']
target = 'Condition'
df = df.drop(columns=['Subject'])
df = pd.get_dummies(df, columns=['StrategyLabel'], drop_first=True)
print('Features: ', df.columns.drop(target).tolist())

model_forest = RandomForestClassifier(n_estimators=1, random_state=42, criterion='gini')
fitted_model_forest = fit_classifier(df, model_forest, target)
model_knn = KNeighborsClassifier(n_neighbors=15)
fitted_model_knn = fit_classifier(df, model_knn, target)
model_nn = MLPClassifier(hidden_layer_sizes=(10, 5), max_iter=500, random_state=42, solver='adam')
fitted_model_nn = fit_classifier(df, model_nn, target)

Features:  ['PropTimeOnSentMsg', 'PropTimeOnAvailableMsgs', 'PropTimeOnTrgt', 'PropTimeOnDist', 'PropTimeOnComp', 'PropTimeOnNonAOI', 'RateTogglingAvailableMsgs', 'MeanAnswerTime', 'AnswerAccuracy', 'StrategyLabel_1.0', 'StrategyLabel_2.0']
Model: RandomForestClassifier, Accuracy: 0.60
Confusion Matrix:
Predicted    complex  simple  unambiguous
Actual                                   
complex           10       4            3
simple             9      13            2
unambiguous        3       3           14
Model: KNeighborsClassifier, Accuracy: 0.62
Confusion Matrix:
Predicted    complex  simple  unambiguous
Actual                                   
complex           12       3            2
simple             9       9            6
unambiguous        2       2           16
Model: MLPClassifier, Accuracy: 0.56
Confusion Matrix:
Predicted    complex  simple
Actual                      
complex            0      17
simple             0      24
unambiguous        1      19
