# Define Parameters for saving and data

In [1]:
import utils

# --- Evaluation protocol -----------------------------------------------------
cv_folds = 5         # number of KFold splits used inside test_method
train_portion = 0.7  # fraction of the unique names used for training / cross-validation

# --- Data selection (forwarded unchanged to utils.load_raw_data) --------------
# NOTE(review): the exact meaning of f, b and a is defined by utils.load_raw_data
# and is not visible in this notebook -- TODO document them here.
fps = 90
f = 44
b = 200
a = 200
d_type = 'unityVel'
pre_path = "../"  # Important to load the data correctly

# --- Output location ----------------------------------------------------------
# The folder name is derived from b and a so it cannot drift from the values
# actually used. The earlier `save_folder = "../results/"` placeholder was
# overwritten immediately and has been dropped.
save_folder = utils.create_unique_folder(f"Results/ml_algos_unity_vel_b{b}_a{a}")

# Create function to test the different methods

In [2]:
from sklearn.model_selection import ParameterGrid, KFold
import utils
import numpy as np
import os
import json
from tqdm import tqdm

def _score_one_class_model(model, samples_correct, samples_incorrect):
    """Score a fitted model on held-out "correct" and "incorrect" samples.

    A prediction of ``1`` counts as a hit on the correct samples and a
    prediction of ``-1`` counts as a hit on the incorrect samples.

    Returns:
        Tuple ``(acc_correct, acc_incorrect, macro, micro)`` where ``macro`` is
        the unweighted mean of the two accuracies and ``micro`` is the mean
        weighted by the number of samples in each group.
    """
    pred_correct = model.predict(samples_correct)
    pred_incorrect = model.predict(samples_incorrect)
    acc_correct = np.mean(pred_correct == 1)
    acc_incorrect = np.mean(pred_incorrect == -1)
    macro = np.mean([acc_correct, acc_incorrect])
    n_correct, n_incorrect = len(pred_correct), len(pred_incorrect)
    micro = (acc_correct * n_correct + acc_incorrect * n_incorrect) / (n_correct + n_incorrect)
    return acc_correct, acc_incorrect, macro, micro


def test_method(method, parameters, cv_folds=5, train_portion=0.7, fps=90, f=44, b=200, a=200, d_type='unityVel', pre_path="../", save_folder="../results/"):
    """Grid-search a one-class model per condition and write the scores to JSON.

    For each condition ("gaze", "headAndGaze", "nod") the data is loaded with
    ``utils.load_raw_data``. The unique names of the correct samples are split
    (in ``np.unique`` order) into a training part and a held-out part. Every
    parameter combination is cross-validated on the training names, fitting on
    correct samples only. The combination with the highest mean macro accuracy
    is then refitted on all training names and scored on the held-out names.

    Args:
        method: Estimator class. It is instantiated as ``method(**params)`` and
            must provide ``fit(X)``, ``predict(X)`` and ``__name__``.
        parameters: Parameter grid accepted by ``ParameterGrid``.
        cv_folds: Number of ``KFold`` splits over the training names.
        train_portion: Fraction of the unique names used for training / CV.
        fps, f, b, a, d_type, pre_path: Forwarded unchanged to
            ``utils.load_raw_data``.
        save_folder: Directory receiving ``<method>_cv.json`` (all CV scores)
            and ``<method>.json`` (best model per condition). It is created if
            it does not exist.

    Raises:
        ValueError: If no parameter combination yields a comparable (non-NaN)
            CV score for a condition, e.g. because the CV test folds are empty.
    """
    conditions = ["gaze", "headAndGaze", "nod"]
    grid = ParameterGrid(parameters)
    results = dict()
    best_model_results = dict()

    # Make sure the output location is usable *before* the (potentially very
    # long) grid search, instead of failing when the results are written.
    if save_folder:
        os.makedirs(save_folder, exist_ok=True)

    n_loops = len(grid) * len(conditions)
    # The context manager closes the bar even if a model raises mid-search.
    with tqdm(total=n_loops, desc="Testing method: %s" % method.__name__, unit="model") as pbar:
        for cond in conditions:
            results[cond] = dict()
            angles_correct, angles_incorrect, names_correct, names_incorrect = utils.load_raw_data(fps, cond, f, b, a, d_type, pre_path)
            pat_names = np.unique(names_correct)
            n = int(len(pat_names) * train_portion)
            # Split the names (not the samples) into training and testing sets
            train_pats = pat_names[:n]
            test_pats = pat_names[n:]
            kf = KFold(n_splits=cv_folds, shuffle=False, random_state=None)

            # Reset the winner for every condition so a previous condition's
            # best parameters can never leak into this one.
            best_score = -np.inf
            best_params = None
            best_results = None

            for i, parameter in enumerate(grid, start=1):
                model_name = "Model%03d" % i
                cv_record = {
                    "params": parameter,
                    "cv_folds": list(),
                    "correct": list(),
                    "incorrect": list(),
                    "macro": list(),
                    "micro": list()
                }
                results[cond][model_name] = cv_record

                for fold, (train_idx, test_idx) in enumerate(kf.split(train_pats)):
                    model = method(**parameter)
                    fold_train_names = train_pats[train_idx]
                    fold_test_names = train_pats[test_idx]
                    # One-class setting: fit on correct samples only.
                    model.fit(angles_correct[np.isin(names_correct, fold_train_names)])
                    acc_correct, acc_incorrect, macro, micro = _score_one_class_model(
                        model,
                        angles_correct[np.isin(names_correct, fold_test_names)],
                        angles_incorrect[np.isin(names_incorrect, fold_test_names)],
                    )
                    cv_record["cv_folds"].append(fold)
                    cv_record["correct"].append(acc_correct)
                    cv_record["incorrect"].append(acc_incorrect)
                    cv_record["macro"].append(macro)
                    cv_record["micro"].append(micro)

                score = np.mean(cv_record["macro"])
                if score > best_score:
                    best_score = score
                    best_params = parameter
                    best_results = {"correct": cv_record["correct"],
                                    "incorrect": cv_record["incorrect"],
                                    "macro": cv_record["macro"],
                                    "micro": cv_record["micro"]}
                pbar.update(1)

            if best_params is None:
                # Happens when every mean macro score is NaN (NaN never compares
                # greater than -inf), so no combination could be selected.
                raise ValueError(
                    "No parameter combination produced a valid CV score for condition %r" % cond
                )

            # Refit the best configuration on all training names and score it
            # on the held-out names.
            model = method(**best_params)
            model.fit(angles_correct[np.isin(names_correct, train_pats)])
            acc_correct, acc_incorrect, macro, micro = _score_one_class_model(
                model,
                angles_correct[np.isin(names_correct, test_pats)],
                angles_incorrect[np.isin(names_incorrect, test_pats)],
            )
            best_model_results[cond] = {
                "best_params": best_params,
                "best_score": best_score,
                "best_results": best_results,
                "correct": acc_correct,
                "incorrect": acc_incorrect,
                "macro": macro,
                "micro": micro
            }

    # save results to json files
    with open(os.path.join(save_folder, f"{method.__name__}_cv.json"), "w") as json_file:
        json.dump(results, json_file, indent=4)

    with open(os.path.join(save_folder, f"{method.__name__}.json"), "w") as json_file:
        json.dump(best_model_results, json_file, indent=4)



# Test Methods

In [3]:
# Registry filled by the cells below: name -> {"method": estimator class, "parameters": grid}
methods = {}

## One Class SVM

In [4]:
from sklearn.svm import OneClassSVM
# Grid for the one-class SVM: 4 kernels x 5 nu values x 2 gamma settings
# (40 combinations per condition), with the solver capped at 1000 iterations.
methods["OneClassSVM"] = dict(
    method=OneClassSVM,
    parameters=dict(
        kernel=['linear', 'rbf', 'poly', 'sigmoid'],
        nu=[0.1, 0.2, 0.3, 0.4, 0.5],
        gamma=['scale', 'auto'],
        max_iter=[1000],
    ),
)

## Isolation Forest

In [5]:
from sklearn.ensemble import IsolationForest

# Grid for the isolation forest: 3 x 3 x 4 x 4 = 144 combinations per condition.
methods["IsolationForest"] = {
    "method": IsolationForest,
    "parameters": {
        'n_estimators': [100, 200, 300],
        'max_samples': ['auto', 0.5, 0.75],
        'contamination': ['auto', 0.05, 0.1, 0.2],
        'max_features': [0.1, 0.5, 0.75, 1.0],
        # Isolation forests are randomized. A single fixed seed keeps the grid
        # size unchanged while making CV scores and the selected model
        # reproducible across runs (same seed as the EllipticEnvelope grid).
        'random_state': [42]
    }
}

## Local Outlier Factor

In [None]:
from sklearn.neighbors import LocalOutlierFactor
# Grid for the local outlier factor: 4 x 5 x 4 x 4 x 4 = 1280 combinations per
# condition. novelty is pinned to True because test_method calls predict() on
# samples that were not part of fit().
# NOTE(review): algorithm and leaf_size appear to configure only the
# neighbour-search backend; if they do not change the scores, dropping them
# would shrink the grid 20x -- confirm against the scikit-learn docs.
methods["LocalOutlierFactor"] = dict(
    method=LocalOutlierFactor,
    parameters=dict(
        algorithm=["auto", "ball_tree", "kd_tree", "brute"],
        leaf_size=[10, 20, 30, 40, 50],
        metric=["euclidean", "minkowski", "manhattan", "chebyshev"],
        n_neighbors=[5, 10, 20, 50],
        contamination=["auto", 0.05, 0.1, 0.2],
        novelty=[True],
    ),
)

## Elliptic Envelope

In [7]:
from sklearn.covariance import EllipticEnvelope
# Grid for the elliptic envelope: 6 contamination levels x 6 support fractions
# (36 combinations per condition), with a fixed seed for repeatable fits.
methods["EllipticEnvelope"] = dict(
    method=EllipticEnvelope,
    parameters=dict(
        contamination=[0.01, 0.05, 0.1, 0.2, 0.4, 0.5],
        support_fraction=[None, 0.1, 0.2, 0.3, 0.4, 0.5],
        random_state=[42],
    ),
)

## Test all methods

In [8]:
# Run the full grid search for every registered method, in registration order.
# Each call writes its own pair of JSON files into save_folder.
for m, spec in methods.items():
    method, parameters = spec["method"], spec["parameters"]
    print(f"Testing {m}...")
    test_method(
        method,
        parameters,
        cv_folds=cv_folds,
        train_portion=train_portion,
        fps=fps,
        f=f,
        b=b,
        a=a,
        d_type=d_type,
        pre_path=pre_path,
        save_folder=save_folder,
    )

Testing OneClassSVM...


Testing method: OneClassSVM: 100%|██████████| 120/120 [01:24<00:00,  1.41model/s]


Testing IsolationForest...


Testing method: IsolationForest: 100%|██████████| 432/432 [14:38<00:00,  2.03s/model]


Testing LocalOutlierFactor...


Testing method: LocalOutlierFactor:   0%|          | 4/3840 [00:04<1:16:33,  1.20s/model]

InvalidParameterError: The 'metric' parameter of LocalOutlierFactor must be a str among {'canberra', 'matching', 'l1', 'hamming', 'chebyshev', 'rogerstanimoto', 'minkowski', 'pyfunc', 'precomputed', 'cityblock', 'sokalmichener', 'euclidean', 'nan_euclidean', 'russellrao', 'kulsinski', 'mahalanobis', 'wminkowski', 'braycurtis', 'cosine', 'correlation', 'jaccard', 'haversine', 'p', 'seuclidean', 'dice', 'l2', 'sokalsneath', 'yule', 'manhattan', 'sqeuclidean', 'infinity'} or a callable. Got 'minkowsky' instead.