In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.ensemble import IsolationForest
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# Load the crowd dataset, one-hot encode it (dropping the first level of each
# encoded column so the indicators are not redundant), then discard every row
# that still contains a missing value.
# NOTE(review): get_dummies runs before dropna, so a missing value in a
# categorical column is presumably encoded as all-zero indicators instead of
# causing the row to be dropped -- confirm this is intended.
data_path = 'final_adjusted_crowd_dataset.csv'
df = pd.get_dummies(pd.read_csv(data_path), drop_first=True).dropna()

# Feature matrix: every column except the target column.
X = df.drop('label', axis=1)

# max_samples settings to compare, and the per-setting metrics keyed by the
# setting that produced them.
max_samples_values = [0.5, 0.75, 1.0]
max_samples_results = dict()

# The ground-truth labels are the same for every setting, so look them up once
# instead of four times per iteration.
y_true = df['label']

# Fit one IsolationForest per max_samples setting on the full feature matrix,
# score its in-sample predictions against the labels, and record the metrics
# together with the fit + predict time.
# NOTE(review): contamination is fixed at 0.05; the low recall next to the
# moderate precision in recorded runs suggests the labelled positive rate may
# be far higher than 5% -- confirm the contamination value and label convention.
for max_samples in max_samples_values:
    # perf_counter is the clock meant for measuring intervals; time.time() is
    # a wall clock that can jump when the system time is adjusted.
    start_time = time.perf_counter()
    iso_forest = IsolationForest(contamination=0.05, max_samples=max_samples, random_state=42)
    iso_forest.fit(X)
    # predict() marks outliers as -1; remap so 1 means "predicted outlier" and
    # 0 means "predicted inlier", matching how `label` is compared below
    # (assumes label 1 = anomaly -- TODO confirm).
    y_pred = np.where(iso_forest.predict(X) == -1, 1, 0)
    # Stop the clock before scoring: metric computation does not depend on
    # max_samples and would only add constant overhead to the comparison.
    runtime = time.perf_counter() - start_time

    f1 = f1_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    max_samples_results[max_samples] = {
        'F1-Score': f1,
        'Precision': precision,
        'Recall': recall,
        'Accuracy': accuracy,
        'Runtime (seconds)': runtime
    }

# Print one report per max_samples setting: a header line, one indented line
# per metric, then a blank separator line.
for setting, scores in max_samples_results.items():
    report_lines = [f"Max Samples: {setting}"]
    report_lines.extend(f"  {name}: {score}" for name, score in scores.items())
    # end="\n\n" terminates the last line and emits the blank separator.
    print("\n".join(report_lines), end="\n\n")


Max Samples: 0.5
  F1-Score: 0.09173310036038004
  Precision: 0.5890603085553997
  Recall: 0.04973945997157745
  Accuracy: 0.4163099164853674
  Runtime (seconds): 2.6325387954711914

Max Samples: 0.75
  F1-Score: 0.09151468821666485
  Precision: 0.5876577840112202
  Recall: 0.049621032685930835
  Accuracy: 0.41616955575829884
  Runtime (seconds): 2.903280735015869

Max Samples: 1.0
  F1-Score: 0.09675657966582941
  Precision: 0.6213183730715287
  Recall: 0.05246328754144955
  Accuracy: 0.41953821320794443
  Runtime (seconds): 3.289877414703369

