In [1]:
# Pin the scikit-learn version used by this notebook. %pip (instead of !pip)
# installs into the environment of the running kernel rather than whatever
# `pip` happens to be first on the shell PATH.
%pip install scikit-learn==1.3.2



In [2]:
import json
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold


In [3]:
# -o overwrites already-extracted files without prompting, so re-running this
# cell (e.g. Restart & Run All) does not stall on unzip's interactive
# "replace?" question.
!unzip -o traces.zip

Archive:  traces.zip
  inflating: drawn_apart_gen10_brave.csv  
  inflating: drawn_apart_gen10_chrome.csv  
  inflating: locked_apart_gen10_brave.csv  
  inflating: locked_apart_gen10_chrome.csv  


In [4]:
# Trace files to evaluate, in the order their results are reported.
CSVS = [
    'drawn_apart_gen10_brave.csv',
    'drawn_apart_gen10_chrome.csv',
    'locked_apart_gen10_brave.csv',
    'locked_apart_gen10_chrome.csv',
]

In [5]:
def main_for_df(df: pd.DataFrame) -> None:
    """Print the median collection time of ``df`` and evaluate a classifier on it.

    The median of the ``time_took`` column is printed first; the frame is then
    split into features and labels by ``get_X_y`` and handed to
    ``train_and_evaluate``, which prints the accuracy.

    Args:
        df: Frame with at least the ``time_took``, ``trace`` and
            ``machine_id`` columns.
    """
    print(f'Median collection time: {df["time_took"].median()} ms')
    features, labels = get_X_y(df)
    train_and_evaluate(features, labels)


def get_X_y(df: pd.DataFrame):
    """Split a trace frame into a feature matrix and a label series.

    Every entry of the ``trace`` column is parsed as JSON and the parsed
    values become the rows of the feature matrix, whose last column is then
    dropped. Labels are the ``machine_id`` column.

    Args:
        df: Frame with a ``trace`` column of JSON strings and a
            ``machine_id`` column.

    Returns:
        A ``(X, y)`` tuple: ``X`` is a DataFrame with a fresh default index,
        ``y`` is the ``machine_id`` Series carrying ``df``'s own index.
    """
    parsed_traces = [json.loads(raw_trace) for raw_trace in df['trace']]
    features = pd.DataFrame(parsed_traces)

    # NOTE(review): the final trace column is discarded; the reason is not
    # visible in this notebook -- confirm against the trace collection code.
    features = features.iloc[:, :-1]
    labels = df['machine_id']

    return features, labels


def train_and_evaluate(
    X: pd.DataFrame,
    y: pd.Series,
    n_splits: int = 5,
    n_estimators: int = 600,
    random_state: int = 42,
    n_jobs: int = 4,
):
    """Cross-validate a random forest on ``X``/``y`` and print its accuracy.

    The data is split with a shuffled ``KFold``; a fresh
    ``RandomForestClassifier`` is fitted on each training fold and scored on
    the held-out fold. The mean and standard deviation of the per-fold
    accuracies are printed.

    Args:
        X: Feature matrix, one row per sample.
        y: Labels, positionally aligned with the rows of ``X``.
        n_splits: Number of cross-validation folds.
        n_estimators: Number of trees in each forest.
        random_state: Seed used for both the fold shuffling and the forest.
        n_jobs: Number of parallel jobs passed to the forest.
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    accuracies = []
    for train_index, test_index in kf.split(X):
        # Positional indexing: X and y may carry different index labels.
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        clf = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=random_state,
            n_jobs=n_jobs,
        )
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        accuracies.append(accuracy)

    print(f'Accuracy: {np.mean(accuracies)} ± {np.std(accuracies)}')

In [6]:
# Evaluate every trace file in turn, labelling each result by the file's stem.
for csv_name in CSVS:
    print(f'Results for {Path(csv_name).stem}:')
    df = pd.read_csv(csv_name)
    main_for_df(df)

Results for drawn_apart_gen10_brave:
Median collection time: 1509.3999999277294 ms
Accuracy: 0.49466666666666664 ± 0.013359234102390326
Results for drawn_apart_gen10_chrome:
Median collection time: 1505.5999999977648 ms
Accuracy: 0.4064444444444445 ± 0.008955307413159945
Results for locked_apart_gen10_brave:
Median collection time: 92.19999999925494 ms
Accuracy: 0.592 ± 0.012240390536818163
Results for locked_apart_gen10_chrome:
Median collection time: 92.10000000149012 ms
Accuracy: 0.6133333333333334 ± 0.01572918966149903
