# Activity classification

Classification of device activities (tracked with ActivityWatch) from EEG data.

In [None]:
# Imports
import logging
import matplotlib.pyplot as plt
import eegclassify
from eegclassify import main, load, clean, features, preprocess, plot, transform

# Set this to True to run on testing data.
# When enabled, the PYTEST_CURRENT_TEST environment variable is set for this
# kernel process, which is the variable pytest itself sets while a test runs.
# NOTE(review): presumably eegclassify checks this variable to switch to its
# test dataset -- confirm against the loader.
simulate_test = False
if simulate_test:
    import os
    os.environ['PYTEST_CURRENT_TEST'] = "true"
    
# Render figures inline in the notebook output, at 300 DPI.
%matplotlib inline
plt.rcParams['figure.dpi'] = 300

In [None]:
%%javascript
// Set the document title so that time spent working on this notebook
// can be tracked with ActivityWatch.
document.title='erb-thesis/Activity - Jupyter'

In [None]:
# Load the data once and keep it in a dedicated variable that the other cells
# never reassign, so they can be re-run without repeating the slow load.
df_loaded = load.load_labeled_eeg2()

In [None]:
# TODO: Split data into sessions to perform out-of-session cross-validation

# Quick sanity check of the loaded data
# (summary statistics, assuming df_loaded is a pandas DataFrame).
df_loaded.describe()

In [None]:
# Preprocess

# Always start from the untouched df_loaded so this cell can be re-run
# without reloading the data.
df = df_loaded
# NOTE(review): min_duration=5 is presumably in seconds -- the raw-training cell
# multiplies the row count by 5 to estimate the hours of data; confirm.
df = preprocess.split_rows(df, min_duration=5)
# The cleaning step is currently disabled.
#df = clean.clean(df)
df

In [None]:
# Show how the preprocessed rows are distributed across classes
# (going by the helper's name).
plot.classdistribution(df)

In [None]:
# Can we do PCA on the signal?

# Raise the transform logger's threshold to ERROR so lower-severity messages
# are suppressed. This setting persists for the rest of the kernel session.
logging.getLogger('eegclassify.transform').setLevel(logging.ERROR)
# NOTE(review): presumably X is the signal array and y the labels -- confirm.
X, y = transform.signal_ndarray(df)
print(X.shape)
# The PCA plot is currently disabled.
#plot.pca(X, y)

In [None]:
# Datasets to train on, all derived from the same preprocessed `df`:
# one multi-class set followed by a series of two-class comparisons.
all_dfs = []

# All classes with a decent count
all_dfs.append(clean._remove_rare(df, "class", threshold_count=50))

# Two-class comparisons, one dataset per pair (order is preserved)
all_dfs.extend(
    clean._select_classes(df, "class", class_pair)
    for class_pair in (
        ["Editing->Code", "Editing->Prose"],        # code vs prose
        ["Twitter", "YouTube"],                     # Twitter vs YouTube
        ["Editing->Code", "Twitter"],               # code vs Twitter
        ["GitHub->Issues", "GitHub->Pull request"], # GitHub PR vs issue
    )
)

In [None]:
# Train on the raw signal, once per dataset in all_dfs.
from collections import Counter
import importlib

# Re-execute the modules so that edits made to them on disk are picked up
# without restarting the kernel.
importlib.reload(eegclassify.main)
importlib.reload(eegclassify.transform)

# Duration of one row in seconds, used to estimate the amount of data.
# NOTE(review): assumed to match the min_duration=5 passed to
# preprocess.split_rows -- confirm and keep the two in sync.
window_seconds = 5

for df_train in all_dfs:
    # Class balance of this dataset
    print(Counter(df_train['class']))
    print(f"Hours of data: {round(len(df_train['class']) * window_seconds / 60 / 60, 2)}")
    try:
        main._train_raw(df_train, shuffle=True)
    except Exception:
        # TODO: Fix testing data such that it doesn't err
        # Best effort: report the failure together with its traceback and
        # carry on with the next dataset.
        logging.getLogger(__name__).exception("Error while training")

In [None]:
# Train via main._train_features, once per dataset in all_dfs.
# The logger is created here so the error handler below has a defined name to
# log through (only the `logging` module itself is imported by the notebook).
logger = logging.getLogger(__name__)

for df_train in all_dfs:
    print(len(df_train))
    try:
        main._train_features(df_train)
    except Exception:
        # TODO: Fix testing data such that it doesn't err
        # Best effort: log the failure with its traceback and continue with
        # the next dataset.
        logger.exception("Error while training")