# Dependencies

In [17]:
import copy
import glob
import itertools
import os
import sys
import time
from datetime import datetime, timedelta
from multiprocessing import Pool, cpu_count
from operator import itemgetter
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.svm import SVR
from sklearn.svm import LinearSVR
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

from Chapter7.PrepareDatasetForLearning import PrepareDatasetForLearning
from Chapter7.Evaluation import ClassificationEvaluation
from Chapter7.Evaluation import RegressionEvaluation
from Chapter7.FeatureSelection import FeatureSelectionClassification
from Chapter7.LearningAlgorithms import ClassificationAlgorithms
from Chapter7.LearningAlgorithms import RegressionAlgorithms

from util.VisualizeDataset import VisualizeDataset


# Feature Selection

We perform feature selection on a random 20% sample of the dataset.

#### Initial Train split for feature selection

In [None]:
# Load the dataset that the cells below sample from.
dataset = pd.read_csv("dataset_gran_250.csv")

# Separate the features from the activity label so the hold-out split has
# defined inputs (X and y were referenced here without ever being assigned).
X = dataset.drop(columns=['act'])
y = dataset['act']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=3)

# Feature selection runs on a fixed-seed 20% sample of the rows.
train = dataset.sample(frac=0.2, random_state=200)
train_x = train.drop(columns=['act'])
train_y = train['act']

#### Forward Selection

In [None]:
 selected_features, ordered_features, ordered_scores = FeatureSelectionClassification().forward_selection(max_features, train_X, train_y)

# Experiments - whole data set

In [None]:
# Disabled: would replace `dataset` with the contents of 'selected_set.csv'.
# While this stays commented out, the frame loaded earlier remains in use.
# dataset = pd.read_csv(Path('selected_set.csv'))

We begin with a random forest across all targets. We will then apply other algorithms to whichever target appears to be the most predictable.

Predict activity label - whole dataset.

In [None]:
# Take reproducible random samples for this experiment: 30% of the rows for
# training, then 30% of the remaining rows for testing.
train = dataset.sample(frac=0.3, random_state=200)  # random_state is the seed value
test = dataset.drop(train.index)
test = test.sample(frac=0.3, random_state=200)

# Target is the activity label; every other column is used as a feature.
train_y = train['act']
train_X = train.drop(columns=['act'])
test_y = test['act']
test_X = test.drop(columns=['act'])

pred_training_y, pred_test_y, frame_prob_training_y, frame_prob_test_y = ClassificationAlgorithms().random_forest(train_X, train_y, test_X)



Predict age based on sensor values - whole dataset

In [None]:
# Reproducible samples: 30% of the rows for training, then 30% of the
# remaining rows for testing.
train = dataset.sample(frac=0.3, random_state=200)
test = dataset.drop(train.index)
test = test.sample(frac=0.3, random_state=200)

# Target is 'age'; every other column (including the activity label) is a feature.
# NOTE(review): confirm that none of the remaining columns leak the target.
train_y = train['age']
train_X = train.drop(columns=['age'])
test_y = test['age']
test_X = test.drop(columns=['age'])

# Plain assignment at cell level: the previous `return ... = ...(self, ...)`
# was a syntax error, and `self` does not exist outside a method.
pred_training_y, pred_test_y = RegressionAlgorithms().random_forest(train_X, train_y, test_X)

Predict gender - whole dataset

In [None]:
# Reproducible samples: 30% of the rows for training, then 30% of the
# remaining rows for testing.
train = dataset.sample(frac=0.3, random_state=200)
test = dataset.drop(train.index)
test = test.sample(frac=0.3, random_state=200)

# Target is 'gender'; every other column is used as a feature.
# NOTE(review): confirm that none of the remaining columns leak the target.
train_y = train['gender']
train_X = train.drop(columns=['gender'])
test_y = test['gender']
test_X = test.drop(columns=['gender'])

pred_training_y, pred_test_y, frame_prob_training_y, frame_prob_test_y = ClassificationAlgorithms().random_forest(train_X, train_y, test_X)

Predict weight - whole dataset

In [None]:
# Reproducible samples: 30% of the rows for training, then 30% of the
# remaining rows for testing.
train = dataset.sample(frac=0.3, random_state=200)
test = dataset.drop(train.index)
test = test.sample(frac=0.3, random_state=200)

# Target is 'weight'; every other column is used as a feature.
# NOTE(review): confirm that none of the remaining columns leak the target.
train_y = train['weight']
train_X = train.drop(columns=['weight'])
test_y = test['weight']
test_X = test.drop(columns=['weight'])

# Plain assignment at cell level: the previous `return ... = ...(self, ...)`
# was a syntax error, and `self` does not exist outside a method.
pred_training_y, pred_test_y = RegressionAlgorithms().random_forest(train_X, train_y, test_X)

# Experiments - By Trial

In [None]:
# One sub-frame per distinct value of the 'trial' column, in the order the
# values first appear.
trials = [
    dataset.loc[dataset['trial'] == trial_id]
    for trial_id in dataset['trial'].unique()
]

In [None]:
# Single classifier-wrapper instance, handed to train_classification() by the
# per-trial loop so it is not re-created for every trial.
class_alg = ClassificationAlgorithms()

def generate_sets(data_set, target='act', random_state=None):
    """Sample train/test features and labels from ``data_set``.

    30% of the rows are drawn for training; 30% of the remaining rows are
    drawn for testing, so the two samples never share a row.

    Args:
        data_set: DataFrame to sample from. (The global ``dataset`` was
            previously sampled instead, so this argument was ignored.)
        target: Name of the label column. Defaults to ``'act'``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` for
            reproducible splits. ``None`` keeps the sampling unseeded.

    Returns:
        Tuple ``(train_y, train_X, test_y, test_X)``.
    """
    train = data_set.sample(frac=0.3, random_state=random_state)
    # Drop the training rows before sampling so test rows are disjoint from them.
    test = data_set.drop(train.index).sample(frac=0.3, random_state=random_state)

    train_y = train[target]
    train_X = train.drop(columns=[target])
    test_y = test[target]
    test_X = test.drop(columns=[target])

    return train_y, train_X, test_y, test_X



def train_classification(train_y, train_X, test_y, test_X, class_alg):
    """Fit ``class_alg``'s random forest and return its four outputs.

    ``test_y`` is accepted for signature symmetry but is not used here.

    Returns:
        Tuple of (training predictions, test predictions,
        training result frame, test result frame), exactly as produced by
        ``class_alg.random_forest(train_X, train_y, test_X)``.
    """
    (
        fitted_train_pred,
        fitted_test_pred,
        train_prob_frame,
        test_prob_frame,
    ) = class_alg.random_forest(train_X, train_y, test_X)
    return fitted_train_pred, fitted_test_pred, train_prob_frame, test_prob_frame

def evaluate_classifcation(y_true, y_pred):
    """Score predictions against the true labels.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)`` as computed by a fresh
        ``ClassificationEvaluation`` instance.
    """
    scorer = ClassificationEvaluation()
    accuracy_score = scorer.accuracy(y_true, y_pred)
    precision_score = scorer.precision(y_true, y_pred)
    recall_score = scorer.recall(y_true, y_pred)
    f1_score = scorer.f1(y_true, y_pred)
    return accuracy_score, precision_score, recall_score, f1_score



Random forest for the labels by trial

In [None]:
# Per-trial metric collectors; one entry is appended for each trial subset.
acc = []
prec = []
recall = []
f1 = []

for subset in trials:
    train_y, train_X, test_y, test_X = generate_sets(subset)
    pred_training_y, pred_test_y, frame_prob_training_y, frame_prob_test_y = train_classification(train_y, train_X, test_y, test_X, class_alg)
    # The per-trial recall gets its own name (`recall_`): unpacking into
    # `recall` rebound the result list, so `recall.append(recall)` broke.
    accuracy, precision, recall_, f1_ = evaluate_classifcation(test_y, pred_test_y)
    acc.append(accuracy)
    prec.append(precision)
    recall.append(recall_)
    f1.append(f1_)