In [None]:
# Imports: pandas/numpy, plotting, and the scikit-learn, XGBoost and CatBoost estimators.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn                    import metrics
from sklearn.linear_model       import LogisticRegression, LinearRegression, RidgeClassifier, Lasso, SGDClassifier
from sklearn.neural_network     import MLPClassifier
from sklearn                    import svm
from sklearn.preprocessing      import StandardScaler, OrdinalEncoder, OneHotEncoder, PowerTransformer, QuantileTransformer
from sklearn.cluster            import KMeans
from sklearn.neighbors          import KNeighborsClassifier
from sklearn.naive_bayes        import GaussianNB, MultinomialNB
from sklearn.model_selection    import cross_val_score, ShuffleSplit, GridSearchCV, train_test_split, StratifiedKFold, cross_val_predict
from sklearn                    import pipeline
from sklearn.tree               import DecisionTreeClassifier
from sklearn.experimental       import enable_hist_gradient_boosting # for HistGradientBoostingClassifier
from sklearn.ensemble           import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, VotingClassifier
from xgboost                    import XGBClassifier
#from lightgbm                   import LGBMClassifier
from catboost                   import CatBoostClassifier
from sklearn                    import impute
from sklearn                    import compose
import datetime as dt
import time
import os

In [None]:
# Load the half-second-window dataset, using the 'id' column as the row index.
df = pd.read_csv("dataset_halfSecondWindow.csv", index_col='id')

# Rows belonging to these users form the validation set; every other row is used
# for training, so the split is made per user rather than per row.
held_out_ids = ["U12", "U9", "U2"]
is_held_out = df['user'].isin(held_out_ids)

train_users = df[~is_held_out]
test_user = df[is_held_out]


In [None]:
# One seed shared by every estimator that takes a random_state, so repeated runs
# of the notebook produce the same scores.
SEED = 909

# Candidate classifiers keyed by the display name used in the results table.
# This group is combined with the imputing + scaling preprocessor when the
# pipelines are built.
mult_classifiers = {
        #"LM Linear Regression": LinearRegression(), # regression model, not useful for classification
        #"LM Logistic Regression": LogisticRegression(),
        "SGDC": SGDClassifier(random_state=SEED),  # shuffles the data each epoch, so it needs a seed
        "LM Ridge": RidgeClassifier(),
        # "LM Lasso": Lasso(),
        "NN Multi layer Perceptron": MLPClassifier(random_state=SEED),
        "SVM Linear": svm.SVC(kernel='linear'),
        "SVM RBF": svm.SVC(kernel='rbf'),
        "KNN": KNeighborsClassifier(),
        "BM Guassian Naive Bayes": GaussianNB(),
}

# Tree-based classifiers; this group is combined with the imputing-only
# preprocessor (no scaling) when the pipelines are built.
tree_classifiers = {
        "Decision Tree": DecisionTreeClassifier(random_state=SEED),
        "Extra Trees": ExtraTreesClassifier(random_state=SEED),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=SEED),
        "AdaBoost": AdaBoostClassifier(random_state=SEED),
        #"Skl GBM":GradientBoostingClassifier(random_state=909),
        #"Skl HistGBM":HistGradientBoostingClassifier(random_state=909),
        #"XGBoost":XGBClassifier(use_label_encoder=True),
        #"LightGBM":LGBMClassifier(),
        "CatBoost": CatBoostClassifier(verbose=0),
        #"VotingClassifier": VotingClassifier(estimators=vc_models, voting='hard')
}

In [None]:
#### FEATURES TO BE SELECTED
# Numeric input columns, one name per line, split on whitespace into a list.
# Every name has the form "<sensor>#<statistic>". The order is kept exactly as
# written because this list is used to select (and therefore order) the
# feature columns.
num_vars_nan9 = """
    android.sensor.rotation_vector#min
    android.sensor.linear_acceleration#max
    android.sensor.linear_acceleration#min
    android.sensor.game_rotation_vector#std
    android.sensor.linear_acceleration#std
    speed#std
    android.sensor.accelerometer#max
    android.sensor.gyroscope#min
    android.sensor.accelerometer#std
    android.sensor.orientation#min
    android.sensor.linear_acceleration#mean
    android.sensor.orientation#std
    speed#mean
    speed#max
    android.sensor.game_rotation_vector#mean
    android.sensor.accelerometer#mean
    android.sensor.gyroscope#max
    sound#mean
    android.sensor.gyroscope#std
    speed#min
    android.sensor.orientation#mean
    android.sensor.gyroscope#mean
    android.sensor.rotation_vector#mean
    android.sensor.rotation_vector#std
    android.sensor.gyroscope_uncalibrated#std
    android.sensor.gyroscope_uncalibrated#max
    android.sensor.rotation_vector#max
    android.sensor.gyroscope_uncalibrated#min
    android.sensor.orientation#max
    android.sensor.gyroscope_uncalibrated#mean
    sound#max
    sound#min
    android.sensor.game_rotation_vector#min
    android.sensor.accelerometer#min
    android.sensor.game_rotation_vector#max
    sound#std
""".split()


In [None]:
## SELECT FEATURES FOR TRAIN/TEST SPLIT
# Integer code for each label in the 'target' column. "Bus" and "Car" share
# code 2, so the models treat them as a single class.
# NOTE(review): presumably an intentional merge -- confirm.
target_codes = {"Bus": 2, "Car": 2, "Still": 3, "Train": 4, "Walking": 5}


def encode_target(labels):
    """Map a Series of target labels to their integer codes.

    Series.map returns NaN for any label that is missing from target_codes;
    raise a ValueError naming those labels instead of passing NaN targets on
    to the models.
    """
    encoded = labels.map(target_codes)
    unmapped = labels[encoded.isna()].unique()
    if len(unmapped):
        raise ValueError(f"Target labels without a code: {list(unmapped)}")
    return encoded


x_train = train_users[num_vars_nan9]
x_val = test_user[num_vars_nan9]
y_train = encode_target(train_users['target'])
y_val = encode_target(test_user['target'])

In [None]:
### BUILD PIPELINES
# Encoder for categorical columns. It is not used by either ColumnTransformer
# below (their 'cats' entries are commented out).
# handle_unknown accepts only 'error' or 'use_encoded_value'; with the latter,
# categories unseen during fit are encoded as unknown_value (-1 here).
# The step keeps its existing name "OneHot" even though it is an ordinal encoder.
cat_pipe = pipeline.Pipeline(steps=[
            ("OneHot", OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1))
])

# Numeric preprocessing for the tree models: missing values become 0, no scaling.
num_pipe_tree = pipeline.Pipeline(steps=[
            ('imputer', impute.SimpleImputer(strategy="constant", fill_value=0)),
            #('Scaler',StandardScaler()),
])
# Numeric preprocessing for the remaining models: missing values become 0, then
# the features are standardised.
num_pipe_mult = pipeline.Pipeline(steps=[
            ('imputer', impute.SimpleImputer(strategy="constant", fill_value=0)),
            ('Scaler', StandardScaler()),
])

# Apply the numeric preprocessing to the selected feature columns and drop
# every other column.
tree_pipe = compose.ColumnTransformer(
            transformers=[
                #('cats', cat_pipe, cat_vars1),
                ('nums0', num_pipe_tree, num_vars_nan9),
                #('numsM', num_pipe_nanM, num_vars_nanM),
            ],
            remainder='drop',
            )

mult_pipe = compose.ColumnTransformer(
            transformers=[
                #('cats', cat_pipe, cat_vars1),
                ('nums0', num_pipe_mult, num_vars_nan9),
                #('numsM', num_pipe_nanM, num_vars_nanM),
            ],
            remainder='drop',
            )

# One preprocessing + model pipeline per classifier, keyed by display name.
# Note: every pipeline in a group holds the same ColumnTransformer object, so
# fitting one pipeline refits the preprocessor that the others also reference.
tree_pipes = {model_name: pipeline.make_pipeline(tree_pipe, model) for model_name, model in tree_classifiers.items()}
mult_pipes = {model_name: pipeline.make_pipeline(mult_pipe, model) for model_name, model in mult_classifiers.items()}

# Tree pipelines first, then the scaled ones; this is the iteration order used
# when the models are run.
all_pipes = {**tree_pipes, **mult_pipes}

In [None]:
### RUN MODELS
# Fit each pipeline on the training split, predict the validation split and
# record accuracy, balanced accuracy (both in percent) and the elapsed time.
result_columns = ['Model', 'Accuracy', 'Bal Acc.', 'Time']
result_rows = []
results = pd.DataFrame(columns=result_columns)  # stays empty if there are no pipelines
for model_name, model in all_pipes.items():
    print(f"Working on: {model_name}")
    print(model)
    start_time = time.perf_counter()
    model.fit(x_train, y_train)
    pred = model.predict(x_val)
    elapsed = time.perf_counter() - start_time  # seconds spent on fit + predict

    result_rows.append({
        "Model":    model_name,
        "Accuracy": metrics.accuracy_score(y_val, pred)*100,
        "Bal Acc.": metrics.balanced_accuracy_score(y_val, pred)*100,
        "Time":     elapsed,
    })
    # DataFrame.append no longer exists in pandas 2.0+, so the table is rebuilt
    # from the collected rows. Doing it inside the loop keeps `results` usable
    # if the run is interrupted and keeps the cumulative progress printout.
    results = pd.DataFrame(result_rows, columns=result_columns)
    print(results)

Working on: Decision Tree
Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('nums0',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value=0,
                                                                                 strategy='constant'))]),
                                                  ['android.sensor.rotation_vector#min',
                                                   'android.sensor.linear_acceleration#max',
                                                   'android.sensor.linear_acceleration#min',
                                                   'android.sensor.game_rotation_vector#std',
                                                   'android.sensor.linear_accelerat...
                                                   'android.sensor.rotation_vector#std',
                                                   'a

In [None]:
# Print the first 12 rows of the results table as plain text.
first_rows = results.iloc[:12]
print(first_rows)

                        Model   Accuracy   Bal Acc.        Time
0               Decision Tree  57.564633  54.758244    2.505327
1                 Extra Trees  71.756620  67.453011    4.478802
2               Random Forest  81.488714  77.087366   24.172392
3                    AdaBoost  71.998750  67.389027   10.498035
4                    CatBoost  78.794033  72.981573  102.525018
5                        SGDC  68.726080  61.677309    2.006674
6                    LM Ridge  63.032102  56.572344    0.224708
7   NN Multi layer Perceptron  63.000859  59.299220  206.874665
8                  SVM Linear  70.053894  64.001813  266.871469
9                     SVM RBF  72.904788  69.810520  127.504581
10                        KNN  55.221432  53.685564   11.636965
11    BM Guassian Naive Bayes  65.828321  61.453055    0.120986


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=dd66c499-8cd9-44d2-87d5-ecc9c3ddd387' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>