In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn                    import metrics
from sklearn.linear_model       import LogisticRegression, LinearRegression, RidgeClassifier, Lasso, SGDClassifier
from sklearn.neural_network     import MLPClassifier
from sklearn                    import svm
from sklearn.preprocessing      import StandardScaler, OrdinalEncoder, OneHotEncoder, PowerTransformer, QuantileTransformer
from sklearn.cluster            import KMeans
from sklearn.neighbors          import KNeighborsClassifier
from sklearn.naive_bayes        import GaussianNB, MultinomialNB
from sklearn.model_selection    import cross_val_score, ShuffleSplit, GridSearchCV, train_test_split, StratifiedKFold, cross_val_predict
from sklearn                    import pipeline
from sklearn.tree               import DecisionTreeClassifier
from sklearn.experimental       import enable_hist_gradient_boosting # for HistGradientBoostingClassifier
from sklearn.ensemble           import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
from xgboost                    import XGBClassifier
#from lightgbm                   import LGBMClassifier
from catboost                   import CatBoostClassifier
from sklearn                    import impute
from sklearn                    import compose
import datetime as dt
import time
import os


In [5]:
df = pd.read_csv("dataset_halfSecondWindow.csv", index_col='id')

train_users = df[(df['user'] != "U12") & (df['user'] != "U9") & (df['user'] != "U2")]
test_user = df[(df['user'] == "U12") | (df['user'] == "U9")| (df['user'] == "U2")]


print(train_users['target'].unique())
print(test_user['target'].unique())
print(f"TRAIN USERS {len(train_users)}")
print(f"TEST USERS {len(test_user)}")

['Bus' 'Car' 'Still' 'Train' 'Walking']
['Bus' 'Car' 'Still' 'Train' 'Walking']
TRAIN USERS 49782
TEST USERS 12803


In [6]:
tree_classifiers = {
        "Decision Tree": DecisionTreeClassifier(),
        "Extra Trees":ExtraTreesClassifier(),
        "Random Forest":RandomForestClassifier(),
        #"AdaBoost":AdaBoostClassifier(),
        #"Skl GBM":GradientBoostingClassifier(),
        "Skl HistGBM":HistGradientBoostingClassifier(),
        #"XGBoost":XGBClassifier(use_label_encoder=False),
        #"LightGBM":LGBMClassifier(),
        "CatBoost":CatBoostClassifier(verbose=0)
        }
        

In [13]:
cat_vars1 = ['user']

num_vars_nan0 = [
            "time",
            # "activityrecognition#0",
            "activityrecognition#1",
            "android.sensor.accelerometer#mean",
            "android.sensor.accelerometer#min",
            "android.sensor.accelerometer#max",
            "android.sensor.accelerometer#std",

            # "android.sensor.game_rotation_vector#mean",
            # "android.sensor.game_rotation_vector#min",
            # "android.sensor.game_rotation_vector#max",
            # "android.sensor.game_rotation_vector#std",

            # "android.sensor.gravity#mean",
            # "android.sensor.gravity#min",
            # "android.sensor.gravity#max",
            # "android.sensor.gravity#std",

            "android.sensor.gyroscope#mean",
            "android.sensor.gyroscope#min",
            "android.sensor.gyroscope#max",
            "android.sensor.gyroscope#std",

            "android.sensor.gyroscope_uncalibrated#mean",
            "android.sensor.gyroscope_uncalibrated#min",
            "android.sensor.gyroscope_uncalibrated#max",
            "android.sensor.gyroscope_uncalibrated#std",

            # "android.sensor.light#mean",
            # "android.sensor.light#min",
            # "android.sensor.light#max",
            # "android.sensor.light#std",

            "android.sensor.linear_acceleration#mean",
            "android.sensor.linear_acceleration#min",
            "android.sensor.linear_acceleration#max",
            "android.sensor.linear_acceleration#std",

            # "android.sensor.magnetic_field#mean",
            # "android.sensor.magnetic_field#min",
            # "android.sensor.magnetic_field#max",
            # "android.sensor.magnetic_field#std",

            # "android.sensor.magnetic_field_uncalibrated#mean",
            # "android.sensor.magnetic_field_uncalibrated#min",
            # "android.sensor.magnetic_field_uncalibrated#max",
            # "android.sensor.magnetic_field_uncalibrated#std",

            # "android.sensor.orientation#mean",
            # "android.sensor.orientation#min",
            # "android.sensor.orientation#max",
            # "android.sensor.orientation#std",

            "android.sensor.pressure#mean",
            "android.sensor.pressure#min",
            "android.sensor.pressure#max",
            "android.sensor.pressure#std",

            # "android.sensor.proximity#mean",
            # "android.sensor.proximity#min",
            # "android.sensor.proximity#max",
            # "android.sensor.proximity#std",

            # "android.sensor.rotation_vector#mean",
            # "android.sensor.rotation_vector#min",
            # "android.sensor.rotation_vector#max",
            # "android.sensor.rotation_vector#std",

            "android.sensor.step_counter#mean",
            "android.sensor.step_counter#min",
            "android.sensor.step_counter#max",
            "android.sensor.step_counter#std",

            # "sound#mean",
            # "sound#min",
            # "sound#max",
            # "sound#std",
            
            "speed#mean",
            "speed#min",
            "speed#max",
            "speed#std",
            
            #"target",
            #"user"
            ]


In [9]:
x_train = train_users[cat_vars1 + num_vars_nan0]
x_val = test_user[cat_vars1 + num_vars_nan0]
y_train = train_users['target'].map({"Bus":1, "Car":1, "Still": 2, "Train":3, "Walking": 4})
y_val = test_user['target'].map({"Bus":1, "Car":1, "Still": 2, "Train":3, "Walking": 4})

In [10]:
cat_pipe = pipeline.Pipeline(steps=[
            ("OneHot", OrdinalEncoder(handle_unknown='mean'))
])

num_pipe_nan0 = pipeline.Pipeline(steps=[
            ('imputer', impute.SimpleImputer(strategy="constant", fill_value=0)),
            #('Scaler',StandardScaler()),
])
num_pipe_nanM = pipeline.Pipeline(steps=[
            ('imputer', impute.SimpleImputer(strategy="mean")),
            #('Scaler',StandardScaler()),
])
main_pipe = compose.ColumnTransformer(transformers=[
            #('cats', cat_pipe, cat_vars1),
            ('nums0', num_pipe_nan0, num_vars_nan0)],
            #('numsM', num_pipe_nanM, num_vars_nanM)],
            remainder='drop')

all_pipes = {model_name: pipeline.make_pipeline(main_pipe, model) for model_name, model in tree_classifiers.items()}

In [11]:
results = pd.DataFrame({'Model': [], 'Accuracy': [], 'Bal Acc.': [], 'Time': []})
for model_name, model in all_pipes.items():
    print(f"Working on: {model_name}")
    print(model)
    start_time = time.time()
    model.fit(x_train, y_train)
    pred = model.predict(x_val)
    end_time = time.time() - start_time

    results = results.append({"Model":    model_name,
                              "Accuracy": metrics.accuracy_score(y_val, pred)*100,
                              "Bal Acc.": metrics.balanced_accuracy_score(y_val, pred)*100,
                              "Time":     end_time},
                              ignore_index=True)
    print(results)


Working on: Decision Tree
Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('nums0',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value=0,
                                                                                 strategy='constant'))]),
                                                  ['time',
                                                   'activityrecognition#1',
                                                   'android.sensor.accelerometer#mean',
                                                   'android.sensor.accelerometer#min',
                                                   'android.sensor.accelerometer#max',
                                                   'android.sensor.accelerometer#std',
                                                   'android.sensor.gyros...
                                      

In [26]:
cal_data = pd.DataFrame(data=pred, columns=['Predicions'])

In [38]:
cal_data.head()

Unnamed: 0,Predicions
0,5
1,5
2,5
3,5
4,5


In [55]:
frames = [cal_data, x_val.time]
my_data = pd.concat(frames,)
my_data

0
1
2
3
4
...
189304
189289
187291
184132
184591


In [49]:
cal_data.shape

(12803, 1)

In [12]:
results.head()

Unnamed: 0,Model,Accuracy,Bal Acc.,Time
0,Decision Tree,46.137624,45.189404,1.890338
1,Extra Trees,47.582598,43.292969,5.224442
2,Random Forest,60.313989,55.48592,18.38855
3,Skl HistGBM,67.179567,61.077153,293.860671
4,CatBoost,74.146684,68.503507,121.561105


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=dd66c499-8cd9-44d2-87d5-ecc9c3ddd387' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>