#### Model Training

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.preprocessing import OneHotEncoder

import warnings
warnings.simplefilter("ignore")

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay,precision_score, recall_score, f1_score

In [2]:
# Load the pre-split training and test sets from the local data/ directory.
train_data = pd.read_csv(filepath_or_buffer="data/train.csv")
test_data = pd.read_csv(filepath_or_buffer="data/test.csv")

#### 1. Dropping unnecessary columns & checking for null values

In [3]:
# Remove the exported index column and the "label" column from both splits;
# only the sensor features and the "fall" target are kept.
train_data = train_data.drop(columns=["Unnamed: 0", "label"])
test_data = test_data.drop(columns=["Unnamed: 0", "label"])

In [4]:
# Preview four randomly sampled rows of the training frame
# (no random_state is passed, so the rows shown differ between runs).
train_data.sample(4)

Unnamed: 0,acc_max,gyro_max,acc_kurtosis,gyro_kurtosis,lin_max,acc_skewness,gyro_skewness,post_gyro_max,post_lin_max,fall
168,22.923513,5.514014,7.203781,7.572189,7.927216,1.832042,2.811104,5.403473,7.767944,1
568,29.061488,10.793478,10.504503,10.411445,11.5422,1.654158,3.180944,10.722292,11.435346,1
206,26.917732,5.675823,13.03712,5.52203,8.813933,2.940641,2.3366,5.548924,8.701363,1
617,10.055318,0.222049,3.16766,13.943183,0.144888,0.383374,2.939892,0.148176,0.004242,0


In [5]:
# Preview four randomly sampled rows of the test frame
# (no random_state is passed, so the rows shown differ between runs).
test_data.sample(4)

Unnamed: 0,acc_max,gyro_max,acc_kurtosis,gyro_kurtosis,lin_max,acc_skewness,gyro_skewness,post_gyro_max,post_lin_max,fall
209,21.417019,3.861969,0.05579,0.496954,8.558422,0.54317,0.724711,1.112507,1.273035,0
308,12.69776,1.143348,15.396142,1.438593,1.027263,0.678141,1.71508,1.106321,0.96295,0
37,21.436249,6.86832,8.228273,19.489903,6.907209,1.518319,3.584338,5.465603,4.875544,1
206,23.311836,4.943009,1.521303,-0.392252,10.001058,0.816939,0.701253,0.928469,4.555352,0


In [6]:
# Count missing values per column in the training frame.
train_data.isnull().sum()

acc_max          0
gyro_max         0
acc_kurtosis     0
gyro_kurtosis    0
lin_max          0
acc_skewness     0
gyro_skewness    0
post_gyro_max    0
post_lin_max     0
fall             0
dtype: int64

In [7]:
# Count missing values per column in the test frame.
test_data.isnull().sum()

acc_max          0
gyro_max         0
acc_kurtosis     0
gyro_kurtosis    0
lin_max          0
acc_skewness     0
gyro_skewness    0
post_gyro_max    0
post_lin_max     0
fall             0
dtype: int64

#### 3. Splitting independent and dependent features for the training and testing sets

In [8]:
# Features: every column except the "fall" target.
X_train = train_data.drop(columns=["fall"])
X_test = test_data.drop(columns=["fall"])

# Target as a 1-D Series. Selecting with [["fall"]] would give a one-column
# DataFrame, and scikit-learn estimators expect a 1-D y (a column vector only
# works via an implicit conversion whose warning this notebook suppresses).
y_train = train_data["fall"]
y_test = test_data["fall"]

#### 4. Identifying categorical and numerical columns for encoding

In [9]:
# Partition the feature columns by dtype: object-dtype columns are treated as
# categorical, everything else as numerical.
numerical_cols = X_train.select_dtypes(exclude=["object"]).columns
categorical_cols = X_train.select_dtypes(include=["object"]).columns

In [10]:
# Show the object-dtype (categorical) feature columns.
categorical_cols

Index([], dtype='object')

In [11]:
# Show the non-object (numerical) feature columns.
numerical_cols

Index(['acc_max', 'gyro_max', 'acc_kurtosis', 'gyro_kurtosis', 'lin_max',
       'acc_skewness', 'gyro_skewness', 'post_gyro_max', 'post_lin_max'],
      dtype='object')

#### 5. Inspecting the unique values of the categorical columns

In [12]:
# Display the categorical column index again before inspecting its values.
categorical_cols

Index([], dtype='object')

In [13]:
# Print each categorical column's name followed by its distinct training values.
# Nothing is printed when categorical_cols is empty.
for column_name in categorical_cols:
    distinct_values = train_data[column_name].unique()
    print(column_name, distinct_values)

#### Libraries for handling missing values, feature scaling and feature engineering

In [14]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [15]:
# List the column names of the training frame (features plus the "fall" target).
train_data.columns

Index(['acc_max', 'gyro_max', 'acc_kurtosis', 'gyro_kurtosis', 'lin_max',
       'acc_skewness', 'gyro_skewness', 'post_gyro_max', 'post_lin_max',
       'fall'],
      dtype='object')

#### 6. Feature splitting (numerical features)

In [16]:
# Names of the nine numeric sensor features that are passed to the scaler.
numerical_features = [
    "acc_max",
    "gyro_max",
    "acc_kurtosis",
    "gyro_kurtosis",
    "lin_max",
    "acc_skewness",
    "gyro_skewness",
    "post_gyro_max",
    "post_lin_max",
]

#### 7. Defining the transformers

In [17]:
# Scaler applied to the numerical features; it is created with its default settings.
numeric_transformer = StandardScaler()


#### 8. Combining the transformers into a single preprocessor (this dataset has only numerical features, so only the numeric transformer is registered)

In [18]:
# Single preprocessing step: the "num" entry routes the numerical feature
# columns through the numeric transformer. The entry name becomes the prefix
# of the output feature names (e.g. "num__acc_max").
preprocessor = ColumnTransformer(
    transformers=[("num", numeric_transformer, numerical_features)],
)

In [19]:
# Show the first rows of the training features.
X_train.head()

Unnamed: 0,acc_max,gyro_max,acc_kurtosis,gyro_kurtosis,lin_max,acc_skewness,gyro_skewness,post_gyro_max,post_lin_max
0,26.039919,7.309797,20.378162,2.782476,11.13108,3.891361,1.592927,7.086618,10.7904
1,25.8645,6.511954,14.18719,5.324864,7.945561,3.022175,2.376939,6.325522,7.719352
2,27.524501,12.944099,31.855926,22.891186,14.454818,4.849024,4.28389,12.888111,14.368784
3,30.647705,11.694868,23.608764,9.287735,15.228303,3.921537,2.794609,11.549971,14.944151
4,26.373917,11.168424,14.318453,15.983202,10.007396,3.087975,3.363557,11.057636,9.753058


In [20]:
# Show the first rows of the test features.
X_test.head()

Unnamed: 0,acc_max,gyro_max,acc_kurtosis,gyro_kurtosis,lin_max,acc_skewness,gyro_skewness,post_gyro_max,post_lin_max
0,28.055199,10.794617,21.334536,34.163811,13.880578,3.283404,4.577283,10.755339,13.762561
1,26.639681,8.785024,13.518671,12.812894,15.789372,3.301849,3.464729,8.277714,15.341656
2,25.045219,5.307413,21.60306,4.754182,11.592445,3.124714,2.268676,4.976134,11.303823
3,24.102184,8.929061,24.647657,18.595684,10.107835,4.522305,3.955288,8.719755,9.727437
4,31.668808,10.71475,18.008912,15.086251,14.138265,3.142132,3.530621,10.580791,13.935016


#### 9. Applying the preprocessor code

In [21]:
# Fit the preprocessor on the training features and display the transformed
# array; the result is only shown here, not assigned to a variable.
preprocessor.fit_transform(X_train)

array([[ 0.78251629,  0.76180106,  0.86899226, ..., -0.0331466 ,
         1.12315231,  1.1222695 ],
       [ 0.75049306,  0.49266508,  0.35236934, ...,  0.75144965,
         0.90134963,  0.50684513],
       [ 1.05353174,  2.66241708,  1.82678622, ...,  2.65982166,
         2.81385571,  1.83936174],
       ...,
       [-1.76246427, -1.00110722, -0.68585406, ..., -1.20220513,
        -0.39979802, -0.8673627 ],
       [-0.3055713 , -0.09491505, -0.8297895 , ..., -1.31585641,
        -0.87123922, -0.84067464],
       [-2.15352691, -1.62866311, -0.734249  , ..., -0.45591744,
        -0.99970136, -1.04422772]])

#### 10. Converting into dataframe

In [22]:
# Build the scaled feature frames from the untouched source frames rather than
# from X_train / X_test themselves. This cell overwrites X_train / X_test with
# renamed "num__*" columns, so reading them as input would make a re-run fail:
# the preprocessor selects columns by their original names.
# The scaler is fitted on the training split only and then applied to the test
# split. The source index is kept so rows stay aligned with the targets.
X_train = pd.DataFrame(
    preprocessor.fit_transform(train_data[numerical_features]),
    columns=preprocessor.get_feature_names_out(),
    index=train_data.index,
)
X_test = pd.DataFrame(
    preprocessor.transform(test_data[numerical_features]),
    columns=preprocessor.get_feature_names_out(),
    index=test_data.index,
)

In [23]:
# Display the scaled training features.
X_train

Unnamed: 0,num__acc_max,num__gyro_max,num__acc_kurtosis,num__gyro_kurtosis,num__lin_max,num__acc_skewness,num__gyro_skewness,num__post_gyro_max,num__post_lin_max
0,0.782516,0.761801,0.868992,-0.204544,0.752368,1.424829,-0.033147,1.123152,1.122269
1,0.750493,0.492665,0.352369,0.258236,0.002519,0.856668,0.751450,0.901350,0.506845
2,1.053532,2.662417,1.826786,3.455756,1.534752,2.050824,2.659822,2.813856,1.839362
3,1.623683,2.241015,1.138579,0.979580,1.716825,1.444554,1.169432,2.423887,1.954663
4,0.843489,2.063430,0.363323,2.198326,0.487860,0.899680,1.738804,2.280409,0.914391
...,...,...,...,...,...,...,...,...,...
1423,-2.146124,-1.690571,-0.815369,-0.706871,-1.855320,-1.116735,-1.494185,-0.941656,-1.039555
1424,0.760243,2.123944,-0.914700,-0.594901,2.109931,-0.988564,-0.818006,0.044603,-0.694420
1425,-1.762464,-1.001107,-0.685854,-0.870155,-1.563703,-1.319585,-1.202205,-0.399798,-0.867363
1426,-0.305571,-0.094915,-0.829789,-0.908994,-0.343341,-0.893406,-1.315856,-0.871239,-0.840675


In [24]:
# Display the scaled test features.
X_test

Unnamed: 0,num__acc_max,num__gyro_max,num__acc_kurtosis,num__gyro_kurtosis,num__lin_max,num__acc_skewness,num__gyro_skewness,num__post_gyro_max,num__post_lin_max
0,1.150412,1.937334,0.948800,5.507663,1.399580,1.027426,2.953433,2.192311,1.717877
1,0.892005,1.259439,0.296583,1.621249,1.848897,1.039483,1.840051,1.470268,2.034321
2,0.600931,0.086338,0.971207,0.154357,0.860970,0.923695,0.643106,0.508103,1.225157
3,0.428777,1.308027,1.225272,2.673865,0.511503,1.837258,2.330975,1.599090,0.909256
4,1.810089,1.910392,0.671284,2.035058,1.460238,0.935080,1.905992,2.141444,1.752437
...,...,...,...,...,...,...,...,...,...
351,-0.158907,-0.790509,-0.612922,-0.471549,-0.361401,-0.344906,-0.603491,-1.168897,-0.678924
352,0.063350,-0.547659,-0.648110,-0.634417,-0.246815,-0.326138,-0.717211,-0.837620,-0.643454
353,0.035945,-0.316548,-0.732627,-0.817288,-0.535514,-0.566645,-1.222507,-1.002456,-0.810833
354,0.502175,0.328938,-0.886916,-0.813516,0.826015,-0.945854,-1.313887,-0.633723,-0.160778


#### 11. Model Training

In [25]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay,precision_score, recall_score, f1_score

In a real-world scenario we would not train just one model; we train several candidate models and compare their performance.

In [26]:
# Fixed seed so that the randomised estimators give the same scores on every
# Restart & Run All. GaussianNB and KNeighborsClassifier take no seed.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name used in the evaluation report.
models = {
    "Logistic Regression": LogisticRegression(random_state=RANDOM_STATE),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Support Vector Classifier": SVC(random_state=RANDOM_STATE),
    "Naive Bayes": GaussianNB(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "Ada Boosting": AdaBoostClassifier(random_state=RANDOM_STATE),
    "K-Neighbours": KNeighborsClassifier(),
}

In [27]:
def _classification_scores(y_true, y_pred):
    """Return accuracy, F1, precision and recall for one set of predictions.

    F1 is computed with ``average='weighted'``; precision and recall use
    scikit-learn's default averaging, matching what this notebook reports.
    The dict keys double as the labels printed in the report.
    """
    return {
        "Accuracy": accuracy_score(y_true, y_pred),
        "F1 score": f1_score(y_true, y_pred, average='weighted'),
        "Precision": precision_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
    }


def _print_scores(heading, scores):
    """Print ``heading`` followed by one ``- <metric>: <value>`` line per score."""
    print(heading)
    for metric, value in scores.items():
        print("- {}: {:.4f}".format(metric, value))


# Estimators expect a 1-D target. Flattening here works whether the targets
# are one-column DataFrames or Series, and avoids passing a column vector to fit().
y_train_flat = np.ravel(y_train)
y_test_flat = np.ravel(y_test)

# Fit every candidate on the training split and report the same metrics on
# both splits, so over-fitting shows up as a train/test gap.
for name, model in models.items():
    model.fit(X_train, y_train_flat)

    train_scores = _classification_scores(y_train_flat, model.predict(X_train))
    test_scores = _classification_scores(y_test_flat, model.predict(X_test))

    print(name)

    _print_scores('Model performance for Training set', train_scores)

    print('----------------------------------')

    _print_scores('Model performance for Test set', test_scores)

    print('='*35)
    print('\n')

Logistic Regression
Model performance for Training set
- Accuracy: 0.9748
- F1 score: 0.9748
- Precision: 0.9692
- Recall: 0.9723
----------------------------------
Model performance for Test set
- Accuracy: 0.9719
- F1 score: 0.9719
- Precision: 0.9613
- Recall: 0.9739


Decision Tree
Model performance for Training set
- Accuracy: 1.0000
- F1 score: 1.0000
- Precision: 1.0000
- Recall: 1.0000
----------------------------------
Model performance for Test set
- Accuracy: 0.9522
- F1 score: 0.9524
- Precision: 0.9198
- Recall: 0.9739




Random Forest
Model performance for Training set
- Accuracy: 1.0000
- F1 score: 1.0000
- Precision: 1.0000
- Recall: 1.0000
----------------------------------
Model performance for Test set
- Accuracy: 0.9747
- F1 score: 0.9748
- Precision: 0.9557
- Recall: 0.9869


Support Vector Classifier
Model performance for Training set
- Accuracy: 0.9860
- F1 score: 0.9860
- Precision: 0.9760
- Recall: 0.9919
----------------------------------
Model performance for Test set
- Accuracy: 0.9747
- F1 score: 0.9748
- Precision: 0.9557
- Recall: 0.9869


Naive Bayes
Model performance for Training set
- Accuracy: 0.9629
- F1 score: 0.9629
- Precision: 0.9606
- Recall: 0.9528
----------------------------------
Model performance for Test set
- Accuracy: 0.9719
- F1 score: 0.9719
- Precision: 0.9673
- Recall: 0.9673


Gradient Boosting
Model performance for Training set
- Accuracy: 1.0000
- F1 score: 1.0000
- Precision: 1.0000
- Recall: 1.0000
----------------------------------
Model performance for Test

In [28]:
# List the display names of the candidate models, in insertion order.
list(models.keys())

['Logistic Regression',
 'Decision Tree',
 'Random Forest',
 'Support Vector Classifier',
 'Naive Bayes',
 'Gradient Boosting',
 'Ada Boosting',
 'K-Neighbours']

                                                        Completed