In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Lasso, Ridge
from sklearn.metrics import classification_report, r2_score, accuracy_score, recall_score, precision_score, f1_score, confusion_matrix, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder , RobustScaler , StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import resample

In [None]:
# Load the CSV into `df`; later cells read its `Activity` and `subject` columns.
df = pd.read_csv("mHealth_subject4.csv")

In [None]:
# Show the loaded frame as the cell output.
df

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics, transposed so each column of `df` becomes a row.
df.describe().T

In [None]:
# Number of missing values per column.
df.isnull().sum()

In [None]:
# Bar chart of how many rows each Activity value has in the full frame.
# Uses an explicit Axes so the title and axis labels attach to this figure.
fig, ax = plt.subplots(figsize=(10, 8))
df['Activity'].value_counts().plot.bar(ax=ax)
ax.set(title="Rows per activity (full dataset)", xlabel="Activity", ylabel="Row count")
plt.show()

In [None]:
# Split the frame into the class-0 rows and every other activity.
df_0 = df.loc[df['Activity'].eq(0)]
df_others = df.loc[df['Activity'].ne(0)]

In [None]:
# (rows, columns) of the class-0 subset and of the remaining activities.
df_0.shape, df_others.shape

In [None]:
# Bar chart of row counts per Activity value, excluding class 0.
fig, ax = plt.subplots(figsize=(10, 8))
df_others['Activity'].value_counts().plot.bar(ax=ax)
ax.set(title="Rows per activity (class 0 excluded)", xlabel="Activity", ylabel="Row count")
plt.show()

In [None]:
# Down-sample class 0 to at most 30000 rows.
# - random_state makes the sample identical on every Restart & Run All.
# - min(...) avoids the ValueError `sample` raises when asked for more rows
#   than the frame holds (sampling is without replacement).
df_0_sample = df_0.sample(n=min(30000, len(df_0)), random_state=42)

In [None]:
# Stack the down-sampled class-0 rows on top of all other activity rows.
data = pd.concat([df_0_sample, df_others])

In [None]:
# Show the combined frame as the cell output.
data

In [None]:
# Bar chart of row counts per Activity value after down-sampling class 0.
# The size must be passed as `figsize`; the original `figure=(10,8)` keyword
# is not the figure-size argument.
fig, ax = plt.subplots(figsize=(10, 8))
data['Activity'].value_counts().plot.bar(ax=ax)
ax.set(title="Rows per activity (after down-sampling class 0)", xlabel="Activity", ylabel="Row count")
plt.show()

In [None]:
# Maps the integer activity id to its human-readable description.
# Keys are plain ints because other cells index this dict with integer ids
# (e.g. `activity_label[i]` for `i in range(1, 13)`).
activity_label = {
    0: "No Activity",
    1: "Standing still (1 min)",
    2: "Sitting and relaxing (1 min)",
    3: "Lying down (1 min)",
    4: "Walking (1 min)",
    5: "Climbing stairs (1 min)",
    6: "Waist bends forward (20x)",
    7: "Frontal elevation of arms (20x)",
    8: "Knees bending (crouching) (20x)",
    9: "Cycling (1 min)",
    10: "Jogging (1 min)",
    11: "Running (1 min)",
    12: "Jump front & back (20x)",
}

In [None]:
# Plot the x/y/z traces of each reading type ('a' and 'g' column prefixes)
# for every activity 1..12, left-ankle and right-wrist sensors side by side.
#
# NOTE(review): the rows are filtered on subject == 'subject1' while the CSV
# loaded above is named "mHealth_subject4.csv" -- confirm this value exists in
# the `subject` column, otherwise every plot is empty.
sub1 = df[df['subject']=='subject1']
readings = ['a','g']

# One distinct colour per axis so the three traces can be told apart.
axis_colors = {'x': 'red', 'y': 'green', 'z': 'blue'}
# (column infix, panel title). The 'l' infix comes from the existing left-ankle
# columns (e.g. 'alx'); 'r' for the right wrist is assumed by analogy --
# TODO confirm the column names ('arx', 'grx', ...) against df.columns.
sensors = [('l', "Left ankle sensor"), ('r', "Right wrist sensor")]

for i in range(1,13):
    # Select the activity once instead of re-filtering for every trace.
    segment = sub1[sub1['Activity'] == i].reset_index(drop=True)
    for r in readings:
        # A plain f-string: the original's stacked quote characters split the
        # literal into adjacent strings, so the placeholders were not filled.
        print(f"========== {activity_label[i]} = {r} ==========")
        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        for ax, (side, title) in zip(axes, sensors):
            for axis_name, color in axis_colors.items():
                column = r + side + axis_name
                ax.plot(segment[column], color=color, alpha=0.7, label=column)
            ax.set_title(title)
            ax.legend()
        plt.show()

In [None]:
# Replace the numeric activity ids 0..12 with their descriptive names.
# Both lists are passed to `replace` (ids first, names second) and are the same
# length: one name per id, each separated by a comma.
data["Activity"] = data["Activity"].replace(
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    [
        "No Activity",
        "Standing still (1 min)",
        "Sitting and relaxing (1 min)",
        "Lying down (1 min)",
        "Walking (1 min)",
        "Climbing stairs (1 min)",
        "Waist bends forward (20x)",
        "Frontal elevation of arms (20x)",
        "Knees bending (crouching) (20x)",
        "Cycling (1 min)",
        "Jogging (1 min)",
        "Running (1 min)",
        "Jump front & back (20x)",
    ],
)


In [None]:
# Show `data` again after the Activity column was rewritten.
data

In [None]:
# Work on a copy of `data` and, for every column except the last two, drop the
# rows whose value falls outside the 1st-99th percentile band. The band is
# computed on the full `df`, and rows are removed one feature at a time.
df1 = data.copy()

for feature in df1.columns[:-2]:
    lo, hi = (np.quantile(df[feature], q) for q in (0.01, 0.99))
    print(feature, 'range:', lo, '-', hi)

    out_of_band = (df1[feature] > hi) | (df1[feature] < lo)
    df1 = df1.drop(index=df1.index[out_of_band])
    print('shape', df1.shape)

In [None]:
# Single encoder instance; later cells call fit_transform on it for more than one column.
le = LabelEncoder()

In [None]:
# Replace the `subject` column of `df` with integer codes (overwrites the column).
df['subject'] = le.fit_transform(df['subject'])

In [None]:
# Column dtypes of the trimmed frame.
df1.dtypes

In [None]:
# Encode the Activity column of `df1` as integer codes. This refits the same
# `le`, so from here on the encoder holds the Activity mapping, not the
# subject mapping.
df1['Activity'] = le.fit_transform(df1['Activity'])

In [None]:
# Box plots of `df`, one subplot per column, arranged on a 5x5 grid.
df.plot(kind='box' , subplots=True, layout = (5,5), figsize=(20,15))

In [None]:
# Feature matrix: every column except the target and the subject id.
X = df1.drop(columns=["Activity", "subject"]).to_numpy()
# Target vector: the encoded activity.
Y = df1["Activity"].to_numpy()

In [None]:
# Hold out 25% of the rows for testing. The fixed random_state makes the split,
# and therefore every score below, reproducible across kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

In [None]:
# Fit the scaler on the training split only; the test split is transformed with it afterwards.
ro_scaler = RobustScaler().fit(X_train)

In [None]:
# Scaled copies of both splits. The unscaled X_train / X_test are left intact
# because later cells fit and score models on the raw features as well, and
# other cells read the scaled test set as `X_test_sc`.
X_train_sc = ro_scaler.transform(X_train)
X_test_sc = ro_scaler.transform(X_test)

In [None]:
def resultsSummarizer(Y_true, Y_pred, cm_en=True, class_names=None):
    """Print macro-averaged classification metrics, optionally with a confusion-matrix heatmap.

    Parameters
    ----------
    Y_true : array-like
        Ground-truth class labels.
    Y_pred : array-like
        Predicted class labels, aligned with ``Y_true``.
    cm_en : bool, default True
        When true, draw the confusion matrix as a seaborn heatmap.
    class_names : sequence of str, optional
        Tick labels for the heatmap, in the same order as the rows of the
        confusion matrix (sorted label order). Defaults to the values of the
        notebook-level ``activity_label`` dict.

    Returns
    -------
    None
        Results are printed / plotted only.
    """
    cm = confusion_matrix(Y_true, Y_pred)
    acc = accuracy_score(Y_true, Y_pred)
    prec = precision_score(Y_true, Y_pred, average='macro')
    # With average='macro' recall_score returns one float, so it cannot be
    # unpacked into two names.
    rec = recall_score(Y_true, Y_pred, average='macro')
    f1 = f1_score(Y_true, Y_pred, average='macro')

    if cm_en:
        if class_names is None:
            # NOTE(review): these names are in activity-id order. If the labels
            # were produced by LabelEncoder on the activity *names*, the codes
            # follow the encoder's sorted class order instead -- pass
            # `class_names=le.classes_` in that case.
            class_names = activity_label.values()
        tick_labels = list(class_names)
        if len(tick_labels) != cm.shape[0]:
            # Fewer/more classes present than names supplied: let seaborn label
            # the axes itself rather than attach misaligned names.
            tick_labels = "auto"

        # plt.figure (lowercase) registers the figure with pyplot; plt.Figure
        # only constructs a detached object, so the size was being ignored.
        plt.figure(figsize=(15, 15))

        # fmt="d" prints the integer counts in full instead of the default
        # two-significant-digit format.
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=tick_labels, yticklabels=tick_labels)

        plt.title("Confusion Matrix")
        plt.show()

    # Braces (not parentheses) are what str.format substitutes.
    print("Accuracy Score: {:.4%}".format(acc))
    print("Precision Score: {:.4%}".format(prec))
    print("Recall Score: {:.4%}".format(rec))
    print("F1 Score: {:.4%}".format(f1))

In [None]:
# Baseline logistic regression on the raw (unscaled) features;
# the cell output is (train accuracy, test accuracy).
lr = LogisticRegression().fit(X_train, Y_train)
(lr.score(X_train, Y_train), lr.score(X_test, Y_test))

In [None]:
# Logistic regression on the robust-scaled features.
lr2 = LogisticRegression()
lr2.fit(X_train_sc, Y_train)
# Score the model that was just fitted (lr2, not lr) on the same scaled
# feature space it was trained on.
lr2.score(X_train_sc, Y_train), lr2.score(X_test_sc, Y_test)

In [None]:
# Test-set predictions of the scaled logistic regression. The scaled test
# matrix is named `X_test_sc` throughout this notebook.
Y_pred_lr = lr2.predict(X_test_sc)

In [None]:
# Metrics plus confusion-matrix heatmap for the `lr2` predictions.
resultsSummarizer(Y_test, Y_pred_lr)

In [None]:
# Lasso fitted on the scaled features with the encoded class ids as target.
# NOTE(review): Lasso is a regressor, so `.score` here is R^2, not accuracy.
model = Lasso().fit(X_train_sc, Y_train)
(model.score(X_train_sc, Y_train), model.score(X_test_sc, Y_test))

In [None]:
# Ridge fitted on the scaled features with the encoded class ids as target.
# NOTE(review): Ridge is a regressor, so `.score` here is R^2, not accuracy.
model = Ridge().fit(X_train_sc, Y_train)
(model.score(X_train_sc, Y_train), model.score(X_test_sc, Y_test))

In [None]:
# 5-nearest-neighbours classifier on the raw features;
# the cell output is (train accuracy, test accuracy).
model = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)
(model.score(X_train, Y_train), model.score(X_test, Y_test))

In [None]:
# 5-nearest-neighbours classifier on the scaled features;
# the cell output is (train accuracy, test accuracy).
model = KNeighborsClassifier(n_neighbors=5).fit(X_train_sc, Y_train)
(model.score(X_train_sc, Y_train), model.score(X_test_sc, Y_test))

In [None]:
# Random forest on the scaled features. The fixed random_state makes the
# fitted forest, and therefore the reported scores, reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_sc, Y_train)
model.score(X_train_sc, Y_train), model.score(X_test_sc, Y_test)

In [None]:
# Test-set predictions from whatever estimator `model` currently refers to.
Y_pred = model.predict(X_test_sc)

In [None]:
# Show the predicted labels.
Y_pred

In [None]:
# Show the true test labels for comparison with the predictions.
Y_test

In [None]:
# Separate 5-NN model trained on the raw features, kept under its own name.
knn2 = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)
knn2

In [None]:
# Test-set predictions from `knn2`.
y_pred_knn = knn2.predict(X_test)

In [None]:
# Metrics only (no heatmap) for the `knn2` predictions, which are stored in
# `y_pred_knn`.
resultsSummarizer(Y_test, y_pred_knn, cm_en=False)

In [None]:
# Sweep k = 1..10 for KNN on the scaled features and print the metrics for each.
for n in range(1,11):
    knn1 = KNeighborsClassifier(n_neighbors=n)
    knn1.fit(X_train_sc, Y_train)
    # Predict with the model fitted in this iteration and keep the result under
    # the same name that is handed to the summarizer below.
    y_pred = knn1.predict(X_test_sc)
    # A plain f-string so that {n} is actually interpolated.
    print(f"\n========== No of Neighbours: {n} ==========\n")
    resultsSummarizer(Y_test, y_pred, cm_en=False)

In [None]:
# Decision tree limited to depth 14, trained on the raw features.
# The keyword is `max_depth`; random_state fixes the tree's tie-breaking so the
# result is reproducible.
dt = DecisionTreeClassifier(max_depth=14, random_state=42)
dt.fit(X_train,Y_train)
y_pred_dt = dt.predict(X_test)
resultsSummarizer(Y_test,y_pred_dt)