# Kinematic Data Classification 


In [1]:
### Loading the required libraries....
import pandas as pd
import matplotlib.pyplot as plot
%matplotlib inline

In [2]:
### Loading the dataset...
df= pd.read_csv('run_or_walk.csv')
df.head()

Unnamed: 0,date,time,username,wrist,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z
0,2017-6-30,13:51:15:847724020,viktor,0,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296
1,2017-6-30,13:51:16:246945023,viktor,0,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269
2,2017-6-30,13:51:16:446233987,viktor,0,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367
3,2017-6-30,13:51:16:646117985,viktor,0,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336
4,2017-6-30,13:51:16:846738994,viktor,0,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922


In [3]:
### Getting the dimensions of the dataset (number of columns)
df.shape

(88588, 11)

In [4]:
### Listing the variables(columns)
df.columns

Index(['date', 'time', 'username', 'wrist', 'activity', 'acceleration_x',
       'acceleration_y', 'acceleration_z', 'gyro_x', 'gyro_y', 'gyro_z'],
      dtype='object')

In [5]:
#### Getting more info on the variables....
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88588 entries, 0 to 88587
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   date            88588 non-null  object 
 1   time            88588 non-null  object 
 2   username        88588 non-null  object 
 3   wrist           88588 non-null  int64  
 4   activity        88588 non-null  int64  
 5   acceleration_x  88588 non-null  float64
 6   acceleration_y  88588 non-null  float64
 7   acceleration_z  88588 non-null  float64
 8   gyro_x          88588 non-null  float64
 9   gyro_y          88588 non-null  float64
 10  gyro_z          88588 non-null  float64
dtypes: float64(6), int64(2), object(3)
memory usage: 7.4+ MB


In [6]:
#### Getting a statistical summary of the dataset...
df.describe()

Unnamed: 0,wrist,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z
count,88588.0,88588.0,88588.0,88588.0,88588.0,88588.0,88588.0,88588.0
mean,0.52217,0.500801,-0.074811,-0.562585,-0.313956,0.00416,0.037203,0.022327
std,0.499511,0.500002,1.009299,0.658458,0.486815,1.253423,1.198725,1.914423
min,0.0,0.0,-5.3505,-3.299,-3.7538,-4.4306,-7.4647,-9.48
25%,0.0,0.0,-0.3818,-1.0335,-0.376,-0.9207,-0.644825,-1.345125
50%,1.0,1.0,-0.0595,-0.7591,-0.221,0.0187,0.0393,0.0069
75%,1.0,1.0,0.3555,-0.241775,-0.0859,0.8888,0.7337,1.3982
max,1.0,1.0,5.6033,2.668,1.6403,4.8742,8.498,11.2662


In [7]:
### checking for null values....
df.isnull().sum()
Null = df.isnull().sum()
print(Null[Null>0])

Series([], dtype: int64)


In [8]:
### Encoding the target Variable 'label' to binary (0,1)
from sklearn.preprocessing import LabelEncoder
encode = LabelEncoder()

In [9]:
df['activity'] = encode.fit_transform(df['activity'])
df['activity'].unique()

array([0, 1], dtype=int64)

In [10]:
## Extracting the target variable:
target = df['activity']
target.unique()

array([0, 1], dtype=int64)

In [11]:
###  Dropping the target variable from the dataset.
df.drop('activity',axis =1,inplace = True)
df.head()

Unnamed: 0,date,time,username,wrist,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z
0,2017-6-30,13:51:15:847724020,viktor,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296
1,2017-6-30,13:51:16:246945023,viktor,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269
2,2017-6-30,13:51:16:446233987,viktor,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367
3,2017-6-30,13:51:16:646117985,viktor,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336
4,2017-6-30,13:51:16:846738994,viktor,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922


In [12]:
### Converting the date,time and username variables to dummy variable
df.date.value_counts()
df['date'] = encode.fit_transform(df['date'])
df['date'].value_counts()

5     20480
6     11365
9     11357
4      7934
11     7788
3      6696
2      4337
1      4201
0      3992
7      3933
10     3280
8      3225
Name: date, dtype: int64

In [13]:
df.time.value_counts()
df['time'] = encode.fit_transform(df['time']) 
df['time'].value_counts()

0        1
75086    1
25958    1
32101    1
30052    1
        ..
31386    1
25241    1
27288    1
4759     1
2047     1
Name: time, Length: 88588, dtype: int64

In [14]:
df.username.value_counts()
df['username'] = encode.fit_transform(df['username'])
df['username'].value_counts()

0    88588
Name: username, dtype: int64

In [15]:
## Converting the date,time and username variables to Categorical dtype...
df['date'] = pd.Categorical(df['date'])
df['time'] = pd.Categorical(df['time'])
df['username'] = pd.Categorical(df['username'])
df.dtypes

date              category
time              category
username          category
wrist                int64
acceleration_x     float64
acceleration_y     float64
acceleration_z     float64
gyro_x             float64
gyro_y             float64
gyro_z             float64
dtype: object

In [16]:
### Creating x(predictor ) nad y(target) variables...
x = df.values
y = target.values

In [17]:
### Importing train_test_split  and LabelEncoder from sklearn ..... 
from sklearn.model_selection import train_test_split

In [18]:
#### splitting the dataset ...
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size = 0.3, random_state = 100)
xtrain.shape

(62011, 10)

In [19]:
### Importing GaussianNB from sklearn.naive_bayes
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()

In [20]:
### Fitting the data on the classifier model...
classifier.fit(xtrain,ytrain)

GaussianNB()

In [21]:
### Prediction :
ypred = classifier.predict(xtest)

In [22]:
### Model accuracy:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(ypred,ytest)
print(accuracy)

0.8969785905105918


In [23]:
### Confution matrix:
from sklearn.metrics import confusion_matrix
conf_mat =confusion_matrix(ypred,ytest)
print(conf_mat)

[[12384  1725]
 [ 1013 11455]]


In [24]:
### Classification report:
from sklearn.metrics import classification_report
labels = ['Walk','Run']
print(classification_report(ypred,ytest, target_names= labels))

              precision    recall  f1-score   support

        Walk       0.92      0.88      0.90     14109
         Run       0.87      0.92      0.89     12468

    accuracy                           0.90     26577
   macro avg       0.90      0.90      0.90     26577
weighted avg       0.90      0.90      0.90     26577

