In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [3]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [4]:
sp = pd.read_csv('dataset.csv')
sp.head()

Unnamed: 0,date,time,username,wrist,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z
0,2017-6-30,13:51:15:847724020,viktor,0,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296
1,2017-6-30,13:51:16:246945023,viktor,0,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269
2,2017-6-30,13:51:16:446233987,viktor,0,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367
3,2017-6-30,13:51:16:646117985,viktor,0,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336
4,2017-6-30,13:51:16:846738994,viktor,0,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922


In [5]:
sp.isnull().sum()

date              0
time              0
username          0
wrist             0
activity          0
acceleration_x    0
acceleration_y    0
acceleration_z    0
gyro_x            0
gyro_y            0
gyro_z            0
dtype: int64

In [6]:
sp.dtypes

date               object
time               object
username           object
wrist               int64
activity            int64
acceleration_x    float64
acceleration_y    float64
acceleration_z    float64
gyro_x            float64
gyro_y            float64
gyro_z            float64
dtype: object

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [8]:
sp.activity.value_counts()

1    44365
0    44223
Name: activity, dtype: int64

In [9]:
sp['date'] = pd.to_datetime(sp['date'],format = '%Y-%m-%d')
sp.dtypes

date              datetime64[ns]
time                      object
username                  object
wrist                      int64
activity                   int64
acceleration_x           float64
acceleration_y           float64
acceleration_z           float64
gyro_x                   float64
gyro_y                   float64
gyro_z                   float64
dtype: object

In [10]:
sp['hour'] = sp['time'].apply(lambda x:x.split(':')[:1][0])
sp.hour

0        13
1        13
2        13
3        13
4        13
         ..
88583    20
88584    20
88585    20
88586    20
88587    20
Name: hour, Length: 88588, dtype: object

In [11]:
sp['week'] = sp['date'].apply(lambda x:x.isoweekday())
sp.week

0        5
1        5
2        5
3        5
4        5
        ..
88583    7
88584    7
88585    7
88586    7
88587    7
Name: week, Length: 88588, dtype: int64

In [12]:
sp['is_week'] = sp['week'] == 7
sp.is_week

0        False
1        False
2        False
3        False
4        False
         ...  
88583     True
88584     True
88585     True
88586     True
88587     True
Name: is_week, Length: 88588, dtype: bool

In [13]:
sp.head()

Unnamed: 0,date,time,username,wrist,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,hour,week,is_week
0,2017-06-30,13:51:15:847724020,viktor,0,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296,13,5,False
1,2017-06-30,13:51:16:246945023,viktor,0,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269,13,5,False
2,2017-06-30,13:51:16:446233987,viktor,0,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367,13,5,False
3,2017-06-30,13:51:16:646117985,viktor,0,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336,13,5,False
4,2017-06-30,13:51:16:846738994,viktor,0,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922,13,5,False


In [14]:
data = sp.iloc[:,4:]
data.head()

Unnamed: 0,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,hour,week,is_week
0,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296,13,5,False
1,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269,13,5,False
2,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367,13,5,False
3,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336,13,5,False
4,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922,13,5,False


In [15]:
df = pd.get_dummies(data['week'],prefix = 'week_')
data = pd.concat((data,df),axis = 1,ignore_index = False)
data.head()

Unnamed: 0,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,hour,week,is_week,week__1,week__2,week__4,week__5,week__6,week__7
0,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296,13,5,False,0,0,0,1,0,0
1,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269,13,5,False,0,0,0,1,0,0
2,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367,13,5,False,0,0,0,1,0,0
3,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336,13,5,False,0,0,0,1,0,0
4,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922,13,5,False,0,0,0,1,0,0


In [16]:
df = pd.get_dummies(data['is_week'],prefix = 'is_week_')
data = pd.concat((data,df),axis = 1,ignore_index = False)
data.head()

Unnamed: 0,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,hour,week,is_week,week__1,week__2,week__4,week__5,week__6,week__7,is_week__False,is_week__True
0,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296,13,5,False,0,0,0,1,0,0,1,0
1,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269,13,5,False,0,0,0,1,0,0,1,0
2,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367,13,5,False,0,0,0,1,0,0,1,0
3,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336,13,5,False,0,0,0,1,0,0,1,0
4,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922,13,5,False,0,0,0,1,0,0,1,0


In [17]:
df = pd.get_dummies(data['hour'],prefix = 'hour_')
data = pd.concat((data,df),axis = 1,ignore_index = False)
data.head()

Unnamed: 0,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,hour,week,is_week,...,hour__12,hour__13,hour__14,hour__15,hour__16,hour__17,hour__18,hour__19,hour__20,hour__21
0,0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296,13,5,False,...,0,1,0,0,0,0,0,0,0,0
1,0,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269,13,5,False,...,0,1,0,0,0,0,0,0,0,0
2,0,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367,13,5,False,...,0,1,0,0,0,0,0,0,0,0
3,0,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336,13,5,False,...,0,1,0,0,0,0,0,0,0,0
4,0,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922,13,5,False,...,0,1,0,0,0,0,0,0,0,0


In [18]:
x = data.iloc[:,1:]
x

Unnamed: 0,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,hour,week,is_week,week__1,...,hour__12,hour__13,hour__14,hour__15,hour__16,hour__17,hour__18,hour__19,hour__20,hour__21
0,0.2650,-0.7814,-0.0076,-0.0590,0.0325,-2.9296,13,5,False,0,...,0,1,0,0,0,0,0,0,0,0
1,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269,13,5,False,0,...,0,1,0,0,0,0,0,0,0,0
2,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367,13,5,False,0,...,0,1,0,0,0,0,0,0,0,0
3,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336,13,5,False,0,...,0,1,0,0,0,0,0,0,0,0
4,0.4814,-0.9312,0.0359,0.0527,0.4379,2.4922,13,5,False,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88583,0.3084,-0.8376,-0.1327,0.4823,2.0124,0.6048,20,7,True,0,...,0,0,0,0,0,0,0,0,1,0
88584,0.4977,-1.0027,-0.4397,0.1022,-1.2565,-0.0761,20,7,True,0,...,0,0,0,0,0,0,0,0,1,0
88585,0.4587,-1.1780,-0.2827,-1.4500,-0.2792,-1.2616,20,7,True,0,...,0,0,0,0,0,0,0,0,1,0
88586,0.2590,-0.8582,-0.0759,-1.5165,0.4560,-1.7755,20,7,True,0,...,0,0,0,0,0,0,0,0,1,0


In [19]:
y = data['activity']
y

0        0
1        0
2        0
3        0
4        0
        ..
88583    0
88584    0
88585    0
88586    0
88587    0
Name: activity, Length: 88588, dtype: int64

In [20]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.25,random_state = 42)

In [21]:
import warnings
warnings.filterwarnings(action = 'ignore')
lr = LogisticRegression()
lr.fit(x_train,y_train)

LogisticRegression()

In [22]:
print(lr.coef_)
print(lr.intercept_)

[[ 0.11488858  5.60135024 -2.4235913   0.02166845 -0.27823227  0.22130398
   0.77666493 -1.8796198   1.24494493  0.16083524 -2.98342886 -1.63836526
   5.02059769 -3.8896232   1.24494493 -3.32998439  1.24494493 -0.97634319
  -0.14220234 -0.64216517 -3.98890208 -5.10851802 -9.0364841   7.80735787
  12.39239768  3.28104949  0.40520012 -6.07642974]]
[-2.32911273]


In [23]:
predict = lr.predict(x_test)
predict

array([1, 1, 1, ..., 0, 0, 1], dtype=int64)

In [24]:
pd.Series(predict).value_counts()

1    11205
0    10942
dtype: int64

In [25]:
print(classification_report(y_test,predict))

              precision    recall  f1-score   support

           0       0.99      0.97      0.98     11162
           1       0.97      0.99      0.98     10985

    accuracy                           0.98     22147
   macro avg       0.98      0.98      0.98     22147
weighted avg       0.98      0.98      0.98     22147

