In [44]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

In [45]:
# Read both CSV files; the order of the file names fixes which frame lands
# in which variable.
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [46]:
# Record which file each row came from, so a row's origin stays identifiable
# after the two frames are concatenated into one.
train['Data'] = 'Train'
test['Data'] = 'Test'

In [47]:
# Dimensions of each frame as (rows, columns).
train.shape, test.shape

((7352, 564), (2947, 564))

In [48]:
# Stack the two frames row-wise. ignore_index=True renumbers the rows from 0,
# discarding each frame's own index.
both = pd.concat([train, test], axis=0, ignore_index=True)


In [49]:
# Dimensions of the combined frame as (rows, columns).
both.shape

(10299, 564)

In [50]:
# Expected row count of the combined frame, computed from the two source
# frames instead of typed in by hand, so the check stays valid if the data
# files change.
len(train) + len(test)

10299

In [51]:
# List the distinct values in the subject column.
both['subject'].unique()

array([ 1,  3,  5,  6,  7,  8, 11, 14, 15, 16, 17, 19, 21, 22, 23, 25, 26,
       27, 28, 29, 30,  2,  4,  9, 10, 12, 13, 18, 20, 24], dtype=int64)

In [52]:
# Turn the subject ids into '#'-prefixed string labels.
# Any leading '#' is stripped first so that re-running this cell does not
# stack prefixes ('##1', '###1', ...).
both['subject'] = '#' + both['subject'].astype(str).str.lstrip('#')

In [53]:
# Re-inspect the subject values after the conversion to '#'-prefixed strings.
both['subject'].unique()

array(['#1', '#3', '#5', '#6', '#7', '#8', '#11', '#14', '#15', '#16',
       '#17', '#19', '#21', '#22', '#23', '#25', '#26', '#27', '#28',
       '#29', '#30', '#2', '#4', '#9', '#10', '#12', '#13', '#18', '#20',
       '#24'], dtype=object)

In [54]:
# Count how many columns of the combined frame have each dtype.
both.dtypes.value_counts()

float64    561
object       3
Name: count, dtype: int64

In [55]:
# Same dtype tally for the training frame.
train.dtypes.value_counts()

float64    561
object       2
int64        1
Name: count, dtype: int64

In [56]:
# Same dtype tally for the test frame.
test.dtypes.value_counts()

float64    561
object       2
int64        1
Name: count, dtype: int64

In [59]:
def basic_details(df):
    """Summarise every column of ``df`` in a single table.

    The returned DataFrame is indexed by the column names of ``df`` and has
    three columns, in this order:

    - ``"Missing value"``: number of null entries in the column,
    - ``"N unique value"``: result of ``df.nunique()`` for the column,
    - ``"dtype"``: the column's dtype.
    """
    summary_columns = {
        "Missing value": df.isnull().sum(),
        "N unique value": df.nunique(),
        "dtype": df.dtypes,
    }
    return pd.DataFrame(summary_columns)

In [60]:
# Per-column summary (missing values, unique values, dtype) of the combined frame.
basic_details(both)

Unnamed: 0,Missing value,N unique value,dtype
tBodyAcc-mean()-X,0,10292,float64
tBodyAcc-mean()-Y,0,10299,float64
tBodyAcc-mean()-Z,0,10293,float64
tBodyAcc-std()-X,0,10295,float64
tBodyAcc-std()-Y,0,10297,float64
...,...,...,...
"angle(Y,gravityMean)",0,10299,float64
"angle(Z,gravityMean)",0,10299,float64
subject,0,30,object
Activity,0,6,object


In [None]:
# Target labels, and the number of rows carrying each label.
activity = both['Activity']
label_counts = activity.value_counts()

# Explicit figure/axes pair so the labels below attach to this chart only.
fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(label_counts.index, label_counts.values)
# Title and axis labels let the chart be read without the surrounding code.
ax.set_title('Number of samples per activity')
ax.set_xlabel('Activity')
ax.set_ylabel('Number of samples')
# Render the figure and keep the BarContainer repr out of the cell output.
plt.show()

In [None]:
# Keep the origin and subject columns available as standalone Series.
Data, Subject = both['Data'], both['subject']
# NOTE: `train` is rebound here. From this cell on it holds every row of
# `both` (minus the three columns dropped below), not just the training file.
# drop() returns a new frame, so `both` itself is left untouched.
train = both.drop(columns=['Data', 'subject', 'Activity'])

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# NOTE(review): both transformers are fitted on every row of `train`, and the
# train/test split only happens in a later cell, so the held-out rows
# influence the scaling and the PCA projection. Consider fitting on the
# training portion only.
slc = StandardScaler()
# A fractional n_components is a variance target, not a component count.
pca = PCA(n_components=0.9, random_state=0)

# Standardise the features, then reduce dimensionality. `train` is replaced
# by the PCA output.
train = pca.fit_transform(slc.fit_transform(train))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for the final evaluation. The fixed random_state
# makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    train,
    activity,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Shared evaluation settings.
num_folds, seed = 10, 0
scoring = 'accuracy'
# Result stores; both start empty and are filled in by the modelling cell.
results, accuracy = {}, {}

In [None]:
# Fit the final k-NN model, estimate its accuracy by cross-validation on the
# training split, and compare its predictions against the held-out split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import KFold, cross_val_score
model = KNeighborsClassifier(algorithm='auto', n_neighbors=8, p=1, weights='distance')

# `_` is IPython's handle on the most recent cell output, so it is not used
# as a scratch name here. The fold count comes from `num_folds` rather than a
# second hard-coded 10.
cv_scores = cross_val_score(model, X_train, y_train, cv=num_folds, scoring=scoring)
results["GScv"] = (cv_scores.mean(), cv_scores.std())

model.fit(X_train, y_train)
y_predict = model.predict(X_test)

accuracy["GScv"] = accuracy_score(y_test, y_predict)

print(classification_report(y_test, y_predict))

# Pin the row/column order to the classes the model was fitted on, so the
# tick labels below line up with the matrix.
cm = confusion_matrix(y_test, y_predict, labels=model.classes_)
# fmt='d' prints the counts as plain integers. The default format switches to
# scientific notation for counts of 100 or more.
ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=model.classes_,
    yticklabels=model.classes_,
)
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set_xlabel('Predicted activity')
ax.set_ylabel('True activity')
plt.show()