In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [None]:
# Load the table and separate the target column 'sl' from the feature columns.
data = pd.read_csv('sleep.csv')
labels = data.pop('sl')  # removes 'sl' from `data` in place and returns it as a Series

# Standardise every feature column; `scaled` keeps the original index and column names.
# NOTE(review): the scaler is fitted on all rows before any train/test split is made,
# so held-out rows contribute to the scaling statistics - confirm this is acceptable.
scaler = StandardScaler()
scaled = pd.DataFrame(
    scaler.fit_transform(data),
    index=data.index,
    columns=data.columns,
)

# `dataa` holds the scaled features plus the (unscaled) target column 'sl'.
dataa = scaled.assign(sl=labels)
dataa.head()

In [None]:
# Pairwise correlations between all columns of `dataa` (scaled features + target).
corr_matrix = dataa.corr()

# Correlation of each column with the target 'sl', largest value first.
sl_corr = corr_matrix["sl"]
print(sl_corr.sort_values(ascending=False))

# Scatter-plot matrix of the scaled feature columns only.
pd.plotting.scatter_matrix(scaled)
plt.show()

In [None]:
# Scatter plots of selected feature pairs, coloured by the target column 'sl'.
# Each entry is (x column, y column, x-axis label, y-axis label).
# NOTE(review): the human-readable labels are carried over from the previous
# version of this cell (with spelling corrected); the column-to-meaning mapping
# is not visible in this notebook - confirm against the dataset documentation.
scatter_pairs = [
    ("rr", "hr", "Respiration Rate", "Heart Rate"),
    ("lm", "rem", "Limb Movement", "Eye Movement"),
    ("t", "sr.1", "Temperature", "Sleeping Hours"),
    ("sr", "hr", "Snoring", "Heart Rate"),
]

for x_col, y_col, x_label, y_label in scatter_pairs:
    dataa.plot(
        kind="scatter",
        x=x_col,
        y=y_col,
        alpha=0.4,
        c="sl",
        cmap=plt.get_cmap("jet"),
        colorbar=True,
        xlabel=x_label,
        ylabel=y_label,
    )

# Render the figures explicitly so the cell does not end on a stray Axes repr.
plt.show()

In [None]:
from sklearn.linear_model import SGDClassifier


# Binary variant of the target: the values 1, 2, 3 and 4 are all mapped to 1;
# any other value is left unchanged.
binaryl = labels.replace(to_replace=[1, 2, 3, 4], value=1)

# 80/20 split of the scaled features against the binary target.
X_trainb, X_testb, y_trainb, y_testb = train_test_split(
    scaled, binaryl, test_size=0.2, random_state=42
)

# Linear classifier trained with SGD; fit() returns the fitted estimator itself.
bin_cls = SGDClassifier(random_state=6).fit(X_trainb, y_trainb)

# Accuracy on the rows the model was trained on.
train_pred = bin_cls.predict(X_trainb)
bin_acc_train = accuracy_score(y_trainb, train_pred)
print("Accuracy on trainset (SGD) =", bin_acc_train)

# Accuracy on the held-out rows.
predictions_b = bin_cls.predict(X_testb)
bin_acc_test = accuracy_score(y_testb, predictions_b)
print("Accuracy on testset (SGD) =", bin_acc_test)

# Per-fold accuracy from 5-fold cross-validation on the training portion.
cv_sb = cross_val_score(bin_cls, X_trainb, y_trainb, scoring="accuracy", cv=5)
print(cv_sb)




In [None]:
# Hold out 20 % of the rows, this time against the original (un-collapsed) labels.
# The fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    scaled,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier



# Baseline random forest: library-default hyper-parameters, fixed seed.
forest = RandomForestClassifier(random_state=6).fit(X_train, y_train)

# Predictions on both partitions.
forest_trainpred = forest.predict(X_train)
forest_testpred = forest.predict(X_test)

# Accuracy on the training rows, then on the held-out rows.
forest_acc_train = accuracy_score(y_train, forest_trainpred)
print("Accuracy on training (Forest default) = ", forest_acc_train)
forest_acc_test = accuracy_score(y_test, forest_testpred)
print("Accuracy on test (Forest default) = ", forest_acc_test)

# Per-fold accuracy from 5-fold cross-validation on the training portion.
cv_s = cross_val_score(forest, X_train, y_train, scoring="accuracy", cv=5)
print("Accuracy on five folds (Forest default) \n = ", cv_s)
print("CV average =", np.average(cv_s))

# Confusion matrix on the held-out rows.
conf_forest = confusion_matrix(y_test, forest_testpred)
print(conf_forest)



In [None]:
from sklearn.model_selection import GridSearchCV
# Exhaustive 5-fold grid search over random-forest hyper-parameters.
#
# 'auto' is intentionally not listed under max_features: for classifiers it was
# only an alias of 'sqrt', it was deprecated in scikit-learn 1.1 and is rejected
# from 1.3 onward. Listing it therefore either refits every 'sqrt' candidate a
# second time (old versions) or makes a third of the candidates fail to fit
# (new versions).
param_grid = {
    'n_estimators': [200, 500],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [4, 5, 6, 7, 8],
    'criterion': ['gini', 'entropy'],
}

# GridSearchCV clones `forest`, so the already-fitted baseline is not modified.
CV_rfc = GridSearchCV(estimator=forest, param_grid=param_grid, cv=5)
CV_rfc.fit(X_train, y_train)

# Last expression of the cell: shows the winning parameter combination.
CV_rfc.best_params_


In [None]:
# Random forest with the hyper-parameters chosen from the grid search.
# max_features='sqrt' replaces the former 'auto': for classifiers the two mean
# the same thing, but 'auto' raises an error on scikit-learn 1.3 and later.
forestb = RandomForestClassifier(
    random_state=6,
    criterion='gini',
    max_depth=4,
    max_features='sqrt',
    n_estimators=200,
)
forestb.fit(X_train, y_train)

# Accuracy on the training rows.
forestb_trainpred = forestb.predict(X_train)
forestb_acc_train = accuracy_score(y_train, forestb_trainpred)
print("Accuracy on training(Forest best) = ", forestb_acc_train)

# Accuracy on the held-out rows.
forestb_testpred = forestb.predict(X_test)
forestb_acc_test = accuracy_score(y_test, forestb_testpred)
print("Accuracy on test (Forest best)= ", forestb_acc_test)

# Per-fold accuracy from 5-fold cross-validation on the training portion.
# NOTE: `cv_sb` is also assigned by the SGD cell; this assignment overwrites it.
cv_sb = cross_val_score(forestb, X_train, y_train, scoring="accuracy", cv=5)
print("Accuracy on five folds (Forest best) =", cv_sb)
print("CV average =", np.average(cv_sb))

# Confusion matrix on the held-out rows.
conf_forestb = confusion_matrix(y_test, forestb_testpred)
print(conf_forestb)


In [None]:
from sklearn.neighbors import KNeighborsClassifier
# 5-fold grid search over the neighbour count (1..30) and the vote weighting
# for a k-nearest-neighbours classifier.
nei = KNeighborsClassifier()
k_range = list(range(1, 31))
weight_options = ["uniform", "distance"]
param_grid = dict(n_neighbors=k_range, weights=weight_options)

grid = GridSearchCV(
    nei,
    param_grid,
    cv=5,
    scoring='accuracy',
    return_train_score=False,
    verbose=1,
)

# fit() returns the search object itself, so `grid_search` and `grid` are the same object.
grid_search = grid.fit(X_train, y_train)

# Report the winning combination. Previously the value was evaluated and then
# discarded because a bare print() followed it, so the cell showed nothing.
print("Best KNN parameters:", grid.best_params_)

In [None]:
# k-nearest-neighbours classifier with a single neighbour.
nei = KNeighborsClassifier(n_neighbors=1)
nei.fit(X_train, y_train)

# Accuracy on the training rows.
nei_trainpred = nei.predict(X_train)
nei_acc_train = accuracy_score(y_train, nei_trainpred)
print("Accuracy on training (KN) = ", nei_acc_train)

# Accuracy on the held-out rows.
nei_testpred = nei.predict(X_test)
nei_acc_test = accuracy_score(y_test, nei_testpred)
print("Accuracy on test (KN) = ", nei_acc_test)

# Per-fold accuracy from 5-fold cross-validation on the training portion.
# The estimator passed here must be `nei`: scoring `forest` instead would report
# random-forest results under the KN label.
cv_sk = cross_val_score(nei, X_train, y_train, scoring="accuracy", cv=5)
print("Accuracy on five folds (KN) =", cv_sk)
print("CV average =", np.average(cv_sk))

# Confusion matrix on the held-out rows.
conf_nei = confusion_matrix(y_test, nei_testpred)
print(conf_nei)

In [None]:
from sklearn.tree import DecisionTreeClassifier
# 5-fold grid search over decision-tree hyper-parameters.
#
# The search runs on a seeded estimator: with max_features set to 'sqrt' or
# 'log2' the tree draws a random feature subset at every split, so an unseeded
# tree would make the chosen parameters vary from run to run.
tree_clas = DecisionTreeClassifier(random_state=1024)
tree = tree_clas  # `tree` is kept as a second name for the same estimator

# 'auto' is intentionally not listed under max_features: for classifiers it was
# only an alias of 'sqrt', deprecated in scikit-learn 1.1 and rejected from 1.3.
param_grid = {
    'max_features': ['sqrt', 'log2'],
    'ccp_alpha': [0.1, .01, .001],
    'max_depth': [5, 6, 7, 8, 9],
    'criterion': ['gini', 'entropy'],
}

grid_search = GridSearchCV(estimator=tree_clas, param_grid=param_grid, cv=5, verbose=True)
grid_search.fit(X_train, y_train)

# Report the winning combination. Previously the value was evaluated and then
# discarded because a bare print() followed it, so the cell showed nothing.
print("Best decision-tree parameters:", grid_search.best_params_)

In [None]:
# Decision tree with hand-set hyper-parameters.
# random_state is fixed because max_features='sqrt' makes the tree pick a random
# feature subset at each split; without a seed every run of this cell (and every
# cross-validation fold) would produce different accuracies. The seed value
# matches the one used for the seeded tree in the tuning cell.
tree = DecisionTreeClassifier(
    ccp_alpha=0.001,
    criterion='gini',
    max_depth=8,
    max_features='sqrt',
    random_state=1024,
)
tree.fit(X_train, y_train)

# Accuracy on the training rows.
tree_trainpred = tree.predict(X_train)
tree_acc_train = accuracy_score(y_train, tree_trainpred)
print("Accuracy on training (DecTree)= ", tree_acc_train)

# Accuracy on the held-out rows.
tree_testpred = tree.predict(X_test)
tree_acc_test = accuracy_score(y_test, tree_testpred)
print("Accuracy on test (DecTree) = ", tree_acc_test)

# Per-fold accuracy from 5-fold cross-validation on the training portion.
cv_t = cross_val_score(tree, X_train, y_train, scoring="accuracy", cv=5)
print("Accuracy on five folds (DecTree) =", cv_t)
print("CV average =", np.average(cv_t))

# Confusion matrix on the held-out rows.
conf_tree = confusion_matrix(y_test, tree_testpred)
print(conf_tree)

In [None]:
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')
# 5-fold grid search over logistic-regression penalty, strength and solver.
lr = LogisticRegression(random_state=6)

# Inverse regularisation strengths: 1e-3, 1e-2, ..., 1e3.
c_values = np.logspace(-3, 3, 7)

# The grid is a list of sub-grids so that only valid penalty/solver pairs are
# tried: 'newton-cg' and 'lbfgs' do not support the 'l1' penalty, so a single
# cross-product grid would schedule 7 * 2 = 14 candidates (70 fits with cv=5)
# that can only fail and be scored as NaN.
parameters = [
    {
        'penalty': ['l2'],
        'C': c_values,
        'solver': ['newton-cg', 'lbfgs', 'liblinear'],
    },
    {
        'penalty': ['l1'],
        'C': c_values,
        'solver': ['liblinear'],
    },
]

clfa = GridSearchCV(
    lr,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)
clfa.fit(X_train, y_train)

# Report the winning combination. Previously the value was evaluated and then
# discarded because a bare print() followed it, so the cell showed nothing.
print("Best logistic-regression parameters:", clfa.best_params_)

In [None]:
# Logistic regression with hand-set hyper-parameters; fit() returns the estimator.
lr = LogisticRegression(solver='newton-cg', penalty='l2', C=0.1).fit(X_train, y_train)

# Predictions on both partitions.
lr_trainpred = lr.predict(X_train)
lr_testpred = lr.predict(X_test)

# Accuracy on the training rows, then on the held-out rows.
lr_acc_train = accuracy_score(y_train, lr_trainpred)
print("Accuracy on training (LogReg) = ", lr_acc_train)
lr_acc_test = accuracy_score(y_test, lr_testpred)
print("Accuracy on test (LogReg) = ", lr_acc_test)

# Per-fold accuracy from 5-fold cross-validation on the training portion.
cv_lr = cross_val_score(lr, X_train, y_train, scoring="accuracy", cv=5)
print("Accuracy on five folds (LogReg) =", cv_lr)
print("CV average =", np.average(cv_lr))

# Confusion matrix on the held-out rows.
conf_lr = confusion_matrix(y_test, lr_testpred)
print(conf_lr)

In [None]:
# Side-by-side comparison of the four multiclass models, in percent.
# All three arrays use the same model order as `model_names`.
# The binary SGD model is left out: it is trained on a different (collapsed)
# target and has no bar in the charts, so including it would misalign the rows.
model_names = ["Random Forest", "Decision Tree", "KNeighbors", "Logistic Regression"]
bar_colors = ["darkgreen", "lightgreen", "darkcyan", "lightblue"]

train_acc = np.array([forest_acc_train, tree_acc_train, nei_acc_train, lr_acc_train]) * 100
test_acc = np.array([forest_acc_test, tree_acc_test, nei_acc_test, lr_acc_test]) * 100

# The random-forest entry uses `cv_s`, the CV scores of the default forest, so
# that it describes the same model as forest_acc_train / forest_acc_test.
# (`cv_sb` is reassigned by more than one cell and may belong to another model.)
cv_acc = np.array([
    np.average(cv_s),
    np.average(cv_t),
    np.average(cv_sk),
    np.average(cv_lr),
]) * 100

print(train_acc)
print(test_acc)

# One bar chart per metric; both share the same layout.
# NOTE: the y-axis is clipped to 90-100 %, so a model scoring below 90 % shows no bar.
charts = [
    (test_acc, "Accuracy on Test Set %"),
    (cv_acc, "Average Accuracy on CFV %"),
]
for heights, y_label in charts:
    fig = plt.figure(figsize=(10, 5))
    plt.ylim(90, 100)
    plt.ylabel(y_label)
    plt.xlabel("Algorithm")
    plt.bar(x=model_names, height=heights, color=bar_colors, width=0.6)
    plt.show()