In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv("SaYoPillow.csv")

In [3]:
df.head()

Unnamed: 0,sr,rr,t,lm,bo,rem,sr.1,hr,sl
0,93.8,25.68,91.84,16.6,89.84,99.6,1.84,74.2,3
1,91.64,25.104,91.552,15.88,89.552,98.88,1.552,72.76,3
2,60.0,20.0,96.0,10.0,95.0,85.0,7.0,60.0,1
3,85.76,23.536,90.768,13.92,88.768,96.92,0.768,68.84,3
4,48.12,17.248,97.872,6.496,96.248,72.48,8.248,53.12,0


In [4]:
data = df

In [5]:
data.rename(columns={'sr': 'snoring rate', 'rr':'respiration rate', 't': 'body temperature', 'lm':'limb movement',
                            'bo':'blood oxygen', 'rem':'eye movement', 'sr.1':'sleeping hours', 'hr':'heart rate',
                            'sl':'stress level'}, inplace=True)
data.columns

Index(['snoring rate', 'respiration rate', 'body temperature', 'limb movement',
       'blood oxygen', 'eye movement', 'sleeping hours', 'heart rate',
       'stress level'],
      dtype='object')

In [6]:
data['stress level'].value_counts()

stress level
3    126
1    126
0    126
2    126
4    126
Name: count, dtype: int64

In [7]:
from mlxtend.preprocessing import minmax_scaling

X = data.drop('stress level', axis=1)
y = pd.DataFrame(data['stress level'])

X_scaled = minmax_scaling(X, columns=X.columns)

In [8]:
from sklearn.feature_selection import mutual_info_regression

mi = pd.DataFrame(mutual_info_regression(X_scaled, y), columns=['MI Scores'], index=X_scaled.columns)
corr = pd.DataFrame(X_scaled[X_scaled.columns].corrwith(y['stress level']), columns=['Correlation'])
s_corr = pd.DataFrame(X_scaled[X_scaled.columns].corrwith(y['stress level'], method='spearman'),
                      columns=['Spearman_Correlation'])

relation = mi.join(corr)
relation = relation.join(s_corr)
relation.sort_values(by='MI Scores', ascending=False)

Unnamed: 0,MI Scores,Correlation,Spearman_Correlation
snoring rate,1.597475,0.975322,0.979788
heart rate,1.594926,0.963516,0.979785
body temperature,1.594858,-0.962354,-0.979785
respiration rate,1.593868,0.963516,0.979785
blood oxygen,1.593693,-0.961092,-0.979785
limb movement,1.593591,0.971071,0.979785
eye movement,1.592082,0.951988,0.979785
sleeping hours,1.579593,-0.973036,-0.983435


In [9]:
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, train_size=0.8, test_size=0.2, random_state=42,
                                                      stratify = y, shuffle=True)

In [10]:
X_train

Unnamed: 0,snoring rate,respiration rate,body temperature,limb movement,blood oxygen,eye movement,sleeping hours,heart rate
0,0.887273,0.691429,0.488571,0.840000,0.522667,0.880000,0.204444,0.691429
337,0.045818,0.072000,0.893714,0.134400,0.933867,0.224000,0.889778,0.072000
60,0.969745,0.881143,0.208571,0.944533,0.233600,0.953778,0.000000,0.881143
13,0.932509,0.734857,0.025714,0.876267,0.028800,0.896889,0.000000,0.734857
265,0.202909,0.230857,0.730857,0.348800,0.789867,0.512889,0.692444,0.230857
...,...,...,...,...,...,...,...,...
574,0.021091,0.033143,0.835429,0.061867,0.897600,0.103111,0.829333,0.033143
104,0.154909,0.193143,0.693143,0.313600,0.737067,0.483556,0.633778,0.193143
174,0.153455,0.192000,0.692000,0.312533,0.735467,0.482667,0.632000,0.192000
304,0.380364,0.328000,0.542286,0.439467,0.572800,0.621333,0.320889,0.328000


In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.metrics import accuracy_score


dtc = DecisionTreeClassifier()
lr = LogisticRegression()
gnb = GaussianNB()
lsvc = LinearSVC()
svc = SVC()
rfc = RandomForestClassifier()
knn = KNeighborsClassifier()
sgdc = SGDClassifier()
gbc = GradientBoostingClassifier()

models = [dtc, lr, gnb, lsvc, svc, rfc,  knn, sgdc, gbc]
model_name = ['Decision Tree', 'Logistic Regression', 'Gaussian Naive Bayes', 'Linear SVC', 'SVC', 'Random Forest',
              'KNN or k-Nearest Neighbors', 'Stochastic Gradient Descent', 'Gradient Boosting']


acc_scores = []
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_valid)
    acc_model = round(accuracy_score(y_pred, y_valid) * 100, 2)
    acc_scores.append(acc_model)
    
    
    

In [12]:
models_acc = pd.DataFrame({'Model name': model_name, 'Accuracy scores': acc_scores})
models_acc.sort_values(by='Accuracy scores', ascending=False)

Unnamed: 0,Model name,Accuracy scores
1,Logistic Regression,100.0
2,Gaussian Naive Bayes,100.0
4,SVC,100.0
6,KNN or k-Nearest Neighbors,100.0
3,Linear SVC,99.21
7,Stochastic Gradient Descent,98.41
5,Random Forest,97.62
0,Decision Tree,96.83
8,Gradient Boosting,96.83


In [13]:
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score

cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3)
cv_mean_scores = []
for model in models:
    scores = cross_val_score(model, X_scaled, y, cv=cv, scoring='accuracy')
    cv_mean_scores.append(scores.mean()*100)

In [14]:
models_val = pd.DataFrame({'Model name': model_name, 'Cross validation mean scores': cv_mean_scores})
models_val.sort_values(by='Cross validation mean scores', ascending=False)

Unnamed: 0,Model name,Cross validation mean scores
1,Logistic Regression,100.0
2,Gaussian Naive Bayes,100.0
4,SVC,100.0
6,KNN or k-Nearest Neighbors,100.0
5,Random Forest,99.047619
3,Linear SVC,98.941799
0,Decision Tree,98.624339
8,Gradient Boosting,98.465608
7,Stochastic Gradient Descent,94.021164


In [15]:
import pandas as pd
from mlxtend.preprocessing import minmax_scaling
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_regression
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# Assuming 'data' is your DataFrame
X = data.drop('stress level', axis=1)
y = data['stress level']

# Scaling the features
X_scaled = minmax_scaling(X, columns=X.columns)

# Splitting the data
X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, train_size=0.8, test_size=0.2, random_state=42,
                                                      stratify=y, shuffle=True)

# Feature selection (optional, for your reference)
mi = pd.DataFrame(mutual_info_regression(X_scaled, y), columns=['MI Scores'], index=X_scaled.columns)
corr = pd.DataFrame(X_scaled[X_scaled.columns].corrwith(y), columns=['Correlation'])
s_corr = pd.DataFrame(X_scaled[X_scaled.columns].corrwith(y, method='spearman'), columns=['Spearman_Correlation'])

relation = mi.join(corr)
relation = relation.join(s_corr)
relation.sort_values(by='MI Scores', ascending=False)

# Defining the classifiers
log_clf = LogisticRegression(random_state=42)
knn_clf = KNeighborsClassifier()
svc_clf = SVC(probability=True, random_state=42)

# Creating the Voting Classifier
voting_clf = VotingClassifier(estimators=[
    ('lr', log_clf),
    ('knn', knn_clf),
    ('svc', svc_clf)
], voting='soft')

# Training the Voting Classifier
voting_clf.fit(X_train, y_train)

# Making predictions
y_pred = voting_clf.predict(X_valid)

# Evaluating the model
accuracy = accuracy_score(y_valid, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# If you want to see individual classifier performance:
for clf in (log_clf, knn_clf, svc_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred_individual = clf.predict(X_valid)
    print(f'{clf.__class__.__name__} Accuracy: {accuracy_score(y_valid, y_pred_individual):.2f}')


Accuracy: 1.00
LogisticRegression Accuracy: 1.00
KNeighborsClassifier Accuracy: 1.00
SVC Accuracy: 1.00
VotingClassifier Accuracy: 1.00


In [23]:
X_train

Unnamed: 0,snoring rate,respiration rate,body temperature,limb movement,blood oxygen,eye movement,sleeping hours,heart rate
0,0.887273,0.691429,0.488571,0.840000,0.522667,0.880000,0.204444,0.691429
337,0.045818,0.072000,0.893714,0.134400,0.933867,0.224000,0.889778,0.072000
60,0.969745,0.881143,0.208571,0.944533,0.233600,0.953778,0.000000,0.881143
13,0.932509,0.734857,0.025714,0.876267,0.028800,0.896889,0.000000,0.734857
265,0.202909,0.230857,0.730857,0.348800,0.789867,0.512889,0.692444,0.230857
...,...,...,...,...,...,...,...,...
574,0.021091,0.033143,0.835429,0.061867,0.897600,0.103111,0.829333,0.033143
104,0.154909,0.193143,0.693143,0.313600,0.737067,0.483556,0.633778,0.193143
174,0.153455,0.192000,0.692000,0.312533,0.735467,0.482667,0.632000,0.192000
304,0.380364,0.328000,0.542286,0.439467,0.572800,0.621333,0.320889,0.328000


In [24]:
import pandas as pd
from mlxtend.preprocessing import minmax_scaling

# Define the new data
new_data = pd.DataFrame({
    'snoring rate': [0, 0.045818, 0.969745],
    'respiration rate': [0.887273, 0.072000, 0.881143],
    'body temperature': [0.691429, 0.893714, 0.208571],
    'limb movement': [0.488571, 0.134400, 0.944533],
    'blood oxygen': [0.840000, 0.933867, 0.233600],
    'eye movement': [0.522667, 0.224000, 0.953778],
    'sleeping hours': [0.880000, 0.889778, 0.000000],
    'heart rate': [0.204444, 0.072000, 0.881143]
})

# Scale the new data using the same scaling as the training data
new_data_scaled = minmax_scaling(new_data, columns=new_data.columns)

# Predict the stress level using the trained Voting Classifier
new_predictions = voting_clf.predict(new_data_scaled)

# Output the predictions
print(new_predictions)


[1 0 4]


In [25]:
X

Unnamed: 0,snoring rate,respiration rate,body temperature,limb movement,blood oxygen,eye movement,sleeping hours,heart rate
0,93.800,25.680,91.840,16.600,89.840,99.60,1.840,74.20
1,91.640,25.104,91.552,15.880,89.552,98.88,1.552,72.76
2,60.000,20.000,96.000,10.000,95.000,85.00,7.000,60.00
3,85.760,23.536,90.768,13.920,88.768,96.92,0.768,68.84
4,48.120,17.248,97.872,6.496,96.248,72.48,8.248,53.12
...,...,...,...,...,...,...,...,...
625,69.600,20.960,92.960,10.960,90.960,89.80,3.440,62.40
626,48.440,17.376,98.064,6.752,96.376,73.76,8.376,53.44
627,97.504,27.504,86.880,17.752,84.256,101.88,0.000,78.76
628,58.640,19.728,95.728,9.728,94.592,84.32,6.728,59.32


In [29]:
# Calculate min and max values for each feature from the original training data
min_values = X_train.min()
max_values = X_train.max()

# Function to scale new data using these min and max values
def scale_new_data(new_data, min_values, max_values):
    return (new_data - min_values) / (max_values - min_values)

# Define the new data point in its original format (unscaled)
new_data_point = pd.DataFrame({
    'snoring rate': [93.800],
    'respiration rate': [25.680],
    'body temperature': [91.840],
    'limb movement': [16.600],
    'blood oxygen': [89.840],
    'eye movement': [99.60],
    'sleeping hours': [1.840],
    'heart rate': [74.20]
})

# Scale the new data point using the min and max values
new_data_point_scaled = scale_new_data(new_data_point, min_values, max_values)

# Predict the stress level using the trained Voting Classifier
new_prediction = voting_clf.predict(new_data_point_scaled)

# Output the prediction
print(new_prediction)


[4]


In [30]:
import joblib

# Save the trained Voting Classifier model
joblib.dump(voting_clf, 'voting_clf_model.pkl')

# Save the min and max values used for scaling
min_values.to_pickle('min_values.pkl')
max_values.to_pickle('max_values.pkl')


In [31]:
import joblib

# Load the saved Voting Classifier model
voting_clf_loaded = joblib.load('voting_clf_model.pkl')

# Load the min and max values used for scaling
min_values_loaded = pd.read_pickle('min_values.pkl')
max_values_loaded = pd.read_pickle('max_values.pkl')

# Function to scale new data using the loaded min and max values
def scale_new_data(new_data, min_values, max_values):
    return (new_data - min_values) / (max_values - min_values)

# Define the new data point in its original format (unscaled)
new_data_point = pd.DataFrame({
    'snoring rate': [93.800],
    'respiration rate': [25.680],
    'body temperature': [91.840],
    'limb movement': [16.600],
    'blood oxygen': [89.840],
    'eye movement': [99.60],
    'sleeping hours': [1.840],
    'heart rate': [74.20]
})

# Scale the new data point using the loaded min and max values
new_data_point_scaled = scale_new_data(new_data_point, min_values_loaded, max_values_loaded)

# Predict the stress level using the loaded Voting Classifier
new_prediction = voting_clf_loaded.predict(new_data_point_scaled)

# Output the prediction
print(new_prediction)


[4]
