In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load dataset
data = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')

# Label encoding
label_encoders = {}
for column in ['Gender', 'Occupation', 'BMI Category', 'Blood Pressure', 'Sleep Disorder']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Split the data
X = data.drop('Sleep Disorder', axis=1)
y = data['Sleep Disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# SVM
clf_svm = SVC(random_state=42)
clf_svm.fit(X_train, y_train)
y_pred_svm = clf_svm.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='macro')
recall_svm = recall_score(y_test, y_pred_svm, average='macro')
f1_svm = f1_score(y_test, y_pred_svm, average='macro')

# Logistic Regression
clf_lr = LogisticRegression(random_state=42)
clf_lr.fit(X_train, y_train)
y_pred_lr = clf_lr.predict(X_test)
accuracy_lr = accuracy_score(y_test, y_pred_lr)
precision_lr = precision_score(y_test, y_pred_lr, average='macro')
recall_lr = recall_score(y_test, y_pred_lr, average='macro')
f1_lr = f1_score(y_test, y_pred_lr, average='macro')

# Bagging
clf_bagging = BaggingClassifier(random_state=42)
clf_bagging.fit(X_train, y_train)
y_pred_bagging = clf_bagging.predict(X_test)
accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
precision_bagging = precision_score(y_test, y_pred_bagging, average='macro')
recall_bagging = recall_score(y_test, y_pred_bagging, average='macro')
f1_bagging = f1_score(y_test, y_pred_bagging, average='macro')

# Print metrics
print("SVM Metrics:\nAccuracy: {}\nPrecision: {}\nRecall: {}\nF1 Score: {}".format(accuracy_svm, precision_svm, recall_svm, f1_svm))
print("\nLogistic Regression Metrics:\nAccuracy: {}\nPrecision: {}\nRecall: {}\nF1 Score: {}".format(accuracy_lr, precision_lr, recall_lr, f1_lr))
print("\nBagging Metrics:\nAccuracy: {}\nPrecision: {}\nRecall: {}\nF1 Score: {}".format(accuracy_bagging, precision_bagging, recall_bagging, f1_bagging))


SVM Metrics:
Accuracy: 0.64
Precision: 0.6441176470588236
Recall: 0.4375
F1 Score: 0.4222794222794222

Logistic Regression Metrics:
Accuracy: 0.84
Precision: 0.8194570287593543
Recall: 0.7892441860465116
F1 Score: 0.7912253338609928

Bagging Metrics:
Accuracy: 0.88
Precision: 0.8409738409738411
Recall: 0.8255813953488372
F1 Score: 0.8296146044624746


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load dataset
data = pd.read_csv(r'D:\Sleep_health_and_lifestyle_dataset.csv')

# Label encoding
for column in ['Gender', 'Occupation', 'BMI Category', 'Blood Pressure', 'Sleep Disorder']:
    data[column] = LabelEncoder().fit_transform(data[column])

# Split the data
X = data.drop('Sleep Disorder', axis=1)
y = data['Sleep Disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

print("Data loaded and split successfully. Shape of train set:", X_train.shape)


Data loaded and split successfully. Shape of train set: (299, 12)


In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV

# Define parameter grid
param_grid_dt = {'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]}

# Setup GridSearchCV
grid_dt = GridSearchCV(DecisionTreeClassifier(random_state=42), param_grid_dt, cv=3, verbose=2)
grid_dt.fit(X_train, y_train)

# Best model
best_dt = grid_dt.best_estimator_

# Prediction and evaluation
y_pred_dt = best_dt.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='macro')
recall_dt = recall_score(y_test, y_pred_dt, average='macro')
f1_dt = f1_score(y_test, y_pred_dt, average='macro')

# Print results
print("Decision Tree best parameters:", grid_dt.best_params_)
print("Decision Tree accuracy:", accuracy_dt)
print("Decision Tree precision:", precision_dt)
print("Decision Tree recall:", recall_dt)
print("Decision Tree F1 Score:", f1_dt)


Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] END ................max_depth=None, min_samples_split=2; total time=   0.0s
[CV] END ................max_depth=None, min_samples_split=2; total time=   0.0s
[CV] END ................max_depth=None, min_samples_split=2; total time=   0.0s
[CV] END ................max_depth=None, min_samples_split=5; total time=   0.0s
[CV] END ................max_depth=None, min_samples_split=5; total time=   0.0s
[CV] END ................max_depth=None, min_samples_split=5; total time=   0.0s
[CV] END ...............max_depth=None, min_samples_split=10; total time=   0.0s
[CV] END ...............max_depth=None, min_samples_split=10; total time=   0.0s
[CV] END ...............max_depth=None, min_samples_split=10; total time=   0.0s
[CV] END ..................max_depth=10, min_samples_split=2; total time=   0.0s
[CV] END ..................max_depth=10, min_samples_split=2; total time=   0.0s
[CV] END ..................max_depth=10, min_samp

In [4]:
from sklearn.ensemble import RandomForestClassifier

# Define parameter grid for Random Forest
param_grid_rf = {
    'n_estimators': [50, 100, 150],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [None, 10, 20]
}

# Setup GridSearchCV for Random Forest
grid_rf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid_rf, cv=3, verbose=2)
grid_rf.fit(X_train, y_train)

# Best model
best_rf = grid_rf.best_estimator_

# Prediction and evaluation
y_pred_rf = best_rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='macro')
recall_rf = recall_score(y_test, y_pred_rf, average='macro')
f1_rf = f1_score(y_test, y_pred_rf, average='macro')

# Print results
print("Random Forest best parameters:", grid_rf.best_params_)
print("Random Forest accuracy:", accuracy_rf)
print("Random Forest precision:", precision_rf)
print("Random Forest recall:", recall_rf)
print("Random Forest F1 Score:", f1_rf)


Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV] END .max_depth=None, max_features=sqrt, n_estimators=50; total time=   0.0s
[CV] END .max_depth=None, max_features=sqrt, n_estimators=50; total time=   0.0s
[CV] END .max_depth=None, max_features=sqrt, n_estimators=50; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, n_estimators=100; total time=   0.1s
[CV] END max_depth=None, max_features=sqrt, n_estimators=100; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, n_estimators=100; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, n_estimators=150; total time=   0.1s
[CV] END max_depth=None, max_features=sqrt, n_estimators=150; total time=   0.1s
[CV] END max_depth=None, max_features=sqrt, n_estimators=150; total time=   0.1s
[CV] END .max_depth=None, max_features=log2, n_estimators=50; total time=   0.0s
[CV] END .max_depth=None, max_features=log2, n_estimators=50; total time=   0.0s
[CV] END .max_depth=None, max_features=log2, n_e

In [5]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV

# Define parameter grid for KNN
param_grid_knn = {
    'n_neighbors': [3, 5, 7, 10],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

# Setup GridSearchCV for KNN
grid_knn = GridSearchCV(KNeighborsClassifier(), param_grid_knn, cv=3, verbose=2)
grid_knn.fit(X_train, y_train)

# Best model
best_knn = grid_knn.best_estimator_

# Prediction and evaluation
y_pred_knn = best_knn.predict(X_test)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
precision_knn = precision_score(y_test, y_pred_knn, average='macro')
recall_knn = recall_score(y_test, y_pred_knn, average='macro')
f1_knn = f1_score(y_test, y_pred_knn, average='macro')

# Print results
print("KNN best parameters:", grid_knn.best_params_)
print("KNN accuracy:", accuracy_knn)
print("KNN precision:", precision_knn)
print("KNN recall:", recall_knn)
print("KNN F1 Score:", f1_knn)


Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] END ...metric=euclidean, n_neighbors=3, weights=uniform; total time=   0.0s
[CV] END ...metric=euclidean, n_neighbors=3, weights=uniform; total time=   0.0s
[CV] END ...metric=euclidean, n_neighbors=3, weights=uniform; total time=   0.0s
[CV] END ..metric=euclidean, n_neighbors=3, weights=distance; total time=   0.0s
[CV] END ..metric=euclidean, n_neighbors=3, weights=distance; total time=   0.0s
[CV] END ..metric=euclidean, n_neighbors=3, weights=distance; total time=   0.0s
[CV] END ...metric=euclidean, n_neighbors=5, weights=uniform; total time=   0.0s
[CV] END ...metric=euclidean, n_neighbors=5, weights=uniform; total time=   0.0s
[CV] END ...metric=euclidean, n_neighbors=5, weights=uniform; total time=   0.0s
[CV] END ..metric=euclidean, n_neighbors=5, weights=distance; total time=   0.0s
[CV] END ..metric=euclidean, n_neighbors=5, weights=distance; total time=   0.0s
[CV] END ..metric=euclidean, n_neighbors=5, weig

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV

# Load dataset
data = pd.read_csv(r'D:\Sleep_health_and_lifestyle_dataset.csv')  # Adjust the path as necessary

# Label encoding
label_encoder = LabelEncoder()
for column in ['Gender', 'Occupation', 'BMI Category', 'Blood Pressure', 'Sleep Disorder']:
    data[column] = label_encoder.fit_transform(data[column])

# Splitting data
X = data.drop('Sleep Disorder', axis=1)
y = data['Sleep Disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Define a simpler parameter grid for SVM
param_grid_svm = {
    'C': [1, 10],  # Regularization parameter
    'kernel': ['rbf'],  # Kernel type
    'gamma': ['scale', 'auto']  # Kernel coefficient
}

# Setup GridSearchCV for SVM with reduced CV folds
grid_svm = GridSearchCV(SVC(random_state=42), param_grid_svm, cv=2, verbose=2)  # Reduced from 3 to 2 folds for speed
grid_svm.fit(X_train, y_train)

# Best model
best_svm = grid_svm.best_estimator_

# Prediction and evaluation
y_pred_svm = best_svm.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='macro')
recall_svm = recall_score(y_test, y_pred_svm, average='macro')
f1_svm = f1_score(y_test, y_pred_svm, average='macro')

# Print results
print("SVM best parameters:", grid_svm.best_params_)
print("SVM accuracy:", accuracy_svm)
print("SVM precision:", precision_svm)
print("SVM recall:", recall_svm)
print("SVM F1 Score:", f1_svm)


Fitting 2 folds for each of 4 candidates, totalling 8 fits
[CV] END .......................C=1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .......................C=1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ........................C=1, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ........................C=1, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ......................C=10, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ......................C=10, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .......................C=10, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END .......................C=10, gamma=auto, kernel=rbf; total time=   0.0s
SVM best parameters: {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}
SVM accuracy: 0.88
SVM precision: 0.8837868480725622
SVM recall: 0.8125
SVM F1 Score: 0.8401656314699794


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV

# Load dataset
data = pd.read_csv(r'D:\Sleep_health_and_lifestyle_dataset.csv')  # Adjust the path as necessary

# Label encoding
label_encoder = LabelEncoder()
for column in ['Gender', 'Occupation', 'BMI Category', 'Blood Pressure', 'Sleep Disorder']:
    data[column] = label_encoder.fit_transform(data[column])

# Splitting data
X = data.drop('Sleep Disorder', axis=1)
y = data['Sleep Disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Define parameter grid for Bagging Classifier
param_grid_bagging = {
    'n_estimators': [10, 50, 100],  # Number of base estimators in the ensemble
    'max_samples': [0.5, 1.0],  # The maximum number of samples to train each base estimator
    'max_features': [0.5, 1.0]  # The maximum number of features to draw from X to train each base estimator
}

# Setup GridSearchCV for Bagging Classifier
grid_bagging = GridSearchCV(BaggingClassifier(random_state=42), param_grid_bagging, cv=2, verbose=2)  # Using 2 folds for speed
grid_bagging.fit(X_train, y_train)

# Best model
best_bagging = grid_bagging.best_estimator_

# Prediction and evaluation
y_pred_bagging = best_bagging.predict(X_test)
accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
precision_bagging = precision_score(y_test, y_pred_bagging, average='macro')
recall_bagging = recall_score(y_test, y_pred_bagging, average='macro')
f1_bagging = f1_score(y_test, y_pred_bagging, average='macro')

# Print results
print("Bagging Classifier best parameters:", grid_bagging.best_params_)
print("Bagging Classifier accuracy:", accuracy_bagging)
print("Bagging Classifier precision:", precision_bagging)
print("Bagging Classifier recall:", recall_bagging)
print("Bagging Classifier F1 Score:", f1_bagging)


Fitting 2 folds for each of 12 candidates, totalling 24 fits
[CV] END .max_features=0.5, max_samples=0.5, n_estimators=10; total time=   0.0s
[CV] END .max_features=0.5, max_samples=0.5, n_estimators=10; total time=   0.0s
[CV] END .max_features=0.5, max_samples=0.5, n_estimators=50; total time=   0.0s
[CV] END .max_features=0.5, max_samples=0.5, n_estimators=50; total time=   0.0s
[CV] END max_features=0.5, max_samples=0.5, n_estimators=100; total time=   0.1s
[CV] END max_features=0.5, max_samples=0.5, n_estimators=100; total time=   0.1s
[CV] END .max_features=0.5, max_samples=1.0, n_estimators=10; total time=   0.0s
[CV] END .max_features=0.5, max_samples=1.0, n_estimators=10; total time=   0.0s
[CV] END .max_features=0.5, max_samples=1.0, n_estimators=50; total time=   0.0s
[CV] END .max_features=0.5, max_samples=1.0, n_estimators=50; total time=   0.0s
[CV] END max_features=0.5, max_samples=1.0, n_estimators=100; total time=   0.1s
[CV] END max_features=0.5, max_samples=1.0, n_es

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV

# Load dataset
data = pd.read_csv(r'D:\Sleep_health_and_lifestyle_dataset.csv')

# Label encoding
label_encoder = LabelEncoder()
for column in ['Gender', 'Occupation', 'BMI Category', 'Blood Pressure', 'Sleep Disorder']:
    data[column] = label_encoder.fit_transform(data[column])

# Splitting data
X = data.drop('Sleep Disorder', axis=1)
y = data['Sleep Disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Scale data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define parameter grid for Logistic Regression
param_grid_lr = {
    'C': [0.1, 1, 10],  # Regularization strength
    'solver': ['liblinear', 'lbfgs'],  # Algorithm to use in the optimization problem
    'max_iter': [200, 400]  # Increased maximum number of iterations
}

# Setup GridSearchCV for Logistic Regression
grid_lr = GridSearchCV(LogisticRegression(random_state=42), param_grid_lr, cv=2, verbose=2)
grid_lr.fit(X_train_scaled, y_train)

# Best model
best_lr = grid_lr.best_estimator_

# Prediction and evaluation
y_pred_lr = best_lr.predict(X_test_scaled)
accuracy_lr = accuracy_score(y_test, y_pred_lr)
precision_lr = precision_score(y_test, y_pred_lr, average='macro')
recall_lr = recall_score(y_test, y_pred_lr, average='macro')
f1_lr = f1_score(y_test, y_pred_lr, average='macro')

# Print results
print("Logistic Regression best parameters:", grid_lr.best_params_)
print("Logistic Regression accuracy:", accuracy_lr)
print("Logistic Regression precision:", precision_lr)
print("Logistic Regression recall:", recall_lr)
print("Logistic Regression F1 Score:", f1_lr)


Fitting 2 folds for each of 12 candidates, totalling 24 fits
[CV] END ..............C=0.1, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.1, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ..................C=0.1, max_iter=200, solver=lbfgs; total time=   0.0s
[CV] END ..................C=0.1, max_iter=200, solver=lbfgs; total time=   0.0s
[CV] END ..............C=0.1, max_iter=400, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.1, max_iter=400, solver=liblinear; total time=   0.0s
[CV] END ..................C=0.1, max_iter=400, solver=lbfgs; total time=   0.0s
[CV] END ..................C=0.1, max_iter=400, solver=lbfgs; total time=   0.0s
[CV] END ................C=1, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ................C=1, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ....................C=1, max_iter=200, solver=lbfgs; total time=   0.0s
[CV] END ....................C=1, max_iter=200, 