In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


In [8]:
# Read the student sleep-pattern survey (CSV in the working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='student_sleep_patterns.csv')

Index(['Student_ID', 'Age', 'Gender', 'University_Year', 'Sleep_Duration',
       'Study_Hours', 'Screen_Time', 'Caffeine_Intake', 'Physical_Activity',
       'Sleep_Quality', 'Weekday_Sleep_Start', 'Weekend_Sleep_Start',
       'Weekday_Sleep_End', 'Weekend_Sleep_End'],
      dtype='object')

In [9]:
# Preprocessing
# Convert the categorical feature columns to integer codes.
# Each column gets its OWN fitted LabelEncoder: refitting a single shared
# encoder discards the mapping learned for the previous column, which makes it
# impossible to inverse_transform the codes or to encode new samples
# consistently at inference time.
feature_encoders = {}
for column in ['Gender', 'University_Year']:
    feature_encoders[column] = LabelEncoder()
    data[column] = feature_encoders[column].fit_transform(data[column])

# Target encoder. Kept under the original name `label_encoder`, so
# label_encoder.inverse_transform(predictions) maps the integer class codes
# back to the original Sleep_Quality values.
label_encoder = LabelEncoder()
data['Sleep_Quality'] = label_encoder.fit_transform(data['Sleep_Quality'])

# Selecting features and target
# Student_ID is deliberately not part of the feature list (it is an identifier),
# and Sleep_Quality is the prediction target.
feature_columns = [
    'Age', 'Gender', 'University_Year', 'Sleep_Duration', 'Study_Hours',
    'Screen_Time', 'Caffeine_Intake', 'Physical_Activity',
    'Weekday_Sleep_Start', 'Weekend_Sleep_Start',
    'Weekday_Sleep_End', 'Weekend_Sleep_End',
]
X = data[feature_columns]
y = data['Sleep_Quality']

In [10]:
# Single seed shared by every stochastic step in this cell, so that
# Restart Kernel -> Run All reproduces the same split and the same models.
RANDOM_STATE = 42

# Splitting data into train and test sets (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Standardizing numerical features
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize models
# RandomForestClassifier draws bootstrap samples and random feature subsets;
# without a fixed random_state its scores (and the model pickled later) differ
# on every run.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Support Vector Machine": SVC()
}


In [11]:
# Training and evaluating each model
# Every model is fitted on the scaled training split and scored on the held-out
# test split; overall accuracy and a per-class report are printed.
# NOTE(review): the recorded accuracies (6-7% over 10 classes) are at or below
# chance level - worth confirming that the features carry any signal for
# Sleep_Quality before relying on these models.
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Accuracy: {accuracy * 100:.2f}%")
    # Some classes are never predicted, which makes their precision undefined.
    # zero_division=0 reports those as 0.0 (the same value as the default) without
    # emitting an UndefinedMetricWarning for every model.
    print(classification_report(y_test, y_pred, zero_division=0))

Logistic Regression Accuracy: 6.00%
              precision    recall  f1-score   support

           0       0.17      0.17      0.17        18
           1       0.18      0.22      0.20         9
           2       0.00      0.00      0.00        12
           3       0.00      0.00      0.00         8
           4       0.00      0.00      0.00         8
           5       0.00      0.00      0.00         9
           6       0.00      0.00      0.00         9
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00         7
           9       0.17      0.08      0.11        12

    accuracy                           0.06       100
   macro avg       0.05      0.05      0.05       100
weighted avg       0.07      0.06      0.06       100

Random Forest Accuracy: 7.00%
              precision    recall  f1-score   support

           0       0.13      0.11      0.12        18
           1       0.00      0.00      0.00         9
           2

In [20]:
import pickle

# Persist the fitted Random Forest and the scaler it was trained with, each to
# its own pickle file in the working directory (model first, then scaler).
artifacts = (
    ('random_forest_model.pkl', models['Random Forest']),
    ('scaler.pkl', scaler),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, 'wb') as out_file:
        pickle.dump(artifact, out_file)



In [21]:
import pickle

# Restore the persisted Random Forest model and its scaler from disk.
# Only unpickle files you created yourself: pickle.load can execute arbitrary
# code embedded in the file.
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)


random_forest_model = _read_pickle('random_forest_model.pkl')
scaler = _read_pickle('scaler.pkl')
