# About Dataset

Link: https://www.kaggle.com/datasets/jayeshx19/morning-routine-dataset


Morning Routine Productivity Dataset

This dataset contains 900 days of simulated morning routine data including wake-up times, sleep duration, meditation, exercise, breakfast type, journaling habits, productivity scores, and mood. It is designed to explore the relationship between morning habits and daily productivity or mood.

## Imports

In [29]:
import pandas as pd
import os
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
import kagglehub
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

## Load the dataset

In [2]:
# Fetch the dataset through kagglehub; the returned value is the local
# directory that holds the downloaded files.
dataset_handle = "jayeshx19/morning-routine-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'morning-routine-dataset' dataset.
Path to dataset files: /kaggle/input/morning-routine-dataset


In [3]:
# List the downloaded files so the CSV name can be confirmed.
dataset_files = os.listdir(path)
print(dataset_files)

['Morning_Routine_Productivity_Dataset.csv']


In [4]:
# Read the dataset's CSV file into a DataFrame.
csv_file_path = os.path.join(path, 'Morning_Routine_Productivity_Dataset.csv')
df = pd.read_csv(filepath_or_buffer=csv_file_path)

# Preview the first five rows.
display(df.head(5))

Unnamed: 0,Date,Wake-up Time,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Breakfast Type,Journaling (Y/N),Work Start Time,Productivity Score (1-10),Mood,Notes
0,2023-02-01 10:21:52.870632,5:30 AM,6.6,20,60,Protein-rich,Yes,6:30 AM,6,Neutral,Collection whole decision middle.
1,2023-02-02 10:21:52.870632,8:15 AM,8.0,5,50,Heavy,No,11:15 AM,7,Happy,Tree player behavior various up such attention...
2,2023-02-03 10:21:52.870632,8:30 AM,8.9,25,30,Carb-rich,Yes,10:30 AM,10,Happy,If hard focus nor trip forward change per.
3,2023-02-04 10:21:52.870632,5:00 AM,8.4,25,30,Skipped,Yes,7:00 AM,7,Happy,Physical threat federal with thing.
4,2023-02-05 10:21:52.870632,5:15 AM,8.2,25,30,Heavy,No,6:15 AM,10,Happy,Ground class Mr future.


## Explore the dataset

In [5]:
# Total number of missing cells across the whole frame.
df.isna().sum().sum()

np.int64(0)

In [6]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()


np.int64(0)

In [7]:
# Frame dimensions as (rows, columns).
df.shape

(900, 11)

In [8]:
# Look at one free-text note (row label 6) to judge whether the column is usable.
df.loc[6, "Notes"]

'Hit hear night management.'

## Preprocessing

In [9]:
# Parse the timestamp strings so the .dt accessor becomes available.
df['Date'] = pd.to_datetime(df['Date'])

# Split the timestamp into separate numeric calendar components.
date_parts = df['Date'].dt
df['year'] = date_parts.year
df['month'] = date_parts.month
df['day'] = date_parts.day

In [10]:
# Remove the raw timestamp column.
df = df.drop(columns=["Date"])

In [11]:
# Display the current state of the frame.
df

Unnamed: 0,Wake-up Time,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Breakfast Type,Journaling (Y/N),Work Start Time,Productivity Score (1-10),Mood,Notes,year,month,day
0,5:30 AM,6.6,20,60,Protein-rich,Yes,6:30 AM,6,Neutral,Collection whole decision middle.,2023,2,1
1,8:15 AM,8.0,5,50,Heavy,No,11:15 AM,7,Happy,Tree player behavior various up such attention...,2023,2,2
2,8:30 AM,8.9,25,30,Carb-rich,Yes,10:30 AM,10,Happy,If hard focus nor trip forward change per.,2023,2,3
3,5:00 AM,8.4,25,30,Skipped,Yes,7:00 AM,7,Happy,Physical threat federal with thing.,2023,2,4
4,5:15 AM,8.2,25,30,Heavy,No,6:15 AM,10,Happy,Ground class Mr future.,2023,2,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,8:15 AM,5.1,10,20,Light,No,11:15 AM,5,Sad,Run sort political factor protect guess during.,2025,7,15
896,6:30 AM,7.4,20,10,Light,No,9:30 AM,5,Neutral,Ever read pick.,2025,7,16
897,7:30 AM,8.1,0,50,Light,No,8:30 AM,6,Neutral,Small international discover ball.,2025,7,17
898,8:00 AM,7.2,25,10,Protein-rich,Yes,10:00 AM,7,Happy,Security relate once maintain.,2025,7,18


In [12]:
# Parse the 12-hour clock strings (e.g. "5:30 AM") into datetimes.
wake_up = pd.to_datetime(df['Wake-up Time'], format='%I:%M %p')

# Keep only the hour and minute as numeric features, then drop the source column.
df['wake_up_hour'] = wake_up.dt.hour
df['wake_up_minute'] = wake_up.dt.minute
df = df.drop(columns=["Wake-up Time"])
display(df.head())

Unnamed: 0,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Breakfast Type,Journaling (Y/N),Work Start Time,Productivity Score (1-10),Mood,Notes,year,month,day,wake_up_hour,wake_up_minute
0,6.6,20,60,Protein-rich,Yes,6:30 AM,6,Neutral,Collection whole decision middle.,2023,2,1,5,30
1,8.0,5,50,Heavy,No,11:15 AM,7,Happy,Tree player behavior various up such attention...,2023,2,2,8,15
2,8.9,25,30,Carb-rich,Yes,10:30 AM,10,Happy,If hard focus nor trip forward change per.,2023,2,3,8,30
3,8.4,25,30,Skipped,Yes,7:00 AM,7,Happy,Physical threat federal with thing.,2023,2,4,5,0
4,8.2,25,30,Heavy,No,6:15 AM,10,Happy,Ground class Mr future.,2023,2,5,5,15


In [13]:
# Parse the 12-hour clock strings (e.g. "6:30 AM") into datetimes.
work_start = pd.to_datetime(df['Work Start Time'], format='%I:%M %p')

# Keep only the hour and minute as numeric features, then drop the source column.
df['working hour'] = work_start.dt.hour
df['working minute'] = work_start.dt.minute
df = df.drop(columns=["Work Start Time"])
display(df.head())

Unnamed: 0,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Breakfast Type,Journaling (Y/N),Productivity Score (1-10),Mood,Notes,year,month,day,wake_up_hour,wake_up_minute,working hour,working minute
0,6.6,20,60,Protein-rich,Yes,6,Neutral,Collection whole decision middle.,2023,2,1,5,30,6,30
1,8.0,5,50,Heavy,No,7,Happy,Tree player behavior various up such attention...,2023,2,2,8,15,11,15
2,8.9,25,30,Carb-rich,Yes,10,Happy,If hard focus nor trip forward change per.,2023,2,3,8,30,10,30
3,8.4,25,30,Skipped,Yes,7,Happy,Physical threat federal with thing.,2023,2,4,5,0,7,0
4,8.2,25,30,Heavy,No,10,Happy,Ground class Mr future.,2023,2,5,5,15,6,15


In [14]:
# Remove the free-text Notes column.
df = df.drop(columns=["Notes"])

In [15]:
# Display the current state of the frame.
df

Unnamed: 0,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Breakfast Type,Journaling (Y/N),Productivity Score (1-10),Mood,year,month,day,wake_up_hour,wake_up_minute,working hour,working minute
0,6.6,20,60,Protein-rich,Yes,6,Neutral,2023,2,1,5,30,6,30
1,8.0,5,50,Heavy,No,7,Happy,2023,2,2,8,15,11,15
2,8.9,25,30,Carb-rich,Yes,10,Happy,2023,2,3,8,30,10,30
3,8.4,25,30,Skipped,Yes,7,Happy,2023,2,4,5,0,7,0
4,8.2,25,30,Heavy,No,10,Happy,2023,2,5,5,15,6,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,5.1,10,20,Light,No,5,Sad,2025,7,15,8,15,11,15
896,7.4,20,10,Light,No,5,Neutral,2025,7,16,6,30,9,30
897,8.1,0,50,Light,No,6,Neutral,2025,7,17,7,30,8,30
898,7.2,25,10,Protein-rich,Yes,7,Happy,2025,7,18,8,0,10,0


In [16]:
# Row count per Mood label.
df["Mood"].value_counts()

Unnamed: 0_level_0,count
Mood,Unnamed: 1_level_1
Sad,315
Happy,313
Neutral,272


In [17]:
# Row count per Breakfast Type category.
df["Breakfast Type"].value_counts()

Unnamed: 0_level_0,count
Breakfast Type,Unnamed: 1_level_1
Heavy,197
Carb-rich,189
Protein-rich,173
Light,172
Skipped,169


In [18]:
# Encode the mood labels as integer class ids: Happy -> 0, Neutral -> 1, Sad -> 2.
mood_mapping = dict(Happy=0, Neutral=1, Sad=2)
df['Mood'] = df['Mood'].map(mood_mapping)

In [19]:
# Display the current state of the frame.
df

Unnamed: 0,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Breakfast Type,Journaling (Y/N),Productivity Score (1-10),Mood,year,month,day,wake_up_hour,wake_up_minute,working hour,working minute
0,6.6,20,60,Protein-rich,Yes,6,1,2023,2,1,5,30,6,30
1,8.0,5,50,Heavy,No,7,0,2023,2,2,8,15,11,15
2,8.9,25,30,Carb-rich,Yes,10,0,2023,2,3,8,30,10,30
3,8.4,25,30,Skipped,Yes,7,0,2023,2,4,5,0,7,0
4,8.2,25,30,Heavy,No,10,0,2023,2,5,5,15,6,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,5.1,10,20,Light,No,5,2,2025,7,15,8,15,11,15
896,7.4,20,10,Light,No,5,1,2025,7,16,6,30,9,30
897,8.1,0,50,Light,No,6,1,2025,7,17,7,30,8,30
898,7.2,25,10,Protein-rich,Yes,7,0,2025,7,18,8,0,10,0


In [20]:
# One-hot encode the breakfast category into one indicator column per level.
df = pd.get_dummies(df, columns=['Breakfast Type'])

# Cast every boolean column to 0/1 integers.
bool_cols = df.select_dtypes(include='bool').columns
df = df.astype({col: int for col in bool_cols})
display(df.head())

Unnamed: 0,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Journaling (Y/N),Productivity Score (1-10),Mood,year,month,day,wake_up_hour,wake_up_minute,working hour,working minute,Breakfast Type_Carb-rich,Breakfast Type_Heavy,Breakfast Type_Light,Breakfast Type_Protein-rich,Breakfast Type_Skipped
0,6.6,20,60,Yes,6,1,2023,2,1,5,30,6,30,0,0,0,1,0
1,8.0,5,50,No,7,0,2023,2,2,8,15,11,15,0,1,0,0,0
2,8.9,25,30,Yes,10,0,2023,2,3,8,30,10,30,1,0,0,0,0
3,8.4,25,30,Yes,7,0,2023,2,4,5,0,7,0,0,0,0,0,1
4,8.2,25,30,No,10,0,2023,2,5,5,15,6,15,0,1,0,0,0


In [21]:
# Row count per Journaling (Y/N) value.
df['Journaling (Y/N)'].value_counts()

Unnamed: 0_level_0,count
Journaling (Y/N),Unnamed: 1_level_1
No,460
Yes,440


In [22]:
# Binary-encode the journaling flag. Note the direction: 'Yes' becomes 0 and
# 'No' becomes 1, so a value of 1 means "did not journal".
mapping = dict(Yes=0, No=1)
df['Journaling (Y/N)'] = df['Journaling (Y/N)'].map(mapping)

In [23]:
# Display the current state of the frame.
df

Unnamed: 0,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Journaling (Y/N),Productivity Score (1-10),Mood,year,month,day,wake_up_hour,wake_up_minute,working hour,working minute,Breakfast Type_Carb-rich,Breakfast Type_Heavy,Breakfast Type_Light,Breakfast Type_Protein-rich,Breakfast Type_Skipped
0,6.6,20,60,0,6,1,2023,2,1,5,30,6,30,0,0,0,1,0
1,8.0,5,50,1,7,0,2023,2,2,8,15,11,15,0,1,0,0,0
2,8.9,25,30,0,10,0,2023,2,3,8,30,10,30,1,0,0,0,0
3,8.4,25,30,0,7,0,2023,2,4,5,0,7,0,0,0,0,0,1
4,8.2,25,30,1,10,0,2023,2,5,5,15,6,15,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,5.1,10,20,1,5,2,2025,7,15,8,15,11,15,0,0,1,0,0
896,7.4,20,10,1,5,1,2025,7,16,6,30,9,30,0,0,1,0,0
897,8.1,0,50,1,6,1,2025,7,17,7,30,8,30,0,0,1,0,0
898,7.2,25,10,0,7,0,2025,7,18,8,0,10,0,0,0,0,1,0


In [24]:
# Print column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Sleep Duration (hrs)         900 non-null    float64
 1   Meditation (mins)            900 non-null    int64  
 2   Exercise (mins)              900 non-null    int64  
 3   Journaling (Y/N)             900 non-null    int64  
 4   Productivity Score (1-10)    900 non-null    int64  
 5   Mood                         900 non-null    int64  
 6   year                         900 non-null    int32  
 7   month                        900 non-null    int32  
 8   day                          900 non-null    int32  
 9   wake_up_hour                 900 non-null    int32  
 10  wake_up_minute               900 non-null    int32  
 11  working hour                 900 non-null    int32  
 12  working minute               900 non-null    int32  
 13  Breakfast Type_Carb-

## Train Test Split

In [25]:
# Target is the encoded Mood column; every other column is a feature.
y = df['Mood']
X = df.drop(columns=["Mood"])

In [26]:
# Display the feature matrix.
X

Unnamed: 0,Sleep Duration (hrs),Meditation (mins),Exercise (mins),Journaling (Y/N),Productivity Score (1-10),year,month,day,wake_up_hour,wake_up_minute,working hour,working minute,Breakfast Type_Carb-rich,Breakfast Type_Heavy,Breakfast Type_Light,Breakfast Type_Protein-rich,Breakfast Type_Skipped
0,6.6,20,60,0,6,2023,2,1,5,30,6,30,0,0,0,1,0
1,8.0,5,50,1,7,2023,2,2,8,15,11,15,0,1,0,0,0
2,8.9,25,30,0,10,2023,2,3,8,30,10,30,1,0,0,0,0
3,8.4,25,30,0,7,2023,2,4,5,0,7,0,0,0,0,0,1
4,8.2,25,30,1,10,2023,2,5,5,15,6,15,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,5.1,10,20,1,5,2025,7,15,8,15,11,15,0,0,1,0,0
896,7.4,20,10,1,5,2025,7,16,6,30,9,30,0,0,1,0,0
897,8.1,0,50,1,6,2025,7,17,7,30,8,30,0,0,1,0,0
898,7.2,25,10,0,7,2025,7,18,8,0,10,0,0,0,0,1,0


In [27]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Logistic Regression and results

In [28]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Baseline: robust scaling followed by multinomial logistic regression.
# The explicit `multi_class='multinomial'` argument is omitted on purpose: it is
# deprecated since scikit-learn 1.5, and the lbfgs solver already fits a
# multinomial model when the target has more than two classes.
pipe = Pipeline([
    ('scaler', RobustScaler()),
    ('clf', LogisticRegression(solver='lbfgs'))
])

# Train
pipe.fit(X_train, y_train)

# Evaluate
print("Accuracy:", pipe.score(X_test, y_test))

Accuracy: 0.9166666666666666




In [36]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 for the logistic-regression pipeline fitted above.
# The original cell predicted with `grid`, a name that is never defined in this
# notebook, so it failed on a fresh kernel; `pipe` is the fitted baseline model.
y_pred = pipe.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.95      0.94      0.94        63
           1       0.87      0.85      0.86        54
           2       0.92      0.95      0.94        63

    accuracy                           0.92       180
   macro avg       0.91      0.91      0.91       180
weighted avg       0.92      0.92      0.92       180



In [30]:
from sklearn.ensemble import HistGradientBoostingClassifier

# Histogram gradient boosting on the unscaled features, seeded for repeatability.
hgb = HistGradientBoostingClassifier(random_state=42)
hgb.fit(X=X_train, y=y_train)

hgb_test_accuracy = hgb.score(X_test, y_test)
print("HGB Test acc:", hgb_test_accuracy)

HGB Test acc: 1.0


## What we can do better (Voting and Stacking)

In [48]:
# Balance the training classes by oversampling with SMOTE.
# NOTE(review): X_res / y_res are not referenced by any later cell visible in
# this notebook (the pipelines below carry their own SMOTE step) — confirm
# whether this cell is still needed.

from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X=X_train, y=y_train)

In [49]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.preprocessing import RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier, VotingClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [50]:
# Preprocessing steps reused by both ensemble pipelines:
# median imputation followed by robust scaling.
imputer_step = ("imputer", SimpleImputer(strategy="median"))
scaler_step = ("scaler", RobustScaler())
preproc = [imputer_step, scaler_step]


In [51]:
# Base learners that the voting and stacking ensembles are built from.

# multinomial default in sklearn>=1.5
lr = LogisticRegression(
    max_iter=500,
    n_jobs=None,
)

rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    min_samples_leaf=2,
    random_state=42,
    n_jobs=-1,
)

hgb = HistGradientBoostingClassifier(
    learning_rate=0.05,
    max_depth=None,
    max_iter=300,
    random_state=42,
)

# probability=True is needed because both ensembles consume predicted probabilities.
svc = SVC(
    probability=True,
    kernel="rbf",
    C=2.0,
    gamma="scale",
    random_state=42,
)

In [52]:
# Soft-voting ensemble: average the predicted class probabilities of the base learners.
voting_members = [("lr", lr), ("rf", rf), ("hgb", hgb), ("svc", svc)]
voting_clf = VotingClassifier(
    estimators=voting_members,
    voting="soft",
    weights=[1, 1, 1, 1],  # equal weights; tweak if one model is clearly better
)

In [53]:
# Full voting pipeline: shared preprocessing -> SMOTE oversampling -> ensemble.
voting_steps = preproc + [
    ("smote", SMOTE(random_state=42, k_neighbors=5)),
    ("voting", voting_clf),
]
pipe_voting = ImbPipeline(steps=voting_steps)

In [54]:
# Stacking ensemble: a logistic-regression meta-learner trained on the base
# learners' predicted probabilities, with the original features appended
# (passthrough=True).
stacking_members = [("lr", lr), ("rf", rf), ("hgb", hgb), ("svc", svc)]
meta_learner = LogisticRegression(max_iter=500)
stacking_clf = StackingClassifier(
    estimators=stacking_members,
    final_estimator=meta_learner,
    stack_method="predict_proba",
    passthrough=True,
    n_jobs=-1,
)

In [55]:
# Full stacking pipeline: shared preprocessing -> SMOTE oversampling -> ensemble.
stacking_steps = preproc + [
    ("smote", SMOTE(random_state=42, k_neighbors=5)),
    ("stack", stacking_clf),
]
pipe_stacking = ImbPipeline(steps=stacking_steps)

In [56]:
# Shuffled, stratified 5-fold splitter with a fixed seed so folds are repeatable.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Report label -> scikit-learn scorer name.
scoring = dict(
    accuracy="accuracy",
    macro_f1="f1_macro",
    weighted_f1="f1_weighted",
)

# Both ensembles are cross-validated on the training split with identical settings.
cv_options = dict(cv=cv, scoring=scoring, n_jobs=-1, return_train_score=False)
cv_voting = cross_validate(pipe_voting, X_train, y_train, **cv_options)
cv_stack = cross_validate(pipe_stacking, X_train, y_train, **cv_options)

def summarize_cv(name, cvres, metrics=None):
    """Print the mean and standard deviation of each cross-validation test metric.

    Parameters
    ----------
    name : str
        Label shown in the section header.
    cvres : dict
        Mapping that holds a ``'test_<metric>'`` entry for every reported
        metric; each entry must provide ``.mean()`` and ``.std()``.
    metrics : iterable of str, optional
        Metric names to report, in print order. When omitted, the keys of the
        notebook-level ``scoring`` dict are used, which matches the previous
        behaviour.

    Raises
    ------
    KeyError
        If ``cvres`` has no ``'test_<metric>'`` entry for a requested metric.
    """
    if metrics is None:
        # Fall back to the global scorer dict so existing two-argument calls keep working.
        metrics = scoring
    print(f"\n=== {name} CV ===")
    for metric in metrics:
        scores = cvres['test_' + metric]
        print(f"{metric:>12}: mean={scores.mean():.4f} | std={scores.std():.4f}")

# Print the cross-validation summary for each ensemble, voting first.
for label, results in (("Voting", cv_voting), ("Stacking", cv_stack)):
    summarize_cv(label, results)


=== Voting CV ===
    accuracy: mean=0.9903 | std=0.0034
    macro_f1: mean=0.9900 | std=0.0035
 weighted_f1: mean=0.9903 | std=0.0034

=== Stacking CV ===
    accuracy: mean=1.0000 | std=0.0000
    macro_f1: mean=1.0000 | std=0.0000
 weighted_f1: mean=1.0000 | std=0.0000


## Best Results

In [57]:
from sklearn.metrics import accuracy_score

# Refit both ensembles on the full training split before the final evaluation.
pipe_voting.fit(X_train, y_train)
pipe_stacking.fit(X_train, y_train)

print("\n=== HOLDOUT TEST ===")
for name, model in [("Voting", pipe_voting), ("Stacking", pipe_stacking)]:
    # Predict once and reuse the result for every metric; calling model.score()
    # would push the test set through the whole ensemble a second time.
    y_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"\n{name} — Test Accuracy: {test_accuracy:.4f}")
    print(classification_report(y_test, y_pred, digits=4))
    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))


=== HOLDOUT TEST ===

Voting — Test Accuracy: 0.9833
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        63
           1     0.9636    0.9815    0.9725        54
           2     0.9839    0.9683    0.9760        63

    accuracy                         0.9833       180
   macro avg     0.9825    0.9832    0.9828       180
weighted avg     0.9834    0.9833    0.9833       180

Confusion matrix:
 [[63  0  0]
 [ 0 53  1]
 [ 0  2 61]]

Stacking — Test Accuracy: 0.9889
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        63
           1     0.9643    1.0000    0.9818        54
           2     1.0000    0.9683    0.9839        63

    accuracy                         0.9889       180
   macro avg     0.9881    0.9894    0.9886       180
weighted avg     0.9893    0.9889    0.9889       180

Confusion matrix:
 [[63  0  0]
 [ 0 54  0]
 [ 0  2 61]]
