In [1]:
import numpy as np
import pandas as pd

# Seed the legacy global NumPy RNG so every draw below is reproducible.
np.random.seed(42)

# Number of synthetic rows to generate.
n = 1000

# Raw feature draws. The order of these calls fixes the random stream, so
# reordering them would produce a different dataset.
sleep = np.random.uniform(4, 9, n)
study = np.random.uniform(1, 6, n)
time = np.random.choice(['morning', 'afternoon', 'night'], n)
breaks = np.random.randint(0, 3, n)  # upper bound is exclusive: values 0, 1, 2
caffeine = np.random.uniform(0, 300, n)

# Focus score: rises with sleep, falls with study duration and caffeine,
# gets a flat +1 for morning rows, plus Gaussian noise (sigma = 0.5).
# `breaks` is intentionally not part of the formula.
morning_bonus = (time == 'morning').astype(int)
noise = np.random.normal(0, 0.5, n)
focus = 0.6 * sleep - 0.4 * study - 0.002 * caffeine + morning_bonus + noise

# Bound the score to the closed interval [1, 10].
focus = focus.clip(1, 10)

# Burnout flag (0/1): (short sleep AND long study) OR very high caffeine.
short_sleep_long_study = (sleep < 5) & (study > 4)
heavy_caffeine = caffeine > 250
burnout = (short_sleep_long_study | heavy_caffeine).astype(int)

# Assemble the frame; key order here defines the column order.
columns = {
    'sleep_hours': sleep,
    'study_duration': study,
    'time_of_day': time,
    'breaks_per_hour': breaks,
    'caffeine_mg': caffeine,
    'focus_score': focus,
    'burnout': burnout,
}
df = pd.DataFrame(columns)

df.head()

Unnamed: 0,sleep_hours,study_duration,time_of_day,breaks_per_hour,caffeine_mg,focus_score,burnout
0,5.872701,1.925665,night,1,62.17269,2.809267,0
1,8.753572,3.709505,night,0,213.97076,2.603906,0
2,7.65997,5.364729,night,0,115.986052,2.813064,0
3,6.993292,4.661124,afternoon,2,198.745184,1.133834,0
4,4.780093,5.032806,afternoon,2,104.088798,1.0,1


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score

# Features and targets. Both targets come from the same feature matrix, so
# the two target columns are dropped from X to avoid leaking them.
X = df.drop(['focus_score', 'burnout'], axis=1)
y_reg = df['focus_score']
y_clf = df['burnout']

categorical = ['time_of_day']
numerical = ['sleep_hours', 'study_duration', 'breaks_per_hour', 'caffeine_mg']


def make_preprocessor():
    """Return a new, unfitted ColumnTransformer for the feature columns.

    One-hot encodes the columns listed in `categorical` and passes the
    columns listed in `numerical` through unchanged.

    A fresh instance is built per call because Pipeline does not clone its
    steps: putting one transformer object into two pipelines means fitting
    the second pipeline silently refits the transformer inside the first.
    """
    return ColumnTransformer(
        transformers=[
            ('cat', OneHotEncoder(), categorical),
            ('num', 'passthrough', numerical),
        ]
    )


# Module-level instance kept so the name `preprocessor` stays available;
# it is the transformer used by `reg_model` below.
preprocessor = make_preprocessor()

# One split for features and both targets, so the train/test rows are aligned
# by construction rather than by repeating the same random_state twice.
(
    X_train, X_test,
    y_train_reg, y_test_reg,
    y_train_clf, y_test_clf,
) = train_test_split(X, y_reg, y_clf, test_size=0.2, random_state=42)

# Regression: predict focus_score. The forest is seeded so the reported
# metrics are reproducible across Restart & Run All.
reg_model = Pipeline(steps=[
    ('preprocess', preprocessor),
    ('model', RandomForestRegressor(random_state=42))
])

reg_model.fit(X_train, y_train_reg)
y_pred_reg = reg_model.predict(X_test)

print("Regression R2:", r2_score(y_test_reg, y_pred_reg))
print("Regression MSE:", mean_squared_error(y_test_reg, y_pred_reg))

# Classification: predict the burnout flag. Uses its own preprocessor
# instance so fitting it does not touch the one inside `reg_model`.
clf_model = Pipeline(steps=[
    ('preprocess', make_preprocessor()),
    ('model', LogisticRegression(max_iter=1000))
])

clf_model.fit(X_train, y_train_clf)
y_pred_clf = clf_model.predict(X_test)

print("Classification Accuracy:", accuracy_score(y_test_clf, y_pred_clf))

# The burnout classes are imbalanced, so accuracy needs a reference point:
# this is the score of always predicting the most frequent test-set class.
majority_baseline = y_test_clf.value_counts(normalize=True).max()
print("Majority-class baseline accuracy:", majority_baseline)

Regression R2: 0.8081056476716046
Regression MSE: 0.2466933539789411
Classification Accuracy: 0.83
