# Notebook 04: Feature Engineering

Purpose:
- Create interpretable behavioral features
- Prepare stable datasets for ML and analytics
- Produce final processed data for downstream use

In [1]:
import pandas as pd
import numpy as np

df_study = pd.read_csv("../data/raw/student_study_habits.csv")
df_habits = pd.read_csv("../data/raw/enhanced_student_habits_performance_dataset.csv")
df_time = pd.read_csv("../data/raw/Time Management and Productivity Insights.csv")

In [2]:
df_study["study_minutes_per_week"] = df_study["study_hours_per_week"] * 60
df_study["sleep_minutes_per_day"] = df_study["sleep_hours_per_day"] * 60
df_study.rename(columns={"final_grade": "exam_score"}, inplace=True)

In [3]:
df_time["work_minutes"] = df_time["Daily Work Hours"] * 60
df_time["sleep_minutes"] = df_time["Daily Sleep Hours"] * 60
df_time["leisure_minutes"] = df_time["Daily Leisure Hours"] * 60
df_time["exercise_minutes"] = df_time["Daily Exercise Minutes"]
df_time["commute_minutes"] = df_time["Commute Time (hours)"] * 60
df_time.rename(columns={"Productivity Score": "productivity_score"}, inplace=True)

In [4]:
df_time["total_active_minutes"]=(
    df_time["work_minutes"] +
    df_time["leisure_minutes"] +
    df_time["exercise_minutes"]
)
df_time["work_ratio"]=df_time["work_minutes"]/df_time["total_active_minutes"]
df_time["leisure_ratio"]=df_time["leisure_minutes"]/df_time["total_active_minutes"]
df_time["health_ratio"]=(
    df_time["sleep_minutes"]+df_time["exercise_minutes"]
)/(
    df_time["sleep_minutes"]+
    df_time["work_minutes"]+
    df_time["leisure_minutes"]+
    df_time["exercise_minutes"]
)

In [5]:
df_time["routine_variability"]=df_time[
    ["work_minutes", "sleep_minutes", "leisure_minutes"]
].std(axis=1)

In [6]:
df_study["study_effort_score"]=(
    df_study["study_minutes_per_week"]*
    df_study["attendance_percentage"]*
    df_study["assignments_completed"]
)

In [7]:
df_habits["distraction_load"]=(
    df_habits["social_media_hours"]+
    df_habits["netflix_hours"]
)
df_habits["recovery_score"]=(
    df_habits["sleep_hours"]*
    df_habits["exercise_frequency"]
)

In [8]:
time_features=df_time[
    [
        "work_minutes",
        "leisure_minutes",
        "exercise_minutes",
        "sleep_minutes",
        "commute_minutes",
        "work_ratio",
        "leisure_ratio",
        "health_ratio",
        "routine_variability",
        "productivity_score",
    ]
]

In [9]:
study_features = df_study[
    [
        "study_minutes_per_week",
        "sleep_minutes_per_day",
        "attendance_percentage",
        "assignments_completed",
        "study_effort_score",
        "exam_score",
    ]
]

In [10]:
habit_features = df_habits[
    [
        "distraction_load",
        "recovery_score",
        "stress_level",
        "motivation_level",
        "time_management_score",
        "exam_score",
    ]
]

In [11]:
time_features.to_csv("../data/processed/time_features.csv", index=False)
study_features.to_csv("../data/processed/study_features.csv", index=False)
habit_features.to_csv("../data/processed/habit_features.csv", index=False)

In [16]:
print("Sucessfully loaded the Time Features CSV", time_features)
print("Sucessfully loaded the Study Features CSV", study_features)
print("Sucessfully loaded the habit Features CSV", habit_features)

Sucessfully loaded the Time Features CSV     work_minutes  leisure_minutes  exercise_minutes  sleep_minutes  \
0          330.0            240.0                92          312.0   
1          288.0            210.0                 6          528.0   
2          204.0            126.0                75          432.0   
3          564.0            240.0                53          414.0   
4          522.0            336.0                46          444.0   
..           ...              ...               ...            ...   
80         486.0            204.0                85          456.0   
81         420.0            360.0                55          444.0   
82         558.0            246.0                65          432.0   
83         366.0            288.0                90          408.0   
84         504.0            174.0                75          474.0   

    commute_minutes  work_ratio  leisure_ratio  health_ratio  \
0              36.0    0.498489       0.362538      0.

## Feature Engineering Summary

- Created interpretable behavioral signals
- Preserved human reasoning clarity
- Avoided overfitting and excessive feature creation
- Saved stable processed datasets for ML and analytics