# Feature Engineering

In this notebook, we create new features from the cleaned sleep dataset to improve model performance: an age-group category derived from `Age`, and a stress-to-sleep ratio derived from `Stress_Level` and `Sleep_Hours`. Feature engineering is a crucial step in the data preparation process, as it can significantly impact the effectiveness of our predictive models.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Read the cleaned dataset from the processed-data folder
data_path = '../data/processed/sleep_cleaned.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

# Preview the first rows as a quick sanity check on the load
df.head()

In [None]:
# Feature Engineering Functions

def create_age_groups(df):
    """Add a categorical ``Age_Group`` column derived from ``Age``.

    Ages are bucketed into bands whose upper bound is inclusive, so each
    band matches its label: '0-18', '19-35', '36-50', '51-65' and '66+'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a numeric ``Age`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``Age_Group`` added (or overwritten).

    Raises
    ------
    KeyError
        If ``df`` has no ``Age`` column.

    Notes
    -----
    Missing or negative ages fall outside every band and get a missing
    ``Age_Group`` value.
    """
    # Upper edges are inclusive (right=True) so that age 18 lands in '0-18'
    # and age 65 in '51-65', as the labels promise. The final edge is
    # open-ended so ages of 100 and above are still labelled '66+'.
    bins = [0, 18, 35, 50, 65, np.inf]
    labels = ['0-18', '19-35', '36-50', '51-65', '66+']
    df['Age_Group'] = pd.cut(
        df['Age'],
        bins=bins,
        labels=labels,
        right=True,
        include_lowest=True,  # keep age 0 inside the first band
    )
    return df

def calculate_stress_to_sleep_ratio(df):
    """Add a ``Stress_to_Sleep_Ratio`` column: ``Stress_Level / Sleep_Hours``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing numeric ``Stress_Level`` and ``Sleep_Hours``
        columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``Stress_to_Sleep_Ratio`` added
        (or overwritten).

    Raises
    ------
    KeyError
        If either input column is missing.

    Notes
    -----
    Rows where ``Sleep_Hours`` is 0 (or missing) get a missing ratio
    rather than an infinite one.
    """
    sleep_hours = df['Sleep_Hours']
    # Blank out zero denominators: dividing by 0 would put +/-inf into the
    # feature, whereas NaN is handled as an ordinary missing value downstream.
    safe_sleep_hours = sleep_hours.mask(sleep_hours == 0)
    df['Stress_to_Sleep_Ratio'] = df['Stress_Level'] / safe_sleep_hours
    return df

# Run each feature-engineering step in sequence on the loaded frame
df = df.pipe(create_age_groups).pipe(calculate_stress_to_sleep_ratio)

# Preview the frame, which now includes the engineered columns
df.head()

In [None]:
# Write the feature-engineered frame next to the cleaned data, omitting the row index
output_path = '../data/processed/sleep_feature_engineered.csv'
df.to_csv(path_or_buf=output_path, index=False)
print('Feature engineered data saved to:', output_path)