In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder

# Preprocessing pipeline for the smart-home mood dataset:
# load CSV -> one-hot encode categorical columns -> integer-encode the mood
# target -> standardize numeric columns -> write the result to a new CSV.

# Load dataset
df = pd.read_csv("smart_home_mood_data.csv")  # Adjust path if needed

# 1. Inspect dataset (optional)
print(df.head())
print("Missing values:\n", df.isnull().sum())
print("Mood distribution:\n", df["mood"].value_counts())

# 2. Encode categorical features
categorical_features = ['time_of_day', 'music_genre', 'movement']
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output`, and 1.4
# removed `sparse` entirely. Try the current name first and fall back to the
# legacy one so the script runs on both old and new releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoded_cats = encoder.fit_transform(df[categorical_features])
encoded_cat_df = pd.DataFrame(
    encoded_cats,
    columns=encoder.get_feature_names_out(categorical_features),
)

# 3. Encode mood label
# The integer-coded target is stored in a new 'mood_label' column; the fitted
# label_encoder is kept so the codes can be mapped back to mood names later.
label_encoder = LabelEncoder()
df['mood_label'] = label_encoder.fit_transform(df['mood'])

# 4. Drop original categorical features and 'mood' text
# 'mood_label' is not dropped, so it remains part of the output frame.
df_numerical = df.drop(columns=categorical_features + ['mood'])

# 5. Combine numerical and encoded categorical features
# Both indexes are reset so the column-wise concat aligns rows by position
# rather than by whatever index labels `df` happened to carry.
df_preprocessed = pd.concat(
    [df_numerical.reset_index(drop=True), encoded_cat_df.reset_index(drop=True)],
    axis=1,
)

# 6. Normalize numerical features
# Only the columns listed here are standardized; the one-hot columns and
# 'mood_label' are left unscaled.
numerical_features = ['brightness', 'light_color_temp', 'room_temp', 'sound_level']
scaler = StandardScaler()
df_preprocessed[numerical_features] = scaler.fit_transform(df_preprocessed[numerical_features])

# 7. Final dataset preview
print(df_preprocessed.head())

# Optional: Save preprocessed dataset
df_preprocessed.to_csv("smart_home_mood_data_preprocessed.csv", index=False)


: 