In [5]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder

from sklearn.preprocessing import OneHotEncoder



# Load the raw smart-home sensor/mood records.
df = pd.read_csv("smart_home_mood_data.csv")  # Adjust path if needed

# 1. Inspect dataset (optional)
print(df.head())
print("Missing values:\n", df.isnull().sum())
print("Mood distribution:\n", df["mood"].value_counts())

# 2. Encode categorical features
categorical_features = ['time_of_day', 'music_genre', 'movement']

# handle_unknown='ignore' makes encoder.transform() emit an all-zero group for
# a category that was not present at fit time instead of raising ValueError.
# The fitted encoder is reused later on single live readings, whose values are
# not guaranteed to have appeared in the training file. The fit_transform()
# output on the training data itself is unaffected by this option.
# NOTE(review): missing values in these columns are passed to the encoder
# unchanged; confirm how the installed scikit-learn version encodes NaN.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_cats = encoder.fit_transform(df[categorical_features])
encoded_cat_df = pd.DataFrame(
    encoded_cats,
    columns=encoder.get_feature_names_out(categorical_features),
)

# 3. Encode mood label (text -> integer code stored in 'mood_label')
label_encoder = LabelEncoder()
df['mood_label'] = label_encoder.fit_transform(df['mood'])

# 4. Drop original categorical features and 'mood' text.
#    'mood_label' stays in the frame as the training target.
df_numerical = df.drop(columns=categorical_features + ['mood'])

# 5. Combine numerical and encoded categorical features.
#    Both indexes are reset so the column-wise concat aligns rows by position.
df_preprocessed = pd.concat(
    [df_numerical.reset_index(drop=True), encoded_cat_df.reset_index(drop=True)],
    axis=1,
)

# 6. Normalize numerical features
# NOTE(review): the scaler is fitted on every row, before any train/test split,
# so test-set statistics influence the scaling. Consider fitting it on the
# training portion only.
numerical_features = ['brightness', 'light_color_temp', 'room_temp', 'sound_level']
scaler = StandardScaler()
df_preprocessed[numerical_features] = scaler.fit_transform(df_preprocessed[numerical_features])

# 7. Final dataset preview
print(df_preprocessed.head())

# Optional: Save preprocessed dataset
df_preprocessed.to_csv("smart_home_mood_data_preprocessed.csv", index=False)


  time_of_day  brightness  light_color_temp  room_temp  sound_level  \
0     Morning          21              5733       22.1           75   
1   Afternoon          41              2002       16.1           59   
2   Afternoon           3              3509       21.7           22   
3     Evening          73              5699       18.3           28   
4     Morning          20              4331       24.7           71   

  music_genre movement       mood  
0        Jazz     High  Energetic  
1         NaN     High   Stressed  
2         Pop     High   Stressed  
3         Pop      Low    Neutral  
4         NaN   Medium   Stressed  
Missing values:
 time_of_day          0
brightness           0
light_color_temp     0
room_temp            0
sound_level          0
music_genre         21
movement             0
mood                 0
dtype: int64
Mood distribution:
 mood
Neutral      47
Stressed     37
Energetic     8
Relaxed       7
Sad           1
Name: count, dtype: int64
   brightnes

In [7]:
# Remove rare mood classes with < 2 samples (a stratified split needs at least
# two rows per class).
#
# The filter starts from df_preprocessed, not the raw df: the raw frame still
# carries the string columns ('time_of_day', 'music_genre', 'movement') and the
# 'mood' text itself, which the classifier cannot consume and which would leak
# the target into the features. Reading from df_preprocessed also makes this
# cell idempotent, because re-running it never filters an already-filtered frame.
label_counts = df_preprocessed['mood_label'].value_counts()
rare_moods = label_counts[label_counts < 2].index
df = df_preprocessed[~df_preprocessed['mood_label'].isin(rare_moods)]


In [8]:
# These names are not imported anywhere else in the notebook, so without the
# imports below this cell raises NameError on a fresh kernel.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Features and target: everything except the integer mood code is a feature.
X = df.drop(columns=['mood_label'])
y = df['mood_label']

# Split dataset: 80/20, stratified on the label so each class keeps its share
# in both parts; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate on the held-out rows
y_pred = model.predict(X_test)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


✅ Accuracy: 0.95

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      0.90      0.95        10
           2       1.00      1.00      1.00         1
           4       0.88      1.00      0.93         7

    accuracy                           0.95        20
   macro avg       0.97      0.97      0.97        20
weighted avg       0.96      0.95      0.95        20


Confusion Matrix:
 [[2 0 0 0]
 [0 9 0 1]
 [0 0 1 0]
 [0 0 0 7]]


In [13]:
import numpy as np

# Example live environment input: one reading using the raw training columns.
live_input = {
    "time_of_day": "Night",
    "brightness": 5,
    "light_color_temp": 1800,
    "room_temp": 20.0,
    "sound_level": 10,
    "music_genre": "Rock",
    "movement": "Low",
}

# Create a one-row DataFrame
live_df = pd.DataFrame([live_input])

# Encode categorical fields with the encoder fitted on the training data.
# The encoder produces every one-hot column itself, so no placeholder columns
# need to be added to live_df beforehand.
cat_encoded = encoder.transform(live_df[categorical_features])
cat_encoded_df = pd.DataFrame(
    cat_encoded,
    columns=encoder.get_feature_names_out(categorical_features),
)

# Scale numerical fields with the scaler fitted on the training data.
num_features = ['brightness', 'light_color_temp', 'room_temp', 'sound_level']
num_scaled = scaler.transform(live_df[num_features])
num_scaled_df = pd.DataFrame(num_scaled, columns=num_features)

# Concatenate numerical and encoded categorical
live_processed = pd.concat([num_scaled_df, cat_encoded_df], axis=1)

# Match the model's feature set and order: columns absent from the live row
# are created with 0, and anything the model was not trained on is dropped.
live_processed = live_processed.reindex(columns=X_train.columns, fill_value=0)

# Predict
pred_label = model.predict(live_processed)[0]

# Map back to the mood name through the fitted LabelEncoder rather than a
# hand-written dict, so the mapping cannot drift from the training labels.
predicted_mood = label_encoder.inverse_transform([pred_label])[0]

print(f"🧠 Predicted Mood: {predicted_mood}")


🧠 Predicted Mood: Neutral


In [14]:
import joblib

# Persist the fitted objects needed to run inference outside this notebook.
# label_encoder is saved as well: the model predicts integer codes, and the
# encoder is the only object that maps them back to mood names.
# The scaler dump stays last so the cell's displayed value is unchanged.
# NOTE: these files are pickles; only load them from a trusted location.
joblib.dump(model, 'mood_model.pkl')
joblib.dump(encoder, 'encoder.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']