In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [4]:
# --- 1. Load Data ---
try:
    data = pd.read_csv("scored_sensor_data.csv")
    print("Loaded 'scored_sensor_data.csv' successfully.")
except FileNotFoundError:
    print("Error: 'scored_sensor_data.csv' not found.")
    # Stop the cell here. Falling through would either fail below with an
    # unrelated NameError on `data`, or silently reuse a stale `data` that an
    # earlier run left in the kernel.
    raise

# --- 2. Define Features and Target ---
# Column names read from the CSV: six sensor inputs and one regression target.
features = ['bpm', 'temperature', 'humidity', 'noise', 'ldr', 'in_motion']
target = 'happiness_score'

# --- 3. Create and Fit Scalers ---
# NOTE(review): both scalers are fit on the *entire* dataset, i.e. before the
# train/test split performed in a later cell, so statistics of the test rows
# leak into the scaling. Consider fitting on the training rows only.

# Feature Scaler (StandardScaler): standardizes each input column to zero mean
# and unit variance.
feature_scaler = StandardScaler()
X_scaled = feature_scaler.fit_transform(data[features])

# Target Scaler (MinMaxScaler): maps the target column into the 0-1 range.
# The double brackets keep the target 2-D, as the scaler requires.
target_scaler = MinMaxScaler(feature_range=(0, 1))
y_scaled = target_scaler.fit_transform(data[[target]])

print("Features and target have been scaled.")

Loaded 'scored_sensor_data.csv' successfully.
Features and target have been scaled.


In [1]:
# Environment check: print the installed TensorFlow version so the run can be
# reproduced with the same release.
# NOTE(review): tensorflow is already imported in the notebook's first import
# cell; this re-import is redundant but harmless.
import tensorflow as tf
print(tf.__version__)

2.13.0


In [5]:
# --- 1. Create Time-Series Sequences ---
def create_sequences(X, y, time_steps=10):
    """Slice X into overlapping look-back windows paired with the next y value.

    Window ``k`` holds rows ``k`` .. ``k + time_steps - 1`` of ``X`` and is
    labelled with ``y[k + time_steps]``, i.e. the target one step after the
    window ends. ``len(X) - time_steps`` windows are produced (none when that
    difference is not positive).

    :param X: Indexable/sliceable sequence of feature rows; for a 2-D array of
              shape (n, f) the first result has shape
              (n - time_steps, time_steps, f).
    :param y: Indexable sequence of targets aligned row-for-row with ``X``.
    :param time_steps: Number of consecutive rows in each window.
    :return: Tuple ``(windows, labels)`` as NumPy arrays.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    labels = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(labels)

TIME_STEPS = 10  # Each sample will be 10 steps of sensor data
X_seq, y_seq = create_sequences(X_scaled, y_scaled, TIME_STEPS)

# Fail early with a readable message when the dataset is too short to yield a
# single window; otherwise the problem only surfaces later as a shape error.
assert X_seq.ndim == 3 and len(X_seq) > 0, (
    f"Need more than {TIME_STEPS} rows to build a sequence; got {len(X_scaled)}."
)

print(f"Sequenced features shape: {X_seq.shape}") # (Samples, Time Steps, Features)
print(f"Sequenced target shape: {y_seq.shape}")

# --- 2. Split Data ---
# shuffle=False is critical for time-series data: the earliest 80% of windows
# train the model and the latest 20% are held out. `random_state` is omitted
# because it has no effect when shuffling is disabled.
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)
print(f"Training data shape: {X_train.shape}, Testing data shape: {X_test.shape}")

# --- 3. Build the LSTM Model ---
print("\nBuilding LSTM model...")

# Seed the Python, NumPy and TensorFlow RNGs *before* any layer is created so
# weight initialisation, dropout masks and batch shuffling repeat across runs.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

n_features = X_seq.shape[2]  # size of the last axis = number of input features

# Two stacked LSTM layers (the first returns the full sequence so the second
# can consume it), each followed by dropout, then a small dense head.
model_lstm = Sequential([
    LSTM(units=64, return_sequences=True, input_shape=(TIME_STEPS, n_features)),
    Dropout(0.2),
    LSTM(units=32),
    Dropout(0.2),
    Dense(units=16, activation='relu'),
    # Final layer for regression: 1 unit, linear activation
    Dense(units=1, activation='linear'),
])

# Compile with a regression loss function
model_lstm.compile(loss='mean_squared_error', optimizer='adam')
model_lstm.summary()

# --- 4. Train the Model ---
print("\nTraining LSTM model...")
# EarlyStopping halts training once validation loss has not improved for
# `patience` epochs and rolls the model back to its best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model_lstm.fit(
    X_train, y_train,
    epochs=50,  # Upper bound; EarlyStopping normally ends training sooner
    batch_size=32,
    validation_split=0.1,  # held out from the training arrays for val_loss
    shuffle=True,
    callbacks=[early_stopping]
)

# --- Evaluate on the held-out test split ---
# The test windows were never seen during fitting or early stopping, so these
# numbers are the generalisation estimate. Predictions and targets are mapped
# back through the target scaler so the error is in the target's original units.
y_test_true = target_scaler.inverse_transform(y_test)
y_test_pred = target_scaler.inverse_transform(model_lstm.predict(X_test, verbose=0))
test_rmse = np.sqrt(mean_squared_error(y_test_true, y_test_pred))
test_r2 = r2_score(y_test_true, y_test_pred)
print(f"Test RMSE: {test_rmse:.2f} | Test R^2: {test_r2:.3f}")

# --- 5. Save Model and Scalers ---
# Write the trained network first, then pickle each fitted scaler next to it so
# inference code can reload all three artifacts together.
model_lstm.save('emogotchi_lstm_regressor.keras')
for fitted_scaler, pkl_path in (
    (feature_scaler, 'sensor_scaler.pkl'),
    (target_scaler, 'target_scaler.pkl'),
):
    joblib.dump(fitted_scaler, pkl_path)
print("LSTM model and scalers saved successfully.")

Sequenced features shape: (1040, 10, 6)
Sequenced target shape: (1040, 1)
Training data shape: (832, 10, 6), Testing data shape: (208, 10, 6)

Building LSTM model...
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 64)            18176     
                                                                 
 dropout (Dropout)           (None, 10, 64)            0         
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense (Dense)               (None, 16)                528       
                                                                 
 dense_1 (Dense)      

In [6]:
import numpy as np
import pandas as pd
import joblib
from tensorflow.keras.models import load_model

# --- 1. Load All Saved Components ---
try:
    model = load_model('emogotchi_lstm_regressor.keras')
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load scaler files that this project produced itself.
    feature_scaler = joblib.load('sensor_scaler.pkl')
    target_scaler = joblib.load('target_scaler.pkl')
    print("Successfully loaded LSTM model and both scalers for Rules Engine.")
except Exception as e:
    print(f"Error loading models. Make sure you have run the training pipeline first.\n{e}")
    # Re-raise: without these objects get_happiness_score() cannot run the
    # model and would silently return its fallback score on every call.
    raise

# Feature order must match the column order the scaler and model were fit on.
features = ['bpm', 'temperature', 'humidity', 'noise', 'ldr', 'in_motion']

def get_happiness_score(sensor_sequence):
    """
    Score a window of sensor readings on a 0-100 happiness scale.

    Hard-threshold rules on the most recent reading take priority. When no
    rule fires, the whole window is scaled with ``feature_scaler``, passed
    through ``model``, mapped back with ``target_scaler`` and clipped to 0-100.

    :param sensor_sequence: A list of sensor data dictionaries, from oldest to
        newest. Each must provide the keys listed in ``features``; the model
        in this notebook is trained on windows of 10 readings.
    :return: A built-in float (0-100) rounded to one decimal place. The neutral
        score 50.0 is returned when the sequence is empty, when the model path
        raises, or when the prediction is not a finite number.
    :raises KeyError: If the most recent reading lacks a key consulted by the
        hard-threshold rules ('bpm', 'in_motion', 'temperature').
    """
    import logging

    NEUTRAL_SCORE = 50.0          # fallback when no usable prediction exists
    HIGH_BPM = 130                # resting heart-rate threshold
    HIGH_TEMPERATURE = 32         # temperature threshold

    # Nothing to score: apply the same neutral fallback as any other failure
    # instead of crashing on sensor_sequence[-1].
    if len(sensor_sequence) == 0:
        return NEUTRAL_SCORE

    # Get the MOST RECENT sensor reading
    latest_reading = sensor_sequence[-1]

    # --- 1. Check Hard Thresholds First ---
    if latest_reading['bpm'] > HIGH_BPM and latest_reading['in_motion'] == 0:
        return 10.0
    if latest_reading['temperature'] > HIGH_TEMPERATURE:
        return 15.0

    # --- 2. If no hard rule, use LSTM Model ---
    try:
        # One row per reading, columns in the order given by `features`.
        rows = [[reading[f] for f in features] for reading in sensor_sequence]

        # The scaler was fit on a DataFrame, so hand it named columns too;
        # a bare ndarray makes scikit-learn warn about missing feature names.
        frame = pd.DataFrame(rows, columns=features)
        data_scaled = np.asarray(feature_scaler.transform(frame))

        # Reshape for LSTM: (1 sample, time steps, features)
        data_lstm = np.expand_dims(data_scaled, axis=0)

        # Predict the *scaled* emotion score
        scaled_score = model.predict(data_lstm, verbose=0)[0]

        # Inverse-transform back to the target's original scale. float() turns
        # the NumPy scalar into a built-in float so every branch returns the
        # same type (and the result is JSON-serialisable).
        emotion_score = float(
            target_scaler.inverse_transform(scaled_score.reshape(-1, 1))[0][0]
        )
    except Exception:
        # Deliberate best-effort: fall back to the neutral score without
        # printing. The failure is still recorded at DEBUG level so it can be
        # surfaced by enabling logging.
        logging.getLogger(__name__).debug(
            "get_happiness_score: model path failed; returning neutral score",
            exc_info=True,
        )
        return NEUTRAL_SCORE

    # NaN/inf would pass straight through clip() and round().
    if not np.isfinite(emotion_score):
        return NEUTRAL_SCORE

    # --- 3. Return only the numeric score, clipped to 0-100 ---
    return round(float(np.clip(emotion_score, 0, 100)), 1)

Successfully loaded LSTM model and both scalers for Rules Engine.
