# Load data

In [12]:
%pip install scikit-learn joblib pandas numpy tqdm
import os
import pandas as pd
import numpy as ns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
import joblib
from tqdm import tqdm

df = pd.read_csv('C:\\Users\\84913\\source\\repos\\IOT\\backend\\ai\\data\\environmental_data.csv')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.sort_values('timestamp')

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
df.head()

Unnamed: 0,timestamp,temperature_C,humidity_%,CO_ppm,action
0,2025-10-01 00:00:00+07:00,24.02,74.75,19.63,high_humidity_turn_on_AC
1,2025-10-01 00:01:00+07:00,23.94,75.32,20.47,high_humidity_turn_on_AC
2,2025-10-01 00:02:00+07:00,23.97,75.02,19.68,high_humidity_turn_on_AC
3,2025-10-01 00:03:00+07:00,24.02,75.49,19.9,high_humidity_turn_on_AC
4,2025-10-01 00:04:00+07:00,23.98,75.1,20.14,high_humidity_turn_on_AC


# Feature engineering

In [14]:
future_steps = 5 # Predict 5 minutes into the future
features = ['temperature_C', 'humidity_%', 'CO_ppm']

# Create Lagged Targets
for feat in features:
    df[f'target_{feat}'] = df[feat].shift(-future_steps)
df['target_action'] = df['action'].shift(-future_steps)

# Drop NaNs created by shifting
df_clean = df.dropna()

# Encoding Categorical Data

In [15]:
le = LabelEncoder()
df_clean['action_encoded'] = le.fit_transform(df_clean['action'])
df_clean['target_action_encoded'] = le.transform(df_clean['target_action'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['action_encoded'] = le.fit_transform(df_clean['action'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['target_action_encoded'] = le.transform(df_clean['target_action'])


# Prepare Inputs (X) and Outputs (y)

In [16]:
X = df_clean[features + ['action_encoded']].values

# Output 1: The future prediction
y_reg = df_clean[[f'target_{feat}' for feat in features]].values

# Output 2: The future action
y_cls = df_clean['target_action_encoded'].values

# Split data

In [17]:
X_train, X_test, y_reg_train, y_reg_test, y_cls_train, y_cls_test = train_test_split(
    X, y_reg, y_cls, test_size=0.2, random_state=42
)


# Standarisation

In [18]:
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_y_reg = StandardScaler()
y_reg_train_scaled = scaler_y_reg.fit_transform(y_reg_train)

# Model Definition (5 Hidden Layers)

In [19]:
# Architecture: Input -> 128 -> 64 -> 32 -> 16 -> 8 -> Output

# Regression Model for future environmental parameters
reg_model = MLPRegressor(
    hidden_layer_sizes=(128, 64, 32, 16, 8),
    activation='relu',
    solver='adam',
    max_iter=500,
    random_state=42
)

# Classifier for future action
cls_model = MLPClassifier(
    hidden_layer_sizes=(128, 64, 32, 16, 8),
    activation='relu',
    solver='adam',
    max_iter=500,
    random_state=42
)

# Training

In [20]:
with tqdm(total=2, desc="Overall Training Progress", colour='green') as pbar:
    
    tqdm.write("Training Regression Model...") # Use tqdm.write to avoid breaking the bar layout
    reg_model.fit(X_train_scaled, y_reg_train_scaled)
    pbar.update(1) # Advance bar by 1
    
    tqdm.write("Training Classification Model...")
    cls_model.fit(X_train_scaled, y_cls_train)
    pbar.update(1)

Overall Training Progress:   0%|[32m          [0m| 0/2 [00:00<?, ?it/s]

Training Regression Model...


Overall Training Progress:  50%|[32m█████     [0m| 1/2 [00:05<00:05,  5.77s/it]

Training Classification Model...


Overall Training Progress: 100%|[32m██████████[0m| 2/2 [00:35<00:00, 17.86s/it]


# Evaluation

In [21]:
# Regression Metrics
y_reg_pred_scaled = reg_model.predict(X_test_scaled)
y_reg_pred = scaler_y_reg.inverse_transform(y_reg_pred_scaled)
mse = mean_squared_error(y_reg_test, y_reg_pred)
r2 = r2_score(y_reg_test, y_reg_pred)

# Classification Metrics
y_cls_pred = cls_model.predict(X_test_scaled)
acc = accuracy_score(y_cls_test, y_cls_pred)

print(f"\n--- Results (5-Layer Deep Network) ---")
print(f"Environmental Prediction R2: {r2:.4f}")
print(f"Action Prediction Accuracy:  {acc*100:.2f}%")

print("\n--- Detailed Action Classification Report ---")
print(classification_report(y_cls_test, y_cls_pred, target_names=le.classes_))


--- Results (5-Layer Deep Network) ---
Environmental Prediction R2: 0.6674
Action Prediction Accuracy:  99.42%

--- Detailed Action Classification Report ---
                              precision    recall  f1-score   support

high_CO_turn_on_Air_Purifier       1.00      0.99      0.99      1505
    high_humidity_turn_on_AC       0.99      1.00      0.99      1512
        high_temp_turn_on_AC       1.00      0.99      1.00      1557
                      normal       0.99      0.99      0.99      1425

                    accuracy                           0.99      5999
                   macro avg       0.99      0.99      0.99      5999
                weighted avg       0.99      0.99      0.99      5999



# Save Models

In [22]:
save_dir = r'C:\Users\84913\source\repos\IOT\backend\ai\ai_models'

# Create the directory if it does not exist to avoid 'FileNotFoundError'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
    print(f"Created directory: {save_dir}")

joblib.dump(reg_model, os.path.join(save_dir, 'ecs_deep_regressor.pkl'))
joblib.dump(cls_model, os.path.join(save_dir, 'ecs_deep_classifier.pkl'))
joblib.dump(scaler_X, os.path.join(save_dir, 'scaler_X.pkl'))
joblib.dump(scaler_y_reg, os.path.join(save_dir, 'scaler_y_reg.pkl'))
joblib.dump(le, os.path.join(save_dir, 'label_encoder.pkl'))

['C:\\Users\\84913\\source\\repos\\IOT\\backend\\ai\\ai_models\\label_encoder.pkl']