Building a Energy Efficiency model
 

In [25]:
import pandas as pd
url = "https://raw.githubusercontent.com/StephenElston/DataScience350/master/Lecture1/EnergyEfficiencyData.csv"
df = pd.read_csv(url)

In [26]:
df.head()

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load,Cooling Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


 ✅ 1. Step one of creating the model

In [27]:
 
from tensorflow import keras
from tensorflow.keras import layers 

model = keras.Sequential([

    layers.Dense(units=64, activation="relu", input_shape=[8]),
    layers.Dense(units=64, activation="relu"),
    layers.Dense(units=64, activation="relu"),
    layers.Dense(units=2)
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


 ✅ 2. Compile the Model

In [None]:
# ✅ 2. Compile the Model
# Before training, you need to tell Keras:
# What loss function to optimize
# What optimizer to use
# What metrics to monitor
# For a regression task (predicting continuous values):
model.compile(
    optimizer="adam",            # Adaptive optimizer
    loss="mse",                  # Mean Squared Error for regression
    metrics=["mae"]              # Mean Absolute Error as performance metric
)



📊 3. Prepare Your Data
Assuming you have your features X and target y from the energy dataset:

In [31]:
# Side Note X should be capitalize 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define input features (X) and both outputs (y)
X = df.iloc[:, 0:8]                   # first 8 columns as features   # 8 input features
y = df[["Heating Load", "Cooling Load"]] # regression target  # 2 targets as a DataFrame

# Split into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize inputs by scaling it to standardize it before feeding it to the model
scaler= StandardScaler()
X_train_scaled = scaler.fit_transform(X_train) # Fit on training data
# But that’s not ideal. You should only fit the scaler on your training data, then use transform on your test data. Why? Because fit_transform on test data leaks information from the test set into your preprocessing pipeline, which can lead to data leakage and overly optimistic model performance.
X_test_scaled = scaler.transform(X_test) # Transform test data only
# Using fit_transform on both sets means the scaler learns from both training and test data, which violates the principle of keeping test data unseen. This can skew your model evaluation and lead to misleading results.


# Scale targets
y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(y_train) # Fit on training data
# But that’s not ideal. You should only fit the scaler on your training data, then use transform on your test data. Why? Because fit_transform on test data leaks information from the test set into your preprocessing pipeline, which can lead to data leakage and overly optimistic model performance.
y_test_scaled = y_scaler.transform(y_test) # Transform test data only
# Using fit_transform on both sets means the scaler learns from both training and test data, which violates the principle of keeping test data unseen. This can skew your model evaluation and lead to misleading results.



📈 5. Evaluate the Model

In [32]:
# loss mae = model.evaluate(X_test_scaled, y_test)
# print(f"Test Loss: {loss:.4f}, Test MAE: {mae:.4f}")
# model.metrics_names  # Returns: ['loss', 'mae']


results = model.evaluate(X_test_scaled, y_test_scaled)
print("Evaluation Results:", results)

# Loss (684.35): This is the Mean Squared Error (MSE) over both output values — it combines the errors for Heating and Cooling Load.
# MAE (24.15): This is the Mean Absolute Error, again aggregated — meaning it's the average absolute difference between the predicted and actual Heating/Cooling values.

# 🧠 So on average, your model is off by ~24 units (kWh/m², the unit used in the dataset).

ValueError: You must call `compile()` before using the model.

✅ 1. Print actual vs predicted values

In [30]:
y_pred = model.predict(X_test_scaled)

for i in range(5):
    heating_pred = y_pred[i][0]
    cooling_pred = y_pred[i][1]
    heating_true = y_test.values[i][0]
    cooling_true = y_test.values[i][1]

    print(f"Predicted → Heating: {heating_pred:.2f}, Cooling: {cooling_pred:.2f} | "
          f"Actual → Heating: {heating_true:.2f}, Cooling: {cooling_true:.2f}")

print("Predictions shape:", y_pred.shape)  # Should be (num_samples, 2)



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
Predicted → Heating: -0.10, Cooling: -0.08 | Actual → Heating: 16.47, Cooling: 16.90
Predicted → Heating: -0.03, Cooling: 0.05 | Actual → Heating: 13.17, Cooling: 16.39
Predicted → Heating: -0.35, Cooling: -0.15 | Actual → Heating: 32.82, Cooling: 32.78
Predicted → Heating: -0.16, Cooling: 0.03 | Actual → Heating: 41.32, Cooling: 46.23
Predicted → Heating: -0.05, Cooling: -0.12 | Actual → Heating: 16.69, Cooling: 19.76
Predictions shape: (154, 2)


✅ 2. Plot Predicted vs Actual

In [None]:
import matplotlib.pyplot as plt

# Separate the values
y_test_np = y_test.values
y_pred_np = y_pred

# Plot Heating Load
plt.figure(figsize=(6, 6))
plt.scatter(y_test_np[:, 0], y_pred_np[:, 0], alpha=0.7)
plt.plot([y_test_np[:, 0].min(), y_test_np[:, 0].max()],
         [y_test_np[:, 0].min(), y_test_np[:, 0].max()],
         'r--')
plt.xlabel("Actual Heating Load")
plt.ylabel("Predicted Heating Load")
plt.title("Heating Load: Actual vs Predicted")
plt.grid(True)
plt.show()

# Plot Cooling Load
plt.figure(figsize=(6, 6))
plt.scatter(y_test_np[:, 1], y_pred_np[:, 1], alpha=0.7)
plt.plot([y_test_np[:, 1].min(), y_test_np[:, 1].max()],
         [y_test_np[:, 1].min(), y_test_np[:, 1].max()],
         'r--')
plt.xlabel("Actual Cooling Load")
plt.ylabel("Predicted Cooling Load")
plt.title("Cooling Load: Actual vs Predicted")
plt.grid(True)
plt.show()


NameError: name 'history' is not defined

✅ 7. Make Predictions

In [10]:
# Predict on test data
y_pred = model.predict(X_test_scaled)

# Print the first 5 predictions alongside actual values
import numpy as np
for pred, actual in zip(y_pred[:5], y_test[:5]):
    print(f"Predicted: {pred[0]:.2f}, Actual: {actual:.2f}")


NameError: name 'X_test_scaled' is not defined

💾 8. Save the Model

In [11]:
model.save("energy_model.h5")  # Save in HDF5 format

# You can later load it with:

from tensorflow.keras.models import load_model

model = load_model("energy_model.h5")




ValueError: Could not deserialize 'keras.metrics.mse' because it is not a KerasSaveable subclass

🧪 9. Visualize Predictions vs Actuals

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(6, 6))
plt.scatter(y_test, y_pred, alpha=0.7)
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.title("Actual vs Predicted Values")
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')  # Line y=x
plt.grid(True)
plt.show()
