<a href="https://colab.research.google.com/github/Tasleem1027/OPTIMISATION_TEC1/blob/main/Untitled73.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Install required packages
!pip install kagglehub scikit-learn tensorflow joblib pandas numpy

import kagglehub
import os
import pandas as pd
import numpy as np
import time
import joblib
import tempfile

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Flatten

# 1. Download HVAC dataset
path = kagglehub.dataset_download("alirezaetemad/simulated-hvac-energy-consumption")
print("Dataset downloaded to:", path)

# Find CSV file in downloaded folder
csv_files = [f for f in os.listdir(path) if f.endswith(".csv")]
if not csv_files:
    raise FileNotFoundError("No CSV file found in dataset directory")
data_path = os.path.join(path, csv_files[0])

# 2. Load dataset
df = pd.read_csv(data_path)
print(df.head())

# Basic preprocessing — adjust to dataset structure
# Assume dataset has columns: ['OutdoorTemp', 'Occupancy', 'Setpoint', 'EnergyConsumption']
# You may need to adjust these names based on actual file content
feature_cols = ['P0', 'P1', 'P2', 'P3', 'P4', 'v2', 'v3'] # Updated feature columns based on dataframe
target_col = 'v1' # Updated target column based on dataframe

X = df[feature_cols].values
y = df[target_col].values

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split train/test (no shuffling to preserve time-series nature)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)

# Helper to measure latency and size
def measure_model(model, X_sample, framework="sklearn"):
    start = time.time()
    _ = model.predict(X_sample)
    latency = (time.time() - start) * 1000  # ms

    with tempfile.NamedTemporaryFile(suffix=".keras", delete=False) as tmp: # Added suffix for Keras models
        if framework == "sklearn":
            joblib.dump(model, tmp.name)
        elif framework == "tf":
            model.save(tmp.name) # Removed save_format argument
        size = os.path.getsize(tmp.name) / (1024*1024)  # MB
    return latency, size

results = []

# 3. KNN
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)
mse_knn = mean_squared_error(y_test, knn.predict(X_test))
latency_knn, size_knn = measure_model(knn, X_test, "sklearn")
results.append(("KNN", mse_knn, latency_knn, size_knn))

# 4. SVM
svm = SVR(kernel='rbf')
svm.fit(X_train, y_train)
mse_svm = mean_squared_error(y_test, svm.predict(X_test))
latency_svm, size_svm = measure_model(svm, X_test, "sklearn")
results.append(("SVM", mse_svm, latency_svm, size_svm))

# 5. DNN
dnn = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)
])
dnn.compile(optimizer='adam', loss='mse')
dnn.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)
mse_dnn = mean_squared_error(y_test, dnn.predict(X_test))
latency_dnn, size_dnn = measure_model(dnn, X_test, "tf")
results.append(("DNN", mse_dnn, latency_dnn, size_dnn))

# 6. RNN (LSTM)
X_train_rnn = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

rnn = Sequential([
    LSTM(32, input_shape=(1, X_train.shape[1])),
    Dense(1)
])
rnn.compile(optimizer='adam', loss='mse')
rnn.fit(X_train_rnn, y_train, epochs=5, batch_size=32, verbose=0)
mse_rnn = mean_squared_error(y_test, rnn.predict(X_test_rnn))
latency_rnn, size_rnn = measure_model(rnn, X_test_rnn, "tf")
results.append(("RNN", mse_rnn, latency_rnn, size_rnn))

# 7. CNN
cnn = Sequential([
    Conv1D(32, kernel_size=1, activation='relu', input_shape=(1, X_train.shape[1])),
    Flatten(),
    Dense(1)
])
cnn.compile(optimizer='adam', loss='mse')
cnn.fit(X_train_rnn, y_train, epochs=5, batch_size=32, verbose=0)
mse_cnn = mean_squared_error(y_test, cnn.predict(X_test_rnn))
latency_cnn, size_cnn = measure_model(cnn, X_test_rnn, "tf")
results.append(("CNN", mse_cnn, latency_cnn, size_cnn))

# 8. Ranking
results_df = pd.DataFrame(results, columns=["Model", "MSE", "Latency_ms", "Size_MB"])

# Normalization helper
def normalize(series, invert=False):
    if invert:  # lower is better
        return (series.max() - series) / (series.max() - series.min())
    else:
        return (series - series.min()) / (series.max() - series.min())

# Weights: MSE 0.5, Latency 0.3, Size 0.2
results_df["MSE_score"] = normalize(results_df["MSE"], invert=True)
results_df["Latency_score"] = normalize(results_df["Latency_ms"], invert=True)
results_df["Size_score"] = normalize(results_df["Size_MB"], invert=True)

results_df["Final_score"] = (
    0.5*results_df["MSE_score"] +
    0.3*results_df["Latency_score"] +
    0.2*results_df["Size_score"]
)

results_df = results_df.sort_values(by="Final_score", ascending=False)
print("\n=== Model Ranking ===")
print(results_df)

Dataset downloaded to: /kaggle/input/simulated-hvac-energy-consumption
   #         Solution                             W  \
0  0  C-0_0_4_0_3_1_1  IRN_TEHRAN MEHRABAD_ITMY.epw   
1  1  C-0_0_4_0_3_1_0  IRN_TEHRAN MEHRABAD_ITMY.epw   
2  2  C-0_0_4_0_3_1_3  IRN_TEHRAN MEHRABAD_ITMY.epw   
3  3  C-0_0_4_0_3_1_2  IRN_TEHRAN MEHRABAD_ITMY.epw   
4  4  C-0_0_1_0_3_1_3  IRN_TEHRAN MEHRABAD_ITMY.epw   

                             T  P0  P1  P2  P3  P4        v1    v2        v3  
0  OFFICE CHILLED BEAM JEP.idf  26   3  19  12   5  24399.14  58.0  20459.72  
1  OFFICE CHILLED BEAM JEP.idf  26   3  19  12   4  24399.14  58.0  20459.72  
2  OFFICE CHILLED BEAM JEP.idf  26   3  19  12   7  24399.14  58.0  20459.72  
3  OFFICE CHILLED BEAM JEP.idf  26   3  19  12   6  24399.14  58.0  20459.72  
4  OFFICE CHILLED BEAM JEP.idf  23   3  19  12   7  29473.03   7.5  23359.94  


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 

=== Model Ranking ===
  Model           MSE  Latency_ms   Size_MB  MSE_score  Latency_score  \
0   KNN  8.508887e+04    9.833097  0.262869   1.000000       1.000000   
1   SVM  9.683070e+06  100.733280  0.146244   0.988649       0.676424   
4   CNN  8.454608e+08  100.043774  0.025468   0.000226       0.678878   
3   RNN  8.456521e+08  252.598524  0.082030   0.000000       0.135830   
2   DNN  8.009415e+08  290.756464  0.078436   0.052876       0.000000   

   Size_score  Final_score  
0    0.000000     0.800000  
1    0.491257     0.795503  
4    1.000000     0.403777  
3    0.761743     0.193098  
2    0.776884     0.181815  
