In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [3]:
# Load dataset
data = pd.read_csv('Energy_consumption.csv')

In [4]:
# Create binary classification target based on median energy consumption
median_consumption = data['EnergyConsumption'].median()
data['EnergyClass'] = np.where(data['EnergyConsumption'] > median_consumption, 'High', 'Low')

In [5]:
# Drop unused columns
data = data.drop(columns=['Timestamp', 'EnergyConsumption'])

In [6]:
# Encode categorical variables
label_encoders = {}
for col in data.select_dtypes(include='object').columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

In [7]:
# Prepare training and testing data
X = data.drop('EnergyClass', axis=1)
y = data['EnergyClass']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Get numeric label for 'High' class
high_label_encoded = label_encoders['EnergyClass'].transform(['High'])[0]

In [9]:
# Define models
models = {
    "Naive Bayes": GaussianNB(),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "CART": DecisionTreeClassifier(criterion='gini', random_state=42),
    "ID3": DecisionTreeClassifier(criterion='entropy', random_state=42),
    "C4.5": DecisionTreeClassifier(criterion='entropy', random_state=42)  # Approximated
}

In [10]:
# Train and evaluate models
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, pos_label=high_label_encoded),
        "Recall": recall_score(y_test, y_pred, pos_label=high_label_encoded),
        "F1 Score": f1_score(y_test, y_pred, pos_label=high_label_encoded)
    }

In [11]:
# Convert results to DataFrame
results_df = pd.DataFrame(results).T
print(results_df)

             Accuracy  Precision    Recall  F1 Score
Naive Bayes     0.760   0.795918  0.735849  0.764706
KNN             0.550   0.581633  0.537736  0.558824
CART            0.665   0.696970  0.650943  0.673171
ID3             0.670   0.692308  0.679245  0.685714
C4.5            0.670   0.692308  0.679245  0.685714
