# Deep Belief Model

In [None]:
# Baseline classifiers, improved by tuning their hyperparameters with GridSearchCV
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, cohen_kappa_score, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.neural_network import MLPClassifier

# Load dataset
df = pd.read_csv("df.csv")  # Replace with actual file path

# Encode categorical labels.
# Each column gets its own encoder: refitting a single LabelEncoder on a second
# column overwrites its `classes_`, so the first column could no longer be
# mapped back to its original labels with `inverse_transform`.
class_encoder = LabelEncoder()
df["Class"] = class_encoder.fit_transform(df["Class"])
# `label_encoder` stays bound to the encoder fitted on the target column, so
# `label_encoder.inverse_transform(...)` recovers the original "theft" labels.
label_encoder = LabelEncoder()
df["theft"] = label_encoder.fit_transform(df["theft"])

# Define features and target variable
feature_cols = [
    "Electricity:Facility [kW](Hourly)", "Fans:Electricity [kW](Hourly)", "Cooling:Electricity [kW](Hourly)",
    "Heating:Electricity [kW](Hourly)", "InteriorLights:Electricity [kW](Hourly)", "InteriorEquipment:Electricity [kW](Hourly)",
    "Gas:Facility [kW](Hourly)", "Heating:Gas [kW](Hourly)", "InteriorEquipment:Gas [kW](Hourly)",
    "Water Heater:WaterSystems:Gas [kW](Hourly)"
]
X = df[feature_cols]
y = df["theft"]

# Split data: 80% train / 20% test, seeded for a reproducible partition.
# NOTE(review): the split is not stratified; consider `stratify=y` if the
# "theft" classes are imbalanced -- confirm against the class distribution.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Function to evaluate models
def evaluate_model(model, name):
    """Fit *model* on the training split and report metrics on the test split.

    The model is (re)fitted on the module-level ``X_train``/``y_train`` and
    scored on ``X_test``/``y_test``.

    Args:
        model: Estimator exposing ``fit`` and ``predict``; ``predict_proba``
            is used for the AUC when available.
        name: Label used in the printed report.

    Returns:
        dict with keys ``"Accuracy"``, ``"F1-score"`` (weighted), ``"Kappa"``
        and ``"AUC"``. ``"AUC"`` is ``None`` when the model has no
        ``predict_proba``.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    auc_score = None
    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(X_test)
        if proba.shape[1] == 2:
            # Binary target: roc_auc_score expects a 1-D score for the
            # positive class; passing the full (n, 2) matrix raises.
            auc_score = roc_auc_score(y_test, proba[:, 1])
        else:
            # Multiclass target: one-vs-rest AUC over the full matrix.
            auc_score = roc_auc_score(y_test, proba, multi_class='ovr')

    results = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "F1-score": f1_score(y_test, y_pred, average='weighted'),
        "Kappa": cohen_kappa_score(y_test, y_pred),
        "AUC": auc_score
    }
    print(f"Results for {name}:\n", results, "\n")
    return results

# Define models with pipelines.
# Every pipeline min-max scales the features before the classifier step; the
# step is named 'classifier' so grid keys can address it as `classifier__<param>`.
# Stochastic estimators are seeded with the same value as the train/test split
# so that repeated runs give the same tuned models and metrics.
pipelines = {
    "KNN": Pipeline([
        ('scaler', MinMaxScaler()),
        ('classifier', KNeighborsClassifier())
    ]),
    "DecisionTree": Pipeline([
        ('scaler', MinMaxScaler()),
        ('classifier', DecisionTreeClassifier(random_state=42))
    ]),
    "RandomForest": Pipeline([
        ('scaler', MinMaxScaler()),
        ('classifier', RandomForestClassifier(random_state=42))
    ]),
    "Bagging": Pipeline([
        ('scaler', MinMaxScaler()),
        ('classifier', BaggingClassifier(KNeighborsClassifier(), random_state=42))
    ]),
    "ANN": Pipeline([
        ('scaler', MinMaxScaler()),
        ('classifier', MLPClassifier(max_iter=500, random_state=42))
    ])
}

# Hyperparameter grids for GridSearchCV, written per classifier without the
# pipeline-step prefix; the prefix is attached once below.
_raw_grids = {
    "KNN": {
        "n_neighbors": [5, 10, 15],
    },
    "DecisionTree": {
        "max_depth": [5, 10, None],
    },
    "RandomForest": {
        "n_estimators": [50, 100, 200],
    },
    "Bagging": {
        "n_estimators": [5, 10, 20],
    },
    "ANN": {
        "hidden_layer_sizes": [(20, 20), (50, 50), (100, 50)],
        "max_iter": [300, 500, 1000],
        "solver": ["adam", "lbfgs"],
        "alpha": [0.0001, 0.001],
        "early_stopping": [True],
    },
}

# Address each parameter through the 'classifier' step of its pipeline
# (`classifier__<param>`), as GridSearchCV requires for Pipeline estimators.
param_grids = {
    model_name: {f"classifier__{param}": values for param, values in grid.items()}
    for model_name, grid in _raw_grids.items()
}

# Tune every pipeline with a 5-fold, accuracy-scored grid search over its
# parameter grid and keep the estimator refitted with the best parameters.
best_models = {}
for name, pipeline in pipelines.items():
    print(f"Optimizing {name}...")
    grid_search = GridSearchCV(
        estimator=pipeline,
        param_grid=param_grids[name],
        scoring='accuracy',
        cv=5,
        n_jobs=-1,  # use all available CPU cores
    )
    grid_search.fit(X_train, y_train)
    best_models[name] = grid_search.best_estimator_
    print(f"Best parameters for {name}: {grid_search.best_params_}")

# Score each tuned model on the held-out test split
final_results = {}
for name, model in best_models.items():
    final_results[name] = evaluate_model(model, name)

# Side-by-side summary of all models
print("Final Model Comparison:\n", final_results)
