In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, f1_score, cohen_kappa_score, roc_auc_score, confusion_matrix
import xgboost as xgb
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression

# Check GPU availability
# NOTE(review): a freshly constructed XGBClassifier only reports "cuda" as its
# device when one was passed in, so this probe looks like it always evaluates
# to False regardless of hardware — confirm and replace with a real hardware
# check before relying on the GPU code paths in the model cells.
gpu_available = xgb.XGBClassifier().get_params().get('device', 'cpu') == 'cuda'
print(f"GPU Available: {gpu_available}")

# Load dataset
df = pd.read_csv("df.csv")

# Each categorical column gets its own encoder so that both label mappings stay
# available for inverse_transform. `label_encoder` is the encoder of the
# prediction target ("theft").
class_encoder = LabelEncoder()
df["Class"] = class_encoder.fit_transform(df["Class"])
label_encoder = LabelEncoder()
df["theft"] = label_encoder.fit_transform(df["theft"])

# Feature selection
feature_cols = [
    "Electricity:Facility [kW](Hourly)", "Fans:Electricity [kW](Hourly)", "Cooling:Electricity [kW](Hourly)",
    "Heating:Electricity [kW](Hourly)", "InteriorLights:Electricity [kW](Hourly)", "InteriorEquipment:Electricity [kW](Hourly)",
    "Gas:Facility [kW](Hourly)", "Heating:Gas [kW](Hourly)", "InteriorEquipment:Gas [kW](Hourly)",
    "Water Heater:WaterSystems:Gas [kW](Hourly)"
]
X = df[feature_cols].to_numpy()
y = df["theft"].to_numpy()
class_labels = np.unique(y)
n_classes = len(class_labels)

# Compute class weights
# Labels are paired with their weights explicitly rather than used as positions
# into the weight array, so the mapping does not depend on labels being 0..k-1.
class_weights = compute_class_weight(class_weight="balanced", classes=class_labels, y=y)
class_weight_dict = dict(zip(class_labels, class_weights))

# Split (stratified so both partitions keep the overall class proportions)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Normalize: the scaler is fitted on the training partition only and then
# applied unchanged to the test partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Evaluation function
def evaluate_model(y_true, y_pred, y_proba, model_name):
    """Print accuracy, weighted F1, Cohen's kappa, ROC AUC and the confusion matrix.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Hard class predictions, compared against ``y_true``.
    y_proba : array-like
        Predicted class probabilities as returned by ``predict_proba``
        (one column per class).
    model_name : str
        Label used in the printed headings.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    y_proba = np.asarray(y_proba)
    # For a binary target roc_auc_score expects a 1-D score for the class with
    # the greater label; a two-column probability matrix is rejected. Multiclass
    # targets keep the full matrix for the one-vs-rest average.
    if y_proba.ndim == 2 and y_proba.shape[1] == 2:
        auc_scores = y_proba[:, 1]
    else:
        auc_scores = y_proba

    results = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "F1-score": f1_score(y_true, y_pred, average="weighted"),
        "Kappa": cohen_kappa_score(y_true, y_pred),
        "AUC": roc_auc_score(y_true, auc_scores, multi_class="ovr")
    }
    print(f"\n🔍 {model_name} Results:")
    print(results)
    print(f"\nConfusion Matrix ({model_name}):\n", confusion_matrix(y_true, y_pred))


In [None]:
# XGBoost classifier.
# `device` alone selects CPU vs. CUDA execution; the tree method stays "hist"
# on both, because the separate "gpu_hist" method is deprecated once `device`
# is used.
xgb_model = xgb.XGBClassifier(
    learning_rate=0.09144751092423227,
    max_depth=12,
    min_child_weight=7,
    subsample=0.8781111961629534,
    colsample_bytree=0.8584849992349836,
    n_estimators=200,
    tree_method="hist",
    objective="multi:softprob", num_class=n_classes,
    device="cuda" if gpu_available else "cpu", n_jobs=-1
)

# Expand the per-class "balanced" weights into one weight per training row.
train_sample_weight = np.array([class_weight_dict[label] for label in y_train])

xgb_model.fit(X_train, y_train, sample_weight=train_sample_weight)
y_pred = xgb_model.predict(X_test)
y_proba = xgb_model.predict_proba(X_test)

evaluate_model(y_test, y_pred, y_proba, "XGBoost")


In [None]:
# LightGBM classifier.
# `is_unbalance` is only honoured by the binary (and multiclassova) objectives;
# with the default multiclass objective it is ignored, so more than two classes
# need `class_weight="balanced"` to get any imbalance handling at all.
lgb_imbalance_params = (
    {"class_weight": "balanced"} if n_classes > 2 else {"is_unbalance": True}
)

lgb_model = lgb.LGBMClassifier(
    boosting_type='gbdt',
    learning_rate=0.02893527514545242,
    num_leaves=299,
    max_depth=11,
    # scikit-learn-API names for `min_data_in_leaf` / `feature_fraction`;
    # using the canonical names avoids LightGBM's alias-conflict warnings.
    min_child_samples=15,
    max_bin=393,
    colsample_bytree=0.7447012597505922,
    random_state=42,
    device="gpu" if gpu_available else "cpu",
    **lgb_imbalance_params
)

lgb_model.fit(X_train, y_train)
y_pred = lgb_model.predict(X_test)
y_proba = lgb_model.predict_proba(X_test)

evaluate_model(y_test, y_pred, y_proba, "LightGBM")


In [None]:
# Logistic regression baseline.
# `multi_class` is deprecated in recent scikit-learn releases (1.5+), and the
# lbfgs solver already fits a multinomial model when the target has more than
# two classes, so the argument is omitted.
logreg_model = LogisticRegression(
    solver="lbfgs",
    max_iter=506,
    C=11344.3650513728,
    class_weight="balanced",
    n_jobs=-1
)

logreg_model.fit(X_train, y_train)
y_pred = logreg_model.predict(X_test)
y_proba = logreg_model.predict_proba(X_test)

evaluate_model(y_test, y_pred, y_proba, "Logistic Regression")


In [None]:
# Random forest: construct and fit in one expression (fit returns the estimator).
rf_model = RandomForestClassifier(
    random_state=42,
    n_jobs=-1,
    class_weight="balanced",
    max_features='sqrt',
    n_estimators=136,
    max_depth=12,
    min_samples_leaf=17,
    min_samples_split=7,
).fit(X_train, y_train)

# Probabilities feed the AUC; hard labels feed the remaining metrics.
y_proba = rf_model.predict_proba(X_test)
y_pred = rf_model.predict(X_test)

evaluate_model(y_test, y_pred, y_proba, "Random Forest")


In [None]:
# Extra-trees classifier. ExtraTreesClassifier is imported in the notebook's
# top import cell together with the other estimators.
et_model = ExtraTreesClassifier(
    n_estimators=121,
    max_depth=12,
    min_samples_split=3,
    min_samples_leaf=9,
    max_features=None,  # consider every feature at each split
    class_weight="balanced",
    random_state=42,
    n_jobs=-1
)

et_model.fit(X_train, y_train)
y_pred = et_model.predict(X_test)
y_proba = et_model.predict_proba(X_test)

evaluate_model(y_test, y_pred, y_proba, "Extra Trees Classifier")
