In [None]:
# Import necessary libraries
import os

import pandas as pd
import mysql.connector
from sqlalchemy import create_engine, Column, Integer, Float, String, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, declarative_base
from sklearn.cluster import DBSCAN
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, recall_score, roc_auc_score, make_scorer, accuracy_score, precision_score, f1_score
from sklearn.utils import resample
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.model_selection import GridSearchCV

from sklearn import svm
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Configure database connection.
# Every setting can be overridden with an environment variable so that real
# credentials do not have to be written into the notebook. When a variable is
# not set, the original local-development value is used as the fallback.
db_config = {
    'host': os.environ.get('ESP32_DB_HOST', 'localhost'),
    'user': os.environ.get('ESP32_DB_USER', 'root'),
    'password': os.environ.get('ESP32_DB_PASSWORD', ''),
    'database': os.environ.get('ESP32_DB_NAME', 'db_esp32'),
}

In [None]:
# Fetch data from the database with only power > 0 and order by created_at in descending order
def fetch_data():
    """Load the power readings from the ``electricity_monitor`` table.

    Opens a MySQL connection using the module-level ``db_config`` settings,
    selects every row whose ``power`` is greater than zero (newest
    ``created_at`` first) and returns the result.

    Returns:
        pandas.DataFrame: The rows returned by the query.

    Raises:
        Whatever ``mysql.connector.connect`` or ``pandas.read_sql`` raise;
        errors are not swallowed here.
    """
    query = "SELECT * FROM electricity_monitor WHERE power > 0 ORDER BY created_at DESC"
    conn = mysql.connector.connect(**db_config)
    try:
        return pd.read_sql(query, conn)
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

In [None]:
# Resample the fetched data (50% of the original row count by default)
def sampled_data(fraction=0.5, random_state=42):
    """Fetch the monitor data and return a random resample of it.

    Args:
        fraction: Size of the sample relative to the fetched frame. The
            sample contains ``int(len(data) * fraction)`` rows. Defaults to
            0.5, the value that used to be hard-coded.
        random_state: Seed forwarded to ``sklearn.utils.resample`` so the
            sample is reproducible. Defaults to 42, the previous fixed seed.

    Returns:
        pandas.DataFrame: The sampled rows.

    Note:
        ``resample`` is called with its default settings, which draw with
        replacement, so the same row can appear more than once in the result.
    """
    df_data = fetch_data()
    n_samples = int(len(df_data) * fraction)
    return resample(df_data, n_samples=n_samples, random_state=random_state)
    

In [None]:
# Derive summary statistics of the 'energy' column
def calculate_features(df):
    """Attach global energy statistics to ``df`` and return the last row's view.

    The mean, standard deviation and maximum of ``df['energy']`` are computed
    over the whole frame and written back to ``df`` in place as the columns
    ``mean_consumption``, ``std_deviation`` and ``peak_consumption``. Each is
    a single scalar, so every row receives the same value.

    Args:
        df: DataFrame with at least the columns ``energy`` and ``created_at``.
            It is modified in place.

    Returns:
        pandas.Series: The three statistic columns plus ``created_at`` taken
        from the last row of ``df``.
    """
    energy = df['energy']
    statistics = {
        'mean_consumption': energy.mean(),
        'std_deviation': energy.std(),
        'peak_consumption': energy.max(),
    }
    # Scalar assignment broadcasts the value to every row of the column.
    for column_name, value in statistics.items():
        df[column_name] = value

    selected_columns = list(statistics) + ['created_at']
    return df[selected_columns].iloc[-1]


In [None]:
# Label data using DBSCAN
def label_data(df, eps=0.5, min_samples=5):
    """Cluster the rows with DBSCAN and store the cluster id in ``df['label']``.

    Clustering is performed on the ``mean_consumption`` and ``std_deviation``
    columns only.

    Args:
        df: DataFrame containing ``mean_consumption`` and ``std_deviation``.
            It is modified in place (a ``label`` column is added or
            overwritten).
        eps: DBSCAN neighbourhood radius. Defaults to 0.5, the value that
            used to be hard-coded.
        min_samples: DBSCAN minimum neighbourhood size for a core point.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        pandas.DataFrame: The same ``df`` object, now carrying ``label``.
        Per the scikit-learn documentation, DBSCAN marks noise points with
        the label -1.
    """
    feature_frame = df[['mean_consumption', 'std_deviation']]
    # fit_predict fits the estimator and returns its labels_ array.
    df['label'] = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(feature_frame)
    return df

In [None]:
# Fit a decision tree on the labelled data and print its training-set metrics
def isolation_forest(df):
    """Fit a ``DecisionTreeClassifier`` on ``df`` and print how well it fits.

    Despite the function name, no IsolationForest is used here: a decision
    tree is trained on ``mean_consumption`` / ``std_deviation`` against the
    ``label`` column, then evaluated on the very same rows.
    NOTE(review): the name suggests an IsolationForest was intended - confirm.

    Args:
        df: DataFrame with the columns ``mean_consumption``,
            ``std_deviation`` and ``label``.

    Returns:
        None. The confusion matrix and the classification report are printed.
    """
    inputs = df[['mean_consumption', 'std_deviation']]
    target = df['label']

    tree = DecisionTreeClassifier().fit(inputs, target)
    predicted = tree.predict(inputs)

    print(confusion_matrix(target, predicted))
    print(classification_report(target, predicted))

In [None]:
# Fit a decision tree on the 'label' column
def train_supervised_model(df):
    """Train a ``DecisionTreeClassifier`` to reproduce ``df['label']``.

    Args:
        df: DataFrame with the feature columns ``mean_consumption`` and
            ``std_deviation`` and the target column ``label``.

    Returns:
        tuple: ``(model, X, y)`` - the fitted classifier, the feature frame
        it was trained on, and the target series.
    """
    y = df['label']
    X = df[['mean_consumption', 'std_deviation']]
    # fit() returns the estimator itself, so construction and training chain.
    model = DecisionTreeClassifier().fit(X, y)
    return model, X, y

In [None]:
# Load the full data set used by the cells below.
df = fetch_data()
# sampled_data() calls fetch_data() itself, so this cell queries the database
# twice. NOTE(review): df_sampled is not referenced by the later cells shown
# in this notebook - confirm whether it is still needed.
df_sampled = sampled_data()

In [None]:
# Compute the summary statistics. calculate_features also writes the
# mean_consumption / std_deviation / peak_consumption columns into df, which
# the labelling cell below relies on.
features = calculate_features(df)
print("Calculated Features:", features, sep="\n")

In [None]:
# Cluster the rows with DBSCAN (adds the 'label' column) and preview the result
df = label_data(df)
print("Labelled Data:", df.head(), sep="\n")


In [None]:
# Train the model
# train_supervised_model returns the fitted tree together with the feature
# frame (X) and label series (y) it was trained on; the evaluation cell
# below reuses all three.
model, X, y = train_supervised_model(df)
print("Model Trained")


In [None]:
# Predict and evaluate the model
# The predictions are made on the same X the model was fitted on, so the
# figures below are training-set metrics.
y_pred = model.predict(X)
# unique() keeps order of first appearance; this order defines the rows and
# columns of the confusion matrix printed below.
labels = df['label'].unique()
print("Classification Report:")
print(classification_report(y, y_pred))
print("Labels (confusion matrix row/column order):")
print(labels)
print("Confusion Matrix:")
print(confusion_matrix(y, y_pred, labels=labels))
print("Feature Importance:")
# Previously this heading was followed by the label array; show the tree's
# actual per-feature importances, keyed by feature column name.
print(pd.Series(model.feature_importances_, index=X.columns))

In [None]:
# Visualize the results: the DBSCAN labels first, then the decision-tree
# predictions, both on the same mean/std feature plane.
for hue_values, plot_title in (
    ('label', 'DBSCAN Clustering'),
    (y_pred, 'Decision Tree Predictions'),
):
    sns.scatterplot(x='mean_consumption', y='std_deviation', hue=hue_values, data=df)
    plt.title(plot_title)
    plt.show()