In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from  sklearn.preprocessing import OneHotEncoder
import io
from google.colab import files

In [None]:
# Prompt for a file upload through Colab; the result is indexed by filename
# below and its value is wrapped in io.BytesIO, i.e. it holds the raw file bytes.
uploaded = files.upload()

Saving aquasense_ai_simulated_data.csv to aquasense_ai_simulated_data.csv


In [None]:
# Wrap the uploaded bytes in a file-like buffer so pandas can parse them as CSV.
csv_buffer = io.BytesIO(uploaded['aquasense_ai_simulated_data.csv'])
data = pd.read_csv(csv_buffer)

In [None]:
# Sanity check: (number of rows, number of columns) of the loaded frame.
data.shape

(10000, 6)

In [None]:
# Positional hold-out split: the first 8000 rows train, the remainder tests.
training_data_pd, testing_data_pd = data.iloc[:8000], data.iloc[8000:]

In [None]:
# Feature matrices: every column except the last one, as raw arrays.
# (The last column is presumably the label -- confirm against the CSV header.)
training_data, testing_data = (
    frame.iloc[:, :-1].values
    for frame in (training_data_pd, testing_data_pd)
)

In [None]:
# Preview the first five feature rows of the training matrix.
training_data[:5]

array([['2025-04-01 08:00:00', 'Zone C', 2.67, 13.57, 25],
       ['2025-04-01 08:05:00', 'Zone C', 2.75, 15.04, 25],
       ['2025-04-01 08:10:00', 'Zone C', 2.94, 14.19, 20],
       ['2025-04-01 08:15:00', 'Zone C', 2.21, 20.01, 22],
       ['2025-04-01 08:20:00', 'Zone B', 2.52, 15.4, 18]], dtype=object)

In [None]:
# Target vectors: the last column of each split, as raw arrays.
training_y, testing_y = (
    frame.iloc[:, -1].values
    for frame in (training_data_pd, testing_data_pd)
)

We will preprocess the data using one-hot encoding.

In [None]:
# Encoder (default settings) used in the next cell to one-hot encode the
# feature matrices.
onehot = OneHotEncoder()

In [None]:
# Fit the encoder on the TRAINING split only and reuse it for the test split.
# Refitting on the test data learns a different category set, so the two
# encoded matrices would have different, non-aligned columns, and the test
# distribution would leak into preprocessing.
#
# handle_unknown='ignore' makes categories that appear only in the test split
# encode as all zeros for that feature instead of raising at transform time.
#
# NOTE(review): this one-hot encodes every feature column, including the
# timestamp and the numeric sensor readings visible in the preview output --
# confirm that is intended rather than encoding only the categorical column.
onehot.set_params(handle_unknown='ignore')
onehot.fit(training_data)

encoded_training_data = onehot.transform(training_data).toarray()
encoded_testing_data = onehot.transform(testing_data).toarray()

# Backward-compatible alias for the original (misspelled) variable name.
encoced_testing_data = encoded_testing_data


In [None]:
# Peek at the first five one-hot columns of the second training row.
# NOTE(review): the saved output lists ten values although five are requested,
# so the stored output looks stale -- re-run the cell.
encoded_training_data[1, :5]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_auc_score,
    roc_curve
)
import io
from google.colab import files



In [None]:
# Ask for the CSV through Colab's upload prompt, then parse the uploaded
# bytes with pandas via an in-memory buffer.
uploaded = files.upload()
csv_payload = io.BytesIO(uploaded['aquasense_ai_simulated_data.csv'])
data = pd.read_csv(csv_payload)

Saving aquasense_ai_simulated_data.csv to aquasense_ai_simulated_data.csv


In [None]:
# Positional hold-out split (first 8000 rows train, the rest test); each part
# gets a fresh 0-based index.
train_pd, test_pd = (
    part.reset_index(drop=True)
    for part in (data.iloc[:8000], data.iloc[8000:])
)

In [None]:
# Separate the 'label' target from the remaining feature columns, per split.
X_train, y_train = train_pd.drop(columns=['label']), train_pd['label'].values
X_test, y_test = test_pd.drop(columns=['label']), test_pd['label'].values


In [None]:
# Feature preprocessing:
#   * 'zone' is one-hot encoded. handle_unknown='ignore' makes a zone that was
#     never seen during fitting encode as all zeros instead of raising at
#     transform time.
#   * 'pressure', 'flow' and 'temperature' are standardised.
#   * remainder='drop' is the ColumnTransformer default, spelled out here to
#     make explicit that every other column of X is discarded.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), ['zone']),
        ('num', StandardScaler(), ['pressure', 'flow', 'temperature']),
    ],
    remainder='drop',
)

# Fit on the training split only; the test split is transformed with the
# categories and scaling statistics learned from training.
X_train_enc = preprocessor.fit_transform(X_train)
X_test_enc  = preprocessor.transform(X_test)

In [None]:
# Map the string labels to integer codes, learning the mapping from the
# training labels only. The saved classification report lists the classes as
# 'leak' then 'normal', i.e. 'leak' -> 0 and 'normal' -> 1.
le = LabelEncoder().fit(y_train)
y_train_enc, y_test_enc = le.transform(y_train), le.transform(y_test)

In [None]:
# Logistic-regression classifier; the iteration cap is raised to 1000 for the
# L-BFGS solver.
clf = LogisticRegression(solver='lbfgs', max_iter=1000)
clf.fit(X=X_train_enc, y=y_train_enc)

In [None]:
# Hard class predictions plus the probability of the class with encoded
# label 1. Per the class order in the saved report that is 'normal', not
# 'leak' -- keep this in mind when reading the ROC curve.
class_probabilities = clf.predict_proba(X_test_enc)
y_pred = clf.predict(X_test_enc)
y_pred_prob = class_probabilities[:, 1]

In [None]:
# Score the held-out predictions against the encoded test labels.
# NOTE(review): the saved run reports accuracy and AUC of exactly 1.0000;
# confirm the simulated features do not trivially encode the label.
roc_auc = roc_auc_score(y_test_enc, y_pred_prob)
cm = confusion_matrix(y_test_enc, y_pred)
acc = accuracy_score(y_test_enc, y_pred)
report = classification_report(y_test_enc, y_pred, target_names=le.classes_)

print(f"Accuracy: {acc:.4f}", f"ROC AUC : {roc_auc:.4f}\n", sep="\n")
print("Classification Report:\n", report)
print("Confusion Matrix:\n", cm)


Accuracy: 1.0000
ROC AUC : 1.0000

Classification Report:
               precision    recall  f1-score   support

        leak       1.00      1.00      1.00       202
      normal       1.00      1.00      1.00      1798

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000

Confusion Matrix:
 [[ 202    0]
 [   0 1798]]


In [None]:
import matplotlib.pyplot as plt

# ROC curve for the held-out split.
# Accuracy, the classification report and the confusion matrix are computed
# and printed by the previous cell, so they are not repeated here. Only the
# AUC is recomputed, so the legend always matches the curve drawn below.
roc_auc = roc_auc_score(y_test_enc, y_pred_prob)

# The thresholds are not needed for the plot. They are bound to a named
# throwaway instead of `_`, which IPython uses for the last cell output.
fpr, tpr, _thresholds = roc_curve(y_test_enc, y_pred_prob)

fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend(loc='lower right')
ax.grid(True)
fig.tight_layout()
plt.show()
