In [1]:
import pandas as pd

In [2]:
# Load the surge dataset from the CSV file in the runtime's /content folder.
data = pd.read_csv(filepath_or_buffer='/content/surge_data.csv')

In [3]:
# Split the frame into the target column (`flag`) and everything else.
y = data['flag']
X = data.drop(columns='flag')

## Borderline-SMOTE

In [4]:
from imblearn.over_sampling import BorderlineSMOTE

In [6]:
# Borderline-SMOTE oversampler; random_state is pinned to 42 so repeated runs
# are seeded identically.
borderline_smote = BorderlineSMOTE(random_state=42)

In [7]:
# Oversample the whole dataset; the class counts printed further down show
# both classes at the same size after this step.
X_resampled, y_resampled = borderline_smote.fit_resample(X=X, y=y)

In [9]:
# Rebuild a single frame from the resampled features (keeping the original
# column names) and append the resampled target as the last column, `flag`.
resampled_data = (
    pd.DataFrame(X_resampled, columns=X.columns)
    .assign(flag=y_resampled)
)

In [10]:
# Sanity check: show how many rows each class has after resampling.
class_counts = resampled_data['flag'].value_counts()
print(class_counts)

flag
0.0    9931
1.0    9931
Name: count, dtype: int64


## Logistic Regression

In [11]:
pip install river

Collecting river
  Downloading river-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting pandas<3.0.0,>=2.2.3 (from river)
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scipy<2.0.0,>=1.14.1 (from river)
  Downloading scipy-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Downloading river-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━

In [12]:
from river import compose
from river import preprocessing
from river import linear_model
from river import metrics

In [13]:
# Use only the three electrical measurements as model inputs; the target is
# the resampled `flag` column.
# NOTE(review): `y` rebinds the name that held the pre-resampling target in
# the earlier split cell, so re-running the resampling cell after this one
# would pair the original X with the resampled labels.
feature_columns = ['voltage', 'current', 'power_consumption']
x = resampled_data.loc[:, feature_columns]
y = resampled_data['flag']

In [14]:
# Online pipeline: standardise each incoming sample, then feed it to a
# logistic-regression classifier. Step names are kept as 'scaler' and
# 'classifier'.
pipeline_steps = [
    ('scaler', preprocessing.StandardScaler()),
    ('classifier', linear_model.LogisticRegression()),
]
model = compose.Pipeline(*pipeline_steps)

In [15]:
# Running accuracy, updated one sample at a time by the evaluation loop.
# Created in its own cell, so re-running only the loop cell keeps accumulating
# into the same object.
accuracy_metric = metrics.Accuracy()

In [16]:
# Running confusion matrix, updated one sample at a time by the evaluation
# loop alongside the accuracy metric.
confusion_matrix_metric = metrics.ConfusionMatrix()

In [17]:
# Test-then-train evaluation: for every row, predict with the model as it
# stands, score that prediction, and only then let the model learn from the
# true label.
# NOTE(review): rows are streamed in the order produced by the resampling
# step. If the oversampler appends its synthetic rows after the original
# ones, the tail of the stream is a single class and the reported accuracy
# may be optimistic. Confirm the row order before relying on these numbers.
feature_records = x.to_dict(orient='records')
for xi, yi in zip(feature_records, y):
    y_pred = model.predict_one(xi)
    # Score the prediction made before the model has seen this label.
    accuracy_metric.update(yi, y_pred)
    confusion_matrix_metric.update(yi, y_pred)
    model.learn_one(xi, yi)

In [18]:
# Report the accumulated accuracy using the metric's own string form.
print('Final accuracy:', accuracy_metric)

Final accuracy: Accuracy: 99.48%


In [19]:
# Print a heading followed by the accumulated confusion matrix on the next line.
print("Confusion Matrix:", confusion_matrix_metric, sep="\n")

Confusion Matrix:
      0.0     1.0    
0.0   9,930       1  
1.0     102   9,829  
