In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_curve, auc, precision_recall_curve, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

In [7]:
# Load the merged, preprocessed table that every later cell derives from.
merged_path = '../data/all_data_preprocessed/all_merged.parquet'
df = pd.read_parquet(merged_path)

In [8]:
# The label column, followed by the other columns that are kept out of the
# feature matrix. Everything not listed here is used as a model input.
target_col = 'event_all_region'
excluded_cols = [
    target_col,
    'alarms_in_regions',
    'event_24h_ago',
    'event_2h_ago',
    'event_3h_ago',
    'event_6h_ago',
    'event_12h_ago',
    'event_18h_ago',
]

X = df.drop(columns=excluded_cols)
y = df[target_col]

In [9]:
# Five expanding-window folds. TimeSeriesSplit never shuffles: each test fold
# consists of rows that come after its training rows in X's existing order.
# NOTE(review): this assumes df is already sorted chronologically — confirm.
tscv = TimeSeriesSplit(n_splits=5)
splits = [fold for fold in tscv.split(X)]

In [10]:
# Only the last fold is used: it has the largest training window and the
# final block of rows as its test set.
train_idx, test_idx = splits[-1]

X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

In [11]:
# Cast both feature partitions to 32-bit floats before scaling and fitting.
X_train, X_test = (part.astype('float32') for part in (X_train, X_test))

In [12]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both partitions so the test rows never influence the statistics.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
# Random-forest hyperparameters, gathered in one place so they are easy to scan.
rf_params = {
    'n_estimators': 100,
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'random_state': 42,  # fixed seed so repeated fits build the same forest
    'n_jobs': -1,        # use every available core
}
rf_model = RandomForestClassifier(**rf_params)

In [14]:
# Train the forest on the scaled training rows of the final fold.
# NOTE(review): the output saved for this cell is a KeyboardInterrupt and the
# cells after it carry lower execution counts, so the metrics shown further
# down were not produced by this run. Restart the kernel and run all cells
# before relying on them.
rf_model.fit(X=X_train_scaled, y=y_train)

KeyboardInterrupt: 

In [10]:
# Hard class predictions for the held-out (scaled) test rows.
y_pred = rf_model.predict(X=X_test_scaled)

In [11]:
# Fraction of test rows whose predicted label matches the true label.
ac = accuracy_score(y_true=y_test, y_pred=y_pred)

In [12]:
# Display the accuracy value stored in `ac`.
ac

0.9261582639531298

In [13]:
# Support-weighted F1: each class's F1 is averaged in proportion to its number
# of true samples, so the larger class dominates the figure. The per-class
# values are listed in the classification report.
f1 = f1_score(y_true=y_test, y_pred=y_pred, average='weighted')
f1

0.9226277806525142

In [14]:
# Confusion matrix: rows are the true labels, columns are the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[83813,  1941],
       [ 5722, 12300]])

In [15]:
# Per-class precision, recall, F1 and support. The report is a preformatted
# string, so it is printed rather than displayed as a bare expression.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

              precision    recall  f1-score   support

         0.0       0.94      0.98      0.96     85754
         1.0       0.86      0.68      0.76     18022

    accuracy                           0.93    103776
   macro avg       0.90      0.83      0.86    103776
weighted avg       0.92      0.93      0.92    103776

