In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
import pickle
import joblib

In [None]:
# Load the merged, preprocessed dataset that every later cell works from.
df = pd.read_parquet(
    path='../data/all_data_preprocessed/all_merged.parquet',
)

In [None]:
# Feature matrix: every column of df except the target and the other columns
# excluded from the features. Each label is listed exactly once.
# NOTE(review): presumably these are dropped to avoid leaking the target into
# the 2-hour model — confirm against the preprocessing step.
X = df.drop(columns=[
    'event_all_region',
    'alarms_in_regions',
    'event_lastDay_region',
    'event_1h_ago',
])
# Target vector the classifier is trained to predict.
y = df['event_all_region']

In [None]:
# Expose the 2-hour-lagged event flag under the feature name "status".
# Rebinding the returned frame (instead of inplace=True) keeps the cell a plain
# assignment that is safe to re-run.
X = X.rename(columns={"event_2h_ago": "status"})

In [None]:
# Generate 5 ordered train/test folds and keep only the final one.
# NOTE(review): assumes the rows of X are already in chronological order — confirm.
tscv = TimeSeriesSplit(n_splits=5)
splits = [fold for fold in tscv.split(X)]
train_idx, test_idx = splits[len(splits) - 1]

# Slice features and target with the same positional indices.
X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

In [None]:
# Cast both feature partitions to 32-bit floats before scaling.
X_train, X_test = (part.astype('float32') for part in (X_train, X_test))

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Configure the gradient-boosted classifier (not trained yet).
xgb_model = XGBClassifier(
    # Fixed seed so repeated runs build the same model.
    random_state=42,
    # NOTE(review): no eval_set is passed to fit() in this notebook, so this
    # metric is presumably never reported — confirm whether that is intended.
    eval_metric='aucpr',
    # Ensemble size, tree depth and shrinkage.
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
)


In [None]:
# Train the classifier on the scaled training features.
xgb_model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict labels for the scaled held-out rows.
y_pred = xgb_model.predict(X=X_test_scaled)

In [None]:
# Accuracy of the predictions against the held-out labels.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion matrix of held-out labels versus predictions.
confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# F1 score of the predictions on the held-out rows.
f1_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Print the text report so its line breaks render as a table.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Pair each training column name with the importance the fitted model reports.
feature_names = X_train.columns
importance = xgb_model.feature_importances_
importance_df = pd.DataFrame(
    dict(Feature=feature_names, Importance=importance)
)

In [None]:
# Order the features from most to least important.
importance_df = importance_df.sort_values('Importance', ascending=False)

In [None]:
# Take the first 20 rows of importance_df (the most important features once
# the frame has been sorted in descending order). Leaving the frame as the
# cell's last expression lets Jupyter render it as a table instead of
# plain printed text.
top_20_features = importance_df.head(20)
top_20_features

In [None]:
# Persist the trained classifier as a pickle file.
with open('../src/our_models/3_Xgboost_2hour.pkl', mode='wb') as model_file:
    pickle.dump(xgb_model, model_file)

In [None]:
# Display the float32 training feature frame (Jupyter renders the cell's last
# expression).
# NOTE(review): looks like a leftover sanity check — confirm it is still wanted.
X_train

In [None]:
# Display the full feature frame, i.e. the columns after the drop and the
# "event_2h_ago" -> "status" rename.
# NOTE(review): looks like a leftover sanity check — confirm it is still wanted.
X

In [None]:
# Persist the fitted scaler with joblib.
joblib.dump(value=scaler, filename='../src/our_models/scaler_2hour.pkl')