In [3]:
#!pip install xgboost

In [9]:
import pandas as pd 
import numpy as np 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
#from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings

warnings.filterwarnings('ignore')

In [10]:
X, y = make_classification(n_samples=1000, n_features=10, n_informative=2, n_redundant=8,
                          weights=[0.9, 0.1], flip_y=0, random_state=42)

np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100], dtype=int64))

In [11]:
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Experiment 1: Train Logistic Regression Classifier  

In [13]:
log_reg = LogisticRegression(C=1, solver='liblinear')
log_reg.fit(train_x, train_y)
y_pred_log_reg = log_reg.predict(test_x)
print(classification_report(y_pred_log_reg, test_y))


              precision    recall  f1-score   support

           0       0.96      0.95      0.95       275
           1       0.50      0.60      0.55        25

    accuracy                           0.92       300
   macro avg       0.73      0.77      0.75       300
weighted avg       0.92      0.92      0.92       300



# Experiment 2: Train Random Forest Classifier 

In [14]:
rf_clf = RandomForestClassifier(n_estimators=30, max_depth=3)
rf_clf.fit(train_x, train_y)
y_pred_rf = rf_clf.predict(test_x)
print(classification_report(y_pred_rf, test_y))

              precision    recall  f1-score   support

           0       1.00      0.97      0.98       278
           1       0.70      0.95      0.81        22

    accuracy                           0.97       300
   macro avg       0.85      0.96      0.89       300
weighted avg       0.97      0.97      0.97       300



# Track Experiments Using MLFlow 

In [15]:
models = [
    (
        "Logistic Regression",
        LogisticRegression(C=1, solver='liblinear'),
        (train_x, train_y),
        (test_x, test_y)
    ),
    (
        "Random Forest",
        RandomForestClassifier(n_estimators=30, max_depth=3),
        (train_x, train_y),
        (test_x, test_y)
    )
]

In [16]:
reports = []

for model_name, model, train_set, test_set in models:
    train_x = train_set[0]
    train_y = train_set[1]
    test_x = test_set[0]
    test_y = test_set[1]

    model.fit(train_x, train_y)
    y_pred = model.predict(test_x)
    report = classification_report(test_y, y_pred, output_dict=True)
    reports.append(report)

In [17]:
reports

[{'0': {'precision': 0.9454545454545454,
   'recall': 0.9629629629629629,
   'f1-score': 0.9541284403669725,
   'support': 270.0},
  '1': {'precision': 0.6,
   'recall': 0.5,
   'f1-score': 0.5454545454545454,
   'support': 30.0},
  'accuracy': 0.9166666666666666,
  'macro avg': {'precision': 0.7727272727272727,
   'recall': 0.7314814814814814,
   'f1-score': 0.749791492910759,
   'support': 300.0},
  'weighted avg': {'precision': 0.9109090909090909,
   'recall': 0.9166666666666666,
   'f1-score': 0.91326105087573,
   'support': 300.0}},
 {'0': {'precision': 0.9676258992805755,
   'recall': 0.9962962962962963,
   'f1-score': 0.9817518248175182,
   'support': 270.0},
  '1': {'precision': 0.9545454545454546,
   'recall': 0.7,
   'f1-score': 0.8076923076923077,
   'support': 30.0},
  'accuracy': 0.9666666666666667,
  'macro avg': {'precision': 0.961085676913015,
   'recall': 0.8481481481481481,
   'f1-score': 0.8947220662549129,
   'support': 300.0},
  'weighted avg': {'precision': 0.9663

In [18]:
import mlflow 

In [21]:
mlflow.set_experiment("Anomaly Detection")
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

for i, element in enumerate(models):
    model_name = element[0]
    model = element[1]
    report = reports[i]

    with mlflow.start_run(run_name=model_name):
        mlflow.log_param('model_name', model_name)
        mlflow.log_metric('accuracy', report['accuracy'])
        mlflow.log_metric('recall_class_1', report['1']['recall'])
        mlflow.log_metric('recall_class_0', report['0']['recall'])
        mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])

🏃 View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/804890674773640442/runs/863cbb1de1c8445896432c25edd83499
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/804890674773640442
🏃 View run Random Forest at: http://127.0.0.1:5000/#/experiments/804890674773640442/runs/78c9d90b1661465684047917c72f32fd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/804890674773640442
