In [4]:
pip install xgboost

Collecting xgboost
  Downloading xgboost-2.1.3-py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.3-py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: xgboost
Successfully installed xgboost-2.1.3
Note: you may need to restart the kernel to use updated packages.


In [7]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [11]:
# Synthetic, imbalanced binary classification data: 100 rows, 10 features
# (2 informative + 8 redundant), requested class weights 90% / 10%,
# no label noise (flip_y=0), fixed seed for reproducibility.
x, y = make_classification(
    n_samples=100,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

# Show the distinct labels together with how many samples carry each one.
np.unique(y, return_counts=True)

(array([0, 1]), array([90, 10]))

In [12]:
# Hold out 30% of the rows for evaluation. stratify=y keeps the class ratio
# the same in both partitions; the seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [14]:
# Hyperparameters for the baseline model. They are kept in a dict so the exact
# same values can be handed to the estimator and logged alongside the run.
# `multi_class` is intentionally not set: it is deprecated in scikit-learn 1.5+
# (a FutureWarning now, a removed argument later) and "auto" was already the
# default, so the fitted model is unchanged.
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
}

# Fit the baseline logistic regression on the training partition.
lr = LogisticRegression(**params)
lr.fit(x_train, y_train)

# Predict labels for the held-out partition.
y_pred = lr.predict(x_test)

# Per-class precision / recall / F1 as a formatted text table.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.93      1.00      0.96        27
           1       1.00      0.33      0.50         3

    accuracy                           0.93        30
   macro avg       0.97      0.67      0.73        30
weighted avg       0.94      0.93      0.92        30



In [15]:
# Same report as above, but as a nested dict so individual metrics can be
# looked up by key (e.g. report_dict['1']['recall']).
report_dict = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    output_dict=True,
)
report_dict

{'0': {'precision': 0.9310344827586207,
  'recall': 1.0,
  'f1-score': 0.9642857142857143,
  'support': 27.0},
 '1': {'precision': 1.0,
  'recall': 0.3333333333333333,
  'f1-score': 0.5,
  'support': 3.0},
 'accuracy': 0.9333333333333333,
 'macro avg': {'precision': 0.9655172413793103,
  'recall': 0.6666666666666666,
  'f1-score': 0.7321428571428572,
  'support': 30.0},
 'weighted avg': {'precision': 0.9379310344827586,
  'recall': 0.9333333333333333,
  'f1-score': 0.9178571428571428,
  'support': 30.0}}

In [16]:
import mlflow

In [18]:
# Point the client at the tracking server *before* selecting the experiment:
# set_experiment() looks up / creates the experiment on whichever tracking URI
# is active when it is called, so the reverse order only works on a re-run of
# this cell (when the URI is already set from the previous execution).
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("first Experiment")

with mlflow.start_run():
    # Hyperparameters used to build the estimator.
    mlflow.log_params(params)

    # Headline metrics pulled from the dict-form classification report.
    mlflow.log_metrics({
        'accuracy': report_dict['accuracy'],
        'recall_class_0': report_dict['0']['recall'],
        'recall_class_1': report_dict['1']['recall'],
        'f1_score_macro': report_dict['macro avg']['f1-score']
    })

    # Store the fitted model as a run artifact under "LogisticRegression".
    mlflow.sklearn.log_model(lr, "LogisticRegression")

2024/12/12 12:43:30 INFO mlflow.tracking.fluent: Experiment with name 'first Experiment' does not exist. Creating a new experiment.


🏃 View run likeable-hawk-29 at: http://127.0.0.1:5000/#/experiments/199319715451295480/runs/ec6754ea009b4845912748d14f4e9070
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/199319715451295480
