## 1. Importing Packages

In [1]:
# -- data loading --
import pandas as pd
import numpy as np

# -- visualization --
import matplotlib.pyplot as plt
import seaborn as sns

# -- embeddings --
from sklearn.feature_extraction.text import TfidfVectorizer

# --- ml models ---
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# -- model evaluation --
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score, classification_report

# -- utility --
import mlflow
import os
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
os.getcwd()

'/Users/rahulshelke/Documents/Data-Science/Data-Science-Projects/bbc-news-sorting/notebooks'

## 2. Loading Clean Data

In [2]:
# Load the cleaned training articles; the path is relative to the notebook's
# working directory, so run the notebook from the folder that contains `data/`.
train_df = pd.read_csv("data/train_clean_data.csv")
train_df.shape

(1490, 5)

In [3]:
train_df.head()

Unnamed: 0,ArticleId,Text,Category,clean_text,Label
0,1833,worldcom ex-boss launches defence lawyers defe...,business,worldcom ex boss launches defence lawyers defe...,0
1,154,german business confidence slides german busin...,business,german business confidence slides german busin...,0
2,1101,bbc poll indicates economic gloom citizens in ...,business,bbc poll indicates economic gloom citizens maj...,0
3,1976,lifestyle governs mobile choice faster bett...,tech,lifestyle governs mobile choice faster better ...,1
4,917,enron bosses in $168m payout eighteen former e...,business,enron bosses payout eighteen former enron dire...,0


In [4]:
# Target vector: the `Label` column as a NumPy array (the CV helper below
# indexes it positionally with fold indices).
y = train_df['Label'].values

## 3. Cross Validation

In [5]:
def stratified_k_fold_cv_f1(model, X, y, n_splits=5, shuffle=True, random_state=98):
    """
    Perform Stratified K-Fold Cross Validation and summarise the weighted F1 score.

    The same `model` object is re-fitted in place on every fold, so after the
    call it is left fitted on the training part of the last fold.

    Parameters:
    - model: The machine learning model to evaluate (needs `fit` and `predict`).
    - X: The feature matrix (NumPy array, SciPy sparse matrix or pandas object).
    - y: The target vector (labels).
    - n_splits: Number of splits/folds for cross-validation (default is 5).
    - shuffle: Whether to shuffle the data before splitting (default is True).
    - random_state: Seed for the shuffle; ignored when `shuffle` is False.

    Returns:
    - avg_f1: The mean weighted F1 score across all folds.
    - std_f1: The standard deviation of the weighted F1 score across folds.
    """

    # scikit-learn raises ValueError if a random_state is supplied while
    # shuffle=False, so only forward the seed when it is actually used.
    skf = StratifiedKFold(
        n_splits=n_splits,
        shuffle=shuffle,
        random_state=random_state if shuffle else None,
    )

    # Fold indices are positional: make label indexing positional as well and
    # use `.iloc` for pandas feature containers.
    y = np.asarray(y)
    if hasattr(X, "iloc"):
        take_rows = lambda rows: X.iloc[rows]
    else:
        take_rows = lambda rows: X[rows]

    f1_scores = []  # Weighted F1 score of each fold

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = take_rows(train_index), take_rows(test_index)
        y_train, y_test = y[train_index], y[test_index]

        # Train on the fold's training part, score on its held-out part
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        f1_scores.append(f1_score(y_test, y_pred, average='weighted'))

    avg_f1 = float(np.mean(f1_scores))
    std_f1 = float(np.std(f1_scores))
    return avg_f1, std_f1

In [6]:
def train_performance(model, X, y, n_splits=5, shuffle=True, random_state=98):
    """
    Cross-validate `model` and print the mean and standard deviation of its F1 score.

    All arguments are forwarded unchanged to `stratified_k_fold_cv_f1`;
    nothing is returned.
    """
    avg_f1, std_f1 = stratified_k_fold_cv_f1(
        model,
        X,
        y,
        n_splits=n_splits,
        shuffle=shuffle,
        random_state=random_state,
    )
    print(f"Train Avg F1 Score: {avg_f1} Std: {std_f1}")

## 4. One-Hot Encoding

**Steps:**
1. **Create a vocabulary:** A vocabulary is a list of unique words from your corpus (the entire set of documents).

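2. **Generate one-hot vectors:** For each document in the corpus, create a vector of zeros with the same length as the vocabulary and set to 1 the index of every vocabulary word that appears in that document (a binary bag-of-words, also called a "multi-hot" vector).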

In [7]:
# One cleaned document string per article, in DataFrame row order
corpus = train_df["clean_text"].to_list()

In [8]:
# Step 1: Create a vocabulary (a set of unique words)
def create_vocabulary(corpus):
    """
    Build the sorted list of unique words that occur in `corpus`.

    Parameters:
    - corpus: Iterable of document strings.

    Returns:
    - A sorted list of the distinct lower-cased, whitespace-separated tokens.
    """
    vocabulary = set()
    for text in corpus:
        # Lower-case before splitting so the vocabulary matches the tokens that
        # `one_hot_encode` looks up (it lower-cases each document too).
        vocabulary.update(text.lower().split())
    return sorted(vocabulary)

In [9]:
# Create the vocabulary
vocabulary = create_vocabulary(corpus)

In [10]:
print("Vocabulary:", vocabulary[1000:1010])

Vocabulary: ['applications', 'applied', 'applies', 'apply', 'applying', 'appoint', 'appointed', 'appointment', 'appointments', 'appoints']


In [11]:
# Step 2: One-hot encoding function
def one_hot_encode(corpus, vocabulary):
    """
    Encode every document as a binary word-presence vector over `vocabulary`.

    Parameters:
    - corpus: Iterable of document strings; each one is lower-cased and split
      on whitespace.
    - vocabulary: Sequence of words; column i of the result corresponds to
      vocabulary[i].

    Returns:
    - A 2-D float NumPy array of shape (number of documents, len(vocabulary))
      where entry [d, i] is 1.0 if vocabulary[i] occurs in document d, else 0.0.

    Words that are not in the vocabulary are ignored, so text that was not
    used to build the vocabulary can be encoded as well.
    """
    # Map each vocabulary word to its column index
    word_to_index = {word: index for index, word in enumerate(vocabulary)}

    # Preallocate the whole matrix; this also gives an empty corpus the
    # correct (0, len(vocabulary)) shape.
    documents = list(corpus)
    encoded = np.zeros((len(documents), len(vocabulary)))

    for row, text in enumerate(documents):
        for word in text.lower().split():
            index = word_to_index.get(word)
            if index is not None:  # skip out-of-vocabulary words
                encoded[row, index] = 1

    return encoded

In [12]:
# Get the one-hot encoded corpus
one_hot_corpus = one_hot_encode(corpus, vocabulary)

In [13]:
one_hot_corpus.shape

(1490, 23408)

In [43]:
# df = pd.DataFrame(one_hot_corpus, columns=vocabulary)

# # Plot the heatmap using seaborn
# plt.figure(figsize=(10, 6))
# sns.heatmap(df, annot=True, cmap="YlGnBu", cbar=True, xticklabels=df.columns, yticklabels=[f"Doc {i+1}" for i in range(df.shape[0])])

# # Customize the title and labels
# plt.title("OneHot Heatmap", fontsize=16)
# plt.xlabel("Terms", fontsize=12)
# plt.ylabel("Documents", fontsize=12)

# # Display the heatmap
# plt.show()

### 4.1 Logistic Regression on One Hot

In [14]:
# %time
# # Start an experiment (if not already created)
# experiment_name = "One_Hot_Embeddings"
# mlflow.set_experiment(experiment_name)

# model_name = "Logistic Regression"
# model_save_name = "logistic_regression_model"

# # Start MLflow run
# with mlflow.start_run(run_name=f"{experiment_name} + {model_name}"):

#     # initializing model
#     classifier = LogisticRegression(max_iter=2000)

#     # cross validation
#     avg_f1_score, avg_f1_std = stratified_k_fold_cv_f1(classifier, one_hot_corpus, y)

#     # Log metrics (e.g., f1 score, precision, recall, F1-score)
#     mlflow.log_metric("avg f1 score", avg_f1_score)
#     mlflow.log_metric("avg f1 std", avg_f1_std)
#     mlflow.log_param("models", model_name)

#     print(f"Logged {model_name} model with avg f1 score: {avg_f1_score}")
#     print(f"Logged {model_name} model with avg f1 std: {avg_f1_std}")

In [13]:
# Hold out 30% of the one-hot matrix, stratified by label, for the models below.
# The parameters match the split used in the TF-IDF section, so both encodings
# are evaluated on the same rows.
X_train, X_test, y_train, y_test = train_test_split(
    one_hot_corpus, y, test_size=0.3, stratify=y, random_state=42
)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

### Model Training

In [46]:
# Select (or create) the MLflow experiment that groups the one-hot runs
mlflow.set_experiment("One_Hot_Embeddings")

model_name = "Logistic Regression"
model_save_name = "logistic_regression_model"

# The run is closed automatically when the `with` block exits
with mlflow.start_run(run_name=f"One Hot Enbedding + {model_name}"):

    # Fit on the training split, then predict on both splits
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Micro-averaged F1 (equals accuracy for single-label multiclass targets)
    f1_score_train = f1_score(y_train, y_pred_train, average='micro')
    f1_score_test = f1_score(y_test, y_pred_test, average='micro')
    train_report = classification_report(y_train, y_pred_train, output_dict=True)
    test_report = classification_report(y_test, y_pred_test, output_dict=True)

    mlflow.log_param("model", f"{model_name}")
    mlflow.log_metric("train f1 score", f1_score_train)
    mlflow.log_metric("test f1 score", f1_score_test)

    # Log every precision/recall/F1/support entry of both reports. Keys carry
    # the split name so test values are not logged on top of the train values
    # under the same metric key.
    for split_name, report in (("train", train_report), ("test", test_report)):
        for label, metrics in report.items():
            if isinstance(metrics, dict):  # skips the scalar "accuracy" entry
                for metric_name, metric_value in metrics.items():
                    mlflow.log_metric(f"{split_name}_{label}_{metric_name}", metric_value)

    # Store the fitted model together with a one-row input example
    mlflow.sklearn.log_model(classifier, model_save_name, input_example=X_train[0].reshape(1, -1))

    print(f"Logged {model_name} model with training f1 score: {f1_score_train}")
    print(f"Logged {model_name} model with testing f1 score: {f1_score_test}")


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 5185.47it/s] 

Logged Logistic Regression model with training f1 score: 1.0
Logged Logistic Regression model with testing f1 score: 0.9642058165548099





In [47]:
model_name = "Support Vector Classifier"
model_save_name = "support_vector_model"

# The run is closed automatically when the `with` block exits
with mlflow.start_run(run_name=f"One Hot Enbedding + {model_name}"):

    # Fit on the training split, then predict on both splits
    classifier = SVC()
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Micro-averaged F1 (equals accuracy for single-label multiclass targets)
    f1_score_train = f1_score(y_train, y_pred_train, average='micro')
    f1_score_test = f1_score(y_test, y_pred_test, average='micro')
    train_report = classification_report(y_train, y_pred_train, output_dict=True)
    test_report = classification_report(y_test, y_pred_test, output_dict=True)

    mlflow.log_param("model", f"{model_name}")
    mlflow.log_metric("train f1 score", f1_score_train)
    mlflow.log_metric("test f1 score", f1_score_test)

    # Log every precision/recall/F1/support entry of both reports. Keys carry
    # the split name so test values are not logged on top of the train values
    # under the same metric key.
    for split_name, report in (("train", train_report), ("test", test_report)):
        for label, metrics in report.items():
            if isinstance(metrics, dict):  # skips the scalar "accuracy" entry
                for metric_name, metric_value in metrics.items():
                    mlflow.log_metric(f"{split_name}_{label}_{metric_name}", metric_value)

    # Store the fitted model together with a one-row input example
    mlflow.sklearn.log_model(classifier, model_save_name, input_example=X_train[0].reshape(1, -1))

    print(f"Logged {model_name} model with training f1 score: {f1_score_train}")
    print(f"Logged {model_name} model with testing f1 score: {f1_score_test}")

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 120.66it/s]  

Logged Support Vector Classifier model with training f1 score: 1.0
Logged Support Vector Classifier model with testing f1 score: 0.9485458612975392





In [48]:
model_name = "Random Forest Classifier"
model_save_name = "random_forest_model"

# The run is closed automatically when the `with` block exits
with mlflow.start_run(run_name=f"One Hot Enbedding + {model_name}"):

    # Fit on the training split, then predict on both splits.
    # A fixed seed makes the forest (and the logged scores) reproducible.
    classifier = RandomForestClassifier(random_state=42)
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Micro-averaged F1 (equals accuracy for single-label multiclass targets)
    f1_score_train = f1_score(y_train, y_pred_train, average='micro')
    f1_score_test = f1_score(y_test, y_pred_test, average='micro')
    train_report = classification_report(y_train, y_pred_train, output_dict=True)
    test_report = classification_report(y_test, y_pred_test, output_dict=True)

    mlflow.log_param("model", f"{model_name}")
    mlflow.log_metric("train f1 score", f1_score_train)
    mlflow.log_metric("test f1 score", f1_score_test)

    # Log every precision/recall/F1/support entry of both reports. Keys carry
    # the split name so test values are not logged on top of the train values
    # under the same metric key.
    for split_name, report in (("train", train_report), ("test", test_report)):
        for label, metrics in report.items():
            if isinstance(metrics, dict):  # skips the scalar "accuracy" entry
                for metric_name, metric_value in metrics.items():
                    mlflow.log_metric(f"{split_name}_{label}_{metric_name}", metric_value)

    # Store the fitted model together with a one-row input example
    mlflow.sklearn.log_model(classifier, model_save_name, input_example=X_train[0].reshape(1, -1))

    print(f"Logged {model_name} model with training f1 score: {f1_score_train}")
    print(f"Logged {model_name} model with testing f1 score: {f1_score_test}")

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 4122.46it/s] 

Logged Random Forest Classifier model with training f1 score: 1.0
Logged Random Forest Classifier model with testing f1 score: 0.9552572706935123





In [49]:
model_name = "Gradient Boosting Classifier"
model_save_name = "gradient_boosting_model"

# The run is closed automatically when the `with` block exits
with mlflow.start_run(run_name=f"One Hot Enbedding + {model_name}"):

    # Fit on the training split, then predict on both splits.
    # A fixed seed makes the boosted trees (and the logged scores) reproducible.
    classifier = GradientBoostingClassifier(verbose=False, random_state=42)
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Micro-averaged F1 (equals accuracy for single-label multiclass targets)
    f1_score_train = f1_score(y_train, y_pred_train, average='micro')
    f1_score_test = f1_score(y_test, y_pred_test, average='micro')
    train_report = classification_report(y_train, y_pred_train, output_dict=True)
    test_report = classification_report(y_test, y_pred_test, output_dict=True)

    mlflow.log_param("model", f"{model_name}")
    mlflow.log_metric("train f1 score", f1_score_train)
    mlflow.log_metric("test f1 score", f1_score_test)

    # Log every precision/recall/F1/support entry of both reports. Keys carry
    # the split name so test values are not logged on top of the train values
    # under the same metric key.
    for split_name, report in (("train", train_report), ("test", test_report)):
        for label, metrics in report.items():
            if isinstance(metrics, dict):  # skips the scalar "accuracy" entry
                for metric_name, metric_value in metrics.items():
                    mlflow.log_metric(f"{split_name}_{label}_{metric_name}", metric_value)

    # Store the fitted model together with a one-row input example
    mlflow.sklearn.log_model(classifier, model_save_name, input_example=X_train[0].reshape(1, -1))

    print(f"Logged {model_name} model with training f1 score: {f1_score_train}")
    print(f"Logged {model_name} model with testing f1 score: {f1_score_test}")

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 5495.06it/s] 

Logged Gradient Boosting Classifier model with training f1 score: 1.0
Logged Gradient Boosting Classifier model with testing f1 score: 0.9485458612975392





## 5. TF-IDF Encoding

In [50]:
# Learn the TF-IDF vocabulary and weights from the corpus and encode every
# document, converting the sparse result to a dense NumPy array.
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split below, so its IDF weights also see the held-out documents.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus).toarray()

# Vocabulary terms, aligned with the columns of the matrix
feature_names = vectorizer.get_feature_names_out()

# Name under which the following cells read the (already dense) matrix
dense_matrix = tfidf_matrix

In [51]:
dense_matrix.shape

(1490, 23390)

In [52]:
# Hold out 30% of the TF-IDF matrix, stratified by label, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    dense_matrix,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1043, 23390), (1043,), (447, 23390), (447,))

### Model Training

In [53]:
# Select (or create) the MLflow experiment that groups the TF-IDF runs
mlflow.set_experiment("TF_IDF_Embeddings")

model_name = "Logistic Regression"
model_save_name = "logistic_regression_model"

# The run is closed automatically when the `with` block exits
with mlflow.start_run(run_name=f"TF-IDF Enbedding + {model_name}"):

    # Fit on the training split, then predict on both splits
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Micro-averaged F1 (equals accuracy for single-label multiclass targets)
    f1_score_train = f1_score(y_train, y_pred_train, average='micro')
    f1_score_test = f1_score(y_test, y_pred_test, average='micro')
    train_report = classification_report(y_train, y_pred_train, output_dict=True)
    test_report = classification_report(y_test, y_pred_test, output_dict=True)

    mlflow.log_param("model", f"{model_name}")
    mlflow.log_metric("train f1 score", f1_score_train)
    mlflow.log_metric("test f1 score", f1_score_test)

    # Log every precision/recall/F1/support entry of both reports. Keys carry
    # the split name so test values are not logged on top of the train values
    # under the same metric key.
    for split_name, report in (("train", train_report), ("test", test_report)):
        for label, metrics in report.items():
            if isinstance(metrics, dict):  # skips the scalar "accuracy" entry
                for metric_name, metric_value in metrics.items():
                    mlflow.log_metric(f"{split_name}_{label}_{metric_name}", metric_value)

    # Store the fitted model together with a one-row input example
    mlflow.sklearn.log_model(classifier, model_save_name, input_example=X_train[0].reshape(1, -1))

    print(f"Logged {model_name} model with training f1 score: {f1_score_train}")
    print(f"Logged {model_name} model with testing f1 score: {f1_score_test}")


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 4576.08it/s] 

Logged Logistic Regression model with training f1 score: 0.9990412272291467
Logged Logistic Regression model with testing f1 score: 0.9530201342281879





In [54]:
model_name = "Support Vector Classifier"
model_save_name = "support_vector_model"

# The run is closed automatically when the `with` block exits
with mlflow.start_run(run_name=f"TF-IDF Enbedding + {model_name}"):

    # Fit on the training split, then predict on both splits
    classifier = SVC()
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Micro-averaged F1 (equals accuracy for single-label multiclass targets)
    f1_score_train = f1_score(y_train, y_pred_train, average='micro')
    f1_score_test = f1_score(y_test, y_pred_test, average='micro')
    train_report = classification_report(y_train, y_pred_train, output_dict=True)
    test_report = classification_report(y_test, y_pred_test, output_dict=True)

    mlflow.log_param("model", f"{model_name}")
    mlflow.log_metric("train f1 score", f1_score_train)
    mlflow.log_metric("test f1 score", f1_score_test)

    # Log every precision/recall/F1/support entry of both reports. Keys carry
    # the split name so test values are not logged on top of the train values
    # under the same metric key.
    for split_name, report in (("train", train_report), ("test", test_report)):
        for label, metrics in report.items():
            if isinstance(metrics, dict):  # skips the scalar "accuracy" entry
                for metric_name, metric_value in metrics.items():
                    mlflow.log_metric(f"{split_name}_{label}_{metric_name}", metric_value)

    # Store the fitted model together with a one-row input example
    mlflow.sklearn.log_model(classifier, model_save_name, input_example=X_train[0].reshape(1, -1))

    print(f"Logged {model_name} model with training f1 score: {f1_score_train}")
    print(f"Logged {model_name} model with testing f1 score: {f1_score_test}")

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 121.76it/s]  

Logged Support Vector Classifier model with training f1 score: 1.0
Logged Support Vector Classifier model with testing f1 score: 0.9530201342281879





In [55]:
model_name = "Random Forest Classifier"
model_save_name = "random_forest_model"

# The run is closed automatically when the `with` block exits
with mlflow.start_run(run_name=f"TF-IDF Enbedding + {model_name}"):

    # Fit on the training split, then predict on both splits.
    # A fixed seed makes the forest (and the logged scores) reproducible.
    classifier = RandomForestClassifier(random_state=42)
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Micro-averaged F1 (equals accuracy for single-label multiclass targets)
    f1_score_train = f1_score(y_train, y_pred_train, average='micro')
    f1_score_test = f1_score(y_test, y_pred_test, average='micro')
    train_report = classification_report(y_train, y_pred_train, output_dict=True)
    test_report = classification_report(y_test, y_pred_test, output_dict=True)

    mlflow.log_param("model", f"{model_name}")
    mlflow.log_metric("train f1 score", f1_score_train)
    mlflow.log_metric("test f1 score", f1_score_test)

    # Log every precision/recall/F1/support entry of both reports. Keys carry
    # the split name so test values are not logged on top of the train values
    # under the same metric key.
    for split_name, report in (("train", train_report), ("test", test_report)):
        for label, metrics in report.items():
            if isinstance(metrics, dict):  # skips the scalar "accuracy" entry
                for metric_name, metric_value in metrics.items():
                    mlflow.log_metric(f"{split_name}_{label}_{metric_name}", metric_value)

    # Store the fitted model together with a one-row input example
    mlflow.sklearn.log_model(classifier, model_save_name, input_example=X_train[0].reshape(1, -1))

    print(f"Logged {model_name} model with training f1 score: {f1_score_train}")
    print(f"Logged {model_name} model with testing f1 score: {f1_score_test}")

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 4614.20it/s] 

Logged Random Forest Classifier model with training f1 score: 1.0
Logged Random Forest Classifier model with testing f1 score: 0.9530201342281879





In [56]:
model_name = "Gradient Boosting Classifier"
model_save_name = "gradient_boosting_model"

# The run is closed automatically when the `with` block exits
with mlflow.start_run(run_name=f"TF-IDF Enbedding + {model_name}"):

    # Fit on the training split, then predict on both splits.
    # A fixed seed makes the boosted trees (and the logged scores) reproducible.
    classifier = GradientBoostingClassifier(verbose=False, random_state=42)
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Micro-averaged F1 (equals accuracy for single-label multiclass targets)
    f1_score_train = f1_score(y_train, y_pred_train, average='micro')
    f1_score_test = f1_score(y_test, y_pred_test, average='micro')
    train_report = classification_report(y_train, y_pred_train, output_dict=True)
    test_report = classification_report(y_test, y_pred_test, output_dict=True)

    mlflow.log_param("model", f"{model_name}")
    mlflow.log_metric("train f1 score", f1_score_train)
    mlflow.log_metric("test f1 score", f1_score_test)

    # Log every precision/recall/F1/support entry of both reports. Keys carry
    # the split name so test values are not logged on top of the train values
    # under the same metric key.
    for split_name, report in (("train", train_report), ("test", test_report)):
        for label, metrics in report.items():
            if isinstance(metrics, dict):  # skips the scalar "accuracy" entry
                for metric_name, metric_value in metrics.items():
                    mlflow.log_metric(f"{split_name}_{label}_{metric_name}", metric_value)

    # Store the fitted model together with a one-row input example
    mlflow.sklearn.log_model(classifier, model_save_name, input_example=X_train[0].reshape(1, -1))

    print(f"Logged {model_name} model with training f1 score: {f1_score_train}")
    print(f"Logged {model_name} model with testing f1 score: {f1_score_test}")

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 5352.80it/s] 

Logged Gradient Boosting Classifier model with training f1 score: 1.0
Logged Gradient Boosting Classifier model with testing f1 score: 0.9507829977628636



