***
<h1 id="heading"><center><span style="background-color:#5642C5; color:white ; display:fill;border-radius:5px; font-family:cursive"> 3. Modeling ⚙️🛠️ </span></center><a class="anchor-link"></a></h1>
<p><center style="color:#159364; font-family:cursive;">The best way to make yourself extremely valuable in a team is to understand everything, but being a master of something.</center></p>

***


In [1]:
TRAIN_PATH = "../data/processed/1_train_preprocessed_df.pkl"
TEST_PATH = "../data/processed/1_test_preprocessed_df.pkl"


MLFLOW_TRACKING_URI = '../models/mlruns'
MLFLOW_EXPERIMENT_NAME = "fake_news_predection"

LOG_PATH = "../models/temp/"
LOG_DATA_PKL    =  "data.pkl"
LOG_MODEL_PKL   =  "model.pkl"
LOG_METRICS_PKL =  "metrics.pkl"

***
<h3 id="heading"><span style="background-color:#cefffb; color:#1b4946 ; display:fill;border-radius:5px; font-family:cursive"> 📥 Import packages & data </span></h3>

In [2]:
# Load packages
import pandas as pd 
import numpy as np
import logging
import pickle
import random
import plotly 
import os
from pathlib import Path
# Load Mlflow experiment tracker
# if MlFlow not working try 
# pip install pydantic==1.10.9
import mlflow
from mlflow.tracking import MlflowClient

from sklearn.pipeline import make_pipeline, FeatureUnion
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

# Load metrics
from sklearn import metrics
from sklearn.metrics import auc, accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, classification_report

# Load ml models
import xgboost as xg
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [3]:
def calculate_quality(ground_truth, predictions, metric_function):
    quality_score = round(metric_function(ground_truth, predictions) * 100, 2)
    return quality_score

_________

***
<h3 id="heading"><span style="background-color:#cefffb; color:#1b4946 ; display:fill;border-radius:5px; font-family:cursive"> 🗄️ Initialize </span></h3>

<h4 id="heading"><span font-family:Georgia"><Strong>📑 Create directories </Strong></span></h4>


In [4]:
# Create directories if they don't exist
Path(MLFLOW_TRACKING_URI).mkdir(parents=True, exist_ok=True)
Path(LOG_PATH).mkdir(parents=True, exist_ok=True)

<h4 id="heading"><span font-family:Georgia"><Strong>📑 Read data </Strong></span></h4>


In [5]:
# Read Data
train_df = pd.read_pickle(TRAIN_PATH)
test_df = pd.read_pickle(TEST_PATH)

In [6]:
test_df.head()

Unnamed: 0,content,label
0,david streitfeld specter trump loosen tongu pu...,0
1,russian warship readi strike terrorist near al...,1
2,common dream nodapl nativ american leader vow ...,0
3,daniel victor tim tebow attempt anoth comeback...,1
4,truth broadcast network keiser report meme war...,1


<h4 id="heading"><span font-family:Georgia"><Strong>✂️ Victorization </Strong></span></h4>


<h5 id="heading"><span style="background-color:#000639; color:#F6FEFF ; display:fill;border-radius:6px; font-family:Georgia"><Strong>🔵  TfidfVectorizer()   </Strong></span></h5>


In [7]:
# Fit the TF-IDF vectorizer on the training data
vectorizer = TfidfVectorizer()
vectorizer.fit(train_df['content'].values)

In [8]:
# Transform the training data
X_train = vectorizer.transform(train_df['content'].values)
Y_train = train_df['label'].values

In [9]:
# Transform the test data using the same vectorizer
X_test = vectorizer.transform(test_df['content'].values)
Y_test = test_df['label'].values

<h5 id="heading"><span style="background-color:#000639; color:#F6FEFF ; display:fill;border-radius:6px; font-family:Georgia"><Strong>🔵  CountVectorizer()   </Strong></span></h5>


~~~
cv=CountVectorizer(ngram_range=(1,3),max_features=7000)
xtrain=cv.fit_transform(xtrain)
xval=cv.transform(xval)
~~~

<h4 id="heading"><span font-family:Georgia"><Strong>📑 Split data </Strong></span></h4>


In [10]:
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train , train_size=0.8, random_state=0)

***
<h2 id="heading"><span style="background-color:#cefffb; color:#1b4946 ; display:fill;border-radius:5px; font-family:cursive"> 🏋️‍♂️ Train models </span></h2>

<h3 id="heading"><span font-family:Georgia"><Strong>📑 Initialize MLflow </Strong></span></h3>


In [11]:
# Initialize client and experiment
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = mlflow.MlflowClient()
mlflow.get_experiment_by_name(MLFLOW_EXPERIMENT_NAME)
exp = mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

In [12]:
client.get_experiment_by_name

<bound method MlflowClient.get_experiment_by_name of <mlflow.tracking.client.MlflowClient object at 0x7f73983e3750>>

***
<h3 id="heading"><span style="background-color:#F7CBFF; color:#581845 ; display:fill;border-radius:5px;font-family:Georgia"><Strong>📦 1. Logistic regression </Strong></span></h3>


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 1. Evaluate the training set </Strong></span></h5>


In [13]:
clf = make_pipeline(LogisticRegression())

clf.fit(X_train, Y_train)
lg_predictions_val =  pd.DataFrame(clf.predict(X_val))

In [14]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_val, predictions[0], score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
train_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_val, predictions))
# Display the DataFrame
train_scores_df

The confusion matrix is:
  [[1925  121]
 [  95 2019]]


Unnamed: 0,Validation Score
accuracy_score,94.81
precision_score,94.35
recall_score,95.51
f1_score,94.92


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 2. Evaluate the test set </Strong></span></h5>


In [15]:
lg_predictions_test = clf.predict(X_test)

In [16]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_test, predictions1, score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
test_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_test, predictions1))
# Display the DataFrame
test_scores_df

The confusion matrix is:
  [[1502  837]
 [1075 1786]]


Unnamed: 0,Validation Score
accuracy_score,63.23
precision_score,68.09
recall_score,62.43
f1_score,65.13


In [17]:
mean_test_scores = test_scores_df.mean()
mean_test_scores

Validation Score    64.72
dtype: float64

***
<h4 id="heading"><span style="background-color:#cefffb; color:#1b4946 ; display:fill;border-radius:5px; font-family:cursive"> 🗃️ Log run </span></h4>

<h5 id="heading"><span font-family:Georgia"><Strong>📑 1. Prepare </Strong></span></h5>


In [18]:
# Data details
data_details = {"data_train_path": TRAIN_PATH,
                "data_test_path": TEST_PATH,
                "training_victorize": X_train,
                "test_victorize":     X_test
               }

with open(os.path.join(LOG_PATH, LOG_DATA_PKL), "wb") as output_file:
    pickle.dump(data_details, output_file)

In [19]:
# Model
model = {"model_description": "Baseline model: Logistic Regression ",
         "model_details": str(clf),
         "model_object": clf} 

with open(os.path.join(LOG_PATH, LOG_MODEL_PKL), "wb") as output_file:
    pickle.dump(model, output_file)

In [20]:
# Performance details
classes_metrics = {"train_scores": train_scores_df, 
                   "test_scores":  test_scores_df}

with open(os.path.join(LOG_PATH, LOG_METRICS_PKL), "wb") as output_file:
    pickle.dump(classes_metrics, output_file)

<h5 id="heading"><span font-family:Georgia"><Strong>📑 2. Log </Strong></span></h5>


In [21]:
# Start a new run and track 
with mlflow.start_run(experiment_id=exp.experiment_id, run_name=model["model_description"]):
    # Log pickles 
    mlflow.log_artifacts(LOG_PATH)
    
    # Track metrics 
    for metric, score in mean_test_scores.items():
        mlflow.log_metric(metric, score) 
    

***
<h3 id="heading"><span style="background-color:#F7CBFF; color:#581845 ; display:fill;border-radius:5px; font-family:Georgia"><Strong>📦 2. Support Vector Classifier (SVM) </Strong></span></h3>


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 1. Evaluate the training set </Strong></span></h5>


In [22]:
svm_clf =  SVC()
svm_clf.fit(X_train, Y_train)
svm_predictions_val =  pd.DataFrame(svm_clf.predict(X_val))

In [23]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_val, svm_predictions_val[0], score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
train_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_val, svm_predictions_val))
# Display the DataFrame
train_scores_df

The confusion matrix is:
  [[1959   87]
 [  61 2053]]


Unnamed: 0,Validation Score
accuracy_score,96.44
precision_score,95.93
recall_score,97.11
f1_score,96.52


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 2. Evaluate the test set </Strong></span></h5>


In [24]:
svm_predictions_test = svm_clf.predict(X_test)

In [25]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_test, svm_predictions_test, score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
test_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_test, svm_predictions_test))
# Display the DataFrame
test_scores_df

The confusion matrix is:
  [[1516  823]
 [1075 1786]]


Unnamed: 0,Validation Score
accuracy_score,63.5
precision_score,68.46
recall_score,62.43
f1_score,65.3


In [26]:
mean_test_scores = test_scores_df.mean()
mean_test_scores

Validation Score    64.9225
dtype: float64

***
<h4 id="heading"><span style="background-color:#cefffb; color:#1b4946 ; display:fill;border-radius:5px; font-family:cursive"> 🗃️ Log run </span></h4>

<h5 id="heading"><span font-family:Georgia"><Strong>📑 1. Prepare </Strong></span></h5>


In [27]:
# Data details
data_details = {"data_train_path": TRAIN_PATH,
                "data_test_path": TEST_PATH,
                "training_victorize": X_train,
                "test_victorize":     X_test
               }

with open(os.path.join(LOG_PATH, LOG_DATA_PKL), "wb") as output_file:
    pickle.dump(data_details, output_file)

In [28]:
# Model
model = {"model_description": " Support Vector Classifier (SVM) ",
         "model_details": str(svm_clf),
         "model_object": svm_clf} 

with open(os.path.join(LOG_PATH, LOG_MODEL_PKL), "wb") as output_file:
    pickle.dump(model, output_file)

In [29]:
# Performance details
classes_metrics = {"train_scores": train_scores_df, 
                   "test_scores":  test_scores_df}

with open(os.path.join(LOG_PATH, LOG_METRICS_PKL), "wb") as output_file:
    pickle.dump(classes_metrics, output_file)

<h5 id="heading"><span font-family:Georgia"><Strong>📑 2. Log </Strong></span></h5>


In [30]:
# Start a new run and track 
with mlflow.start_run(experiment_id=exp.experiment_id, run_name=model["model_description"]):
    # Log pickles 
    mlflow.log_artifacts(LOG_PATH)
    
    # Track metrics 
    for metric, score in mean_test_scores.items():
        mlflow.log_metric(metric, score) 
    

***
<h3 id="heading"><span style="background-color:#F7CBFF; color:#581845 ; display:fill;border-radius:5px; font-family:Georgia"><Strong>📦 3. Random Forest Classifier </Strong></span></h3>


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 1. Evaluate the training set </Strong></span></h5>


In [31]:
rf_clf = make_pipeline(RandomForestClassifier())

rf_clf.fit(X_train, Y_train)
rf_predictions_val =  pd.DataFrame(rf_clf.predict(X_val))

In [32]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_val, rf_predictions_val[0], score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
train_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_val, rf_predictions_val))
# Display the DataFrame
train_scores_df

The confusion matrix is:
  [[1944  102]
 [ 222 1892]]


Unnamed: 0,Validation Score
accuracy_score,92.21
precision_score,94.88
recall_score,89.5
f1_score,92.11


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 2. Evaluate the test set </Strong></span></h5>


In [33]:
rf_predictions_test = rf_clf.predict(X_test)

In [34]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_test, rf_predictions_test, score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
test_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_test, rf_predictions_test))
# Display the DataFrame
test_scores_df

The confusion matrix is:
  [[1677  662]
 [1088 1773]]


Unnamed: 0,Validation Score
accuracy_score,66.35
precision_score,72.81
recall_score,61.97
f1_score,66.96


In [35]:
mean_test_scores = test_scores_df.mean()
mean_test_scores

Validation Score    67.0225
dtype: float64

<h4 id="heading"><span style="background-color:#cefffb; color:#1b4946 ; display:fill;border-radius:5px; font-family:cursive"> 🗃️ Log run </span></h4>

<h5 id="heading"><span font-family:Georgia"><Strong>📑 1. Prepare </Strong></span></h5>


In [36]:
# Data details
data_details = {"data_train_path": TRAIN_PATH,
                "data_test_path": TEST_PATH,
                "training_victorize": X_train,
                "test_victorize":     X_test
               }

with open(os.path.join(LOG_PATH, LOG_DATA_PKL), "wb") as output_file:
    pickle.dump(data_details, output_file)

In [37]:
# Model
model = {"model_description": "random Forest Classifier ",
         "model_details": str(rf_clf),
         "model_object": rf_clf} 

with open(os.path.join(LOG_PATH, LOG_MODEL_PKL), "wb") as output_file:
    pickle.dump(model, output_file)

In [38]:
# Performance details
classes_metrics = {"train_scores": train_scores_df, 
                   "test_scores":  test_scores_df}

with open(os.path.join(LOG_PATH, LOG_METRICS_PKL), "wb") as output_file:
    pickle.dump(classes_metrics, output_file)

<h5 id="heading"><span font-family:Georgia"><Strong>📑 2. Log </Strong></span></h5>


In [39]:
# Start a new run and track 
with mlflow.start_run(experiment_id=exp.experiment_id, run_name=model["model_description"]):
    # Log pickles 
    mlflow.log_artifacts(LOG_PATH)
    
    # Track metrics 
    for metric, score in mean_test_scores.items():
        mlflow.log_metric(metric, score) 
    

***
<h3 id="heading"><span style="background-color:#F7CBFF; color:#581845 ; display:fill;border-radius:5px; font-family:Georgia"><Strong>📦 4. DecisionTreeClassifier </Strong></span></h3>


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 1. Evaluate the training set </Strong></span></h5>


In [40]:
dt_clf = make_pipeline(DecisionTreeClassifier())

dt_clf.fit(X_train, Y_train)
dt_predictions_val =  pd.DataFrame(dt_clf.predict(X_val))

In [41]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_val, dt_predictions_val[0], score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
train_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_val, dt_predictions_val))
# Display the DataFrame
train_scores_df

The confusion matrix is:
  [[1900  146]
 [ 119 1995]]


Unnamed: 0,Validation Score
accuracy_score,93.63
precision_score,93.18
recall_score,94.37
f1_score,93.77


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 2. Evaluate the test set </Strong></span></h5>


In [42]:
dt_predictions_test = dt_clf.predict(X_test)

In [43]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_test, dt_predictions_test, score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
test_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_test, dt_predictions_test))
# Display the DataFrame
test_scores_df

The confusion matrix is:
  [[1531  808]
 [1075 1786]]


Unnamed: 0,Validation Score
accuracy_score,63.79
precision_score,68.85
recall_score,62.43
f1_score,65.48


In [44]:
mean_test_scores = test_scores_df.mean()
mean_test_scores

Validation Score    65.1375
dtype: float64

***
<h4 id="heading"><span style="background-color:#cefffb; color:#1b4946 ; display:fill;border-radius:5px; font-family:cursive"> 🗃️ Log run </span></h4>

<h5 id="heading"><span font-family:Georgia"><Strong>📑 1. Prepare </Strong></span></h5>


In [45]:
# Data details
data_details = {"data_train_path": TRAIN_PATH,
                "data_test_path": TEST_PATH,
                "training_victorize": X_train,
                "test_victorize":     X_test
               }

with open(os.path.join(LOG_PATH, LOG_DATA_PKL), "wb") as output_file:
    pickle.dump(data_details, output_file)

In [46]:
# Model
model = {"model_description": "Decision Tree Classifier ",
         "model_details": str(dt_clf),
         "model_object": dt_clf} 

with open(os.path.join(LOG_PATH, LOG_MODEL_PKL), "wb") as output_file:
    pickle.dump(model, output_file)

In [47]:
# Performance details
classes_metrics = {"train_scores": train_scores_df, 
                   "test_scores":  test_scores_df}

with open(os.path.join(LOG_PATH, LOG_METRICS_PKL), "wb") as output_file:
    pickle.dump(classes_metrics, output_file)

<h5 id="heading"><span font-family:Georgia"><Strong>📑 2. Log </Strong></span></h5>


In [48]:
# Start a new run and track 
with mlflow.start_run(experiment_id=exp.experiment_id, run_name=model["model_description"]):
    # Log pickles 
    mlflow.log_artifacts(LOG_PATH)
    
    # Track metrics 
    for metric, score in mean_test_scores.items():
        mlflow.log_metric(metric, score) 
    

***
<h3 id="heading"><span style="background-color:#F7CBFF; color:#581845 ; display:fill;border-radius:5px; font-family:Georgia"><Strong>📦 5. XGboost Classifier </Strong></span></h3>


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 1. Evaluate the training set </Strong></span></h5>


In [49]:
xgb_clf = make_pipeline(xg.XGBClassifier())

xgb_clf.fit(X_train, Y_train)
xgb_predictions_val =  pd.DataFrame(xgb_clf.predict(X_val))

In [50]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_val, xgb_predictions_val[0], score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
train_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_val, xgb_predictions_val))
# Display the DataFrame
train_scores_df

The confusion matrix is:
  [[1961   85]
 [  66 2048]]


Unnamed: 0,Validation Score
accuracy_score,96.37
precision_score,96.02
recall_score,96.88
f1_score,96.44


<h5 id="heading"><span font-family:Georgia"><Strong>🧑🏻‍⚖️ 2. Evaluate the test set </Strong></span></h5>


In [51]:
xgb_predictions_test = xgb_clf.predict(X_test)

In [52]:
# Evaluate on validation set
validation_scores = {score.__name__: calculate_quality(Y_test, xgb_predictions_test, score) 
                     for score in [accuracy_score, precision_score, recall_score, f1_score]}

# Create a DataFrame from the dictionary
test_scores_df = pd.DataFrame.from_dict(validation_scores, orient='index', columns=['Validation Score']).T
print('The confusion matrix is:\n ',confusion_matrix(Y_test, xgb_predictions_test))
# Display the DataFrame
test_scores_df

The confusion matrix is:
  [[1522  817]
 [1066 1795]]


Unnamed: 0,Validation Score
accuracy_score,63.79
precision_score,68.72
recall_score,62.74
f1_score,65.59


In [62]:
test_scores_df = test_scores_df.T

In [63]:
test_scores_df

Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
Validation Score,63.79,68.72,62.74,65.59


In [69]:
mean_test_scores = test_scores_df.mean()
mean_test_scores

accuracy_score     63.79
precision_score    68.72
recall_score       62.74
f1_score           65.59
dtype: float64

***
<h4 id="heading"><span style="background-color:#cefffb; color:#1b4946 ; display:fill;border-radius:5px; font-family:cursive"> 🗃️ Log run </span></h4>

<h5 id="heading"><span font-family:Georgia"><Strong>📑 1. Prepare </Strong></span></h5>


In [64]:
# Data details
data_details = {"data_train_path": TRAIN_PATH,
                "data_test_path": TEST_PATH,
                "training_victorize": X_train,
                "test_victorize":     X_test
               }

with open(os.path.join(LOG_PATH, LOG_DATA_PKL), "wb") as output_file:
    pickle.dump(data_details, output_file)

In [65]:
# Model
model = {"model_description": "XGboost Classifier ",
         "model_details": str(xgb_clf),
         "model_object": xgb_clf} 

with open(os.path.join(LOG_PATH, LOG_MODEL_PKL), "wb") as output_file:
    pickle.dump(model, output_file)

In [66]:
# Performance details
classes_metrics = {"train_scores": train_scores_df, 
                   "test_scores":  test_scores_df}

with open(os.path.join(LOG_PATH, LOG_METRICS_PKL), "wb") as output_file:
    pickle.dump(classes_metrics, output_file)

<h5 id="heading"><span font-family:Georgia"><Strong>📑 2. Log </Strong></span></h5>


In [70]:
# Start a new run and track 
with mlflow.start_run(experiment_id=exp.experiment_id, run_name=model["model_description"]):
    # Log pickles 
    mlflow.log_artifacts(LOG_PATH)
    
    # Track metrics 
    for metric, score in mean_test_scores.items():
        mlflow.log_metric(metric, score) 
    