In [1]:
# importing libraries
import pandas as pd
import re
import numpy as np
import spacy
import contractions
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import word_tokenize
import nltk
from nltk.corpus import wordnet
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

In [2]:
# Load the first training CSV, print its (rows, columns) shape and preview the first rows.
df1 = pd.read_csv('./data/news_a1.csv')
print(df1.shape)
df1.head()

(8620, 7)


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label
0,4218,15432,NEWSFLASH FOR OUR IMPERIAL PRESIDENT: STATES C...,As Barack Hussein Obama tours around the count...,politics,"Jul 23, 2015",FAKE
1,3828,19686,Factbox: Reactions to speech by Myanmar's Suu ...,NAYPYITAW (Reuters) - Myanmar leader Aung San ...,worldnews,"September 19, 2017",REAL
2,9288,10721,"RADICAL, INTOLERANT Students Held College Admi...",Video from inside Evergreen State College cont...,politics,"Jun 2, 2017",FAKE
3,11577,20031,Japan's Suga: government strongly protests lat...,TOKYO (Reuters) - North Korea fired a ballisti...,worldnews,"September 14, 2017",REAL
4,8869,10577,CNN’s Jim Acosta Goes Bonkers Waving His Hands...,Watch Jim Acosta wave his hands around and hav...,politics,"Jun 19, 2017",FAKE


In [3]:
# Load the second training CSV, print its shape and preview the first rows.
df2 = pd.read_csv('./data/news_a2.csv')
print(df2.shape)
df2.head()

(8620, 7)


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label
0,22406,4545,Trump officials defend immigration arrests at ...,LOS ANGELES (Reuters) - Federal agents have ar...,politicsNews,"March 31, 2017",REAL
1,4061,20039,LIBERAL SMACK DOWN OF THE DAY: Watch What Happ...,The Left is not able to get away with shaming ...,left-news,"Sep 2, 2016",FAKE
2,3456,6739,Factbox: Trump fills top jobs for his administ...,(Reuters) - U.S. President-elect Donald Trump ...,politicsNews,"December 13, 2016",REAL
3,4956,18869,"Iraq Kurdish vote may benefit Syrian Kurds, sa...",BEIRUT (Reuters) - The Iraqi Kurdish vote for ...,worldnews,"September 27, 2017",REAL
4,7787,17092,Tillerson to visit Pakistan as well as India: ...,WASHINGTON (Reuters) - U.S. Secretary of State...,worldnews,"October 18, 2017",REAL


In [4]:
# Load the third training CSV, print its shape and preview the first rows.
# NOTE(review): the recorded preview shows one fewer 'Unnamed' column here than in the other two files.
df3 = pd.read_csv('./data/news2.csv')
print(df3.shape)
df3.head()

(8980, 6)


Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label
0,22216,Ben Stein Calls Out 9th Circuit Court: Committ...,"21st Century Wire says Ben Stein, reputable pr...",US_News,"February 13, 2017",FAKE
1,4436,Trump drops Steve Bannon from National Securit...,WASHINGTON (Reuters) - U.S. President Donald T...,politicsNews,"April 5, 2017",REAL
2,1526,Puerto Rico expects U.S. to lift Jones Act shi...,(Reuters) - Puerto Rico Governor Ricardo Rosse...,politicsNews,"September 27, 2017",REAL
3,1377,OOPS: Trump Just Accidentally Confirmed He Le...,"On Monday, Donald Trump once again embarrassed...",News,"May 22, 2017",FAKE
4,8995,Donald Trump heads for Scotland to reopen a go...,"GLASGOW, Scotland (Reuters) - Most U.S. presid...",politicsNews,"June 24, 2016",REAL


Data preparation and cleaning

In [5]:
# Stack the three source frames row-wise into one training frame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each
# frame keeps its own 0-based labels, so the combined index contains duplicate
# labels and any later label-based alignment becomes ambiguous.
df = pd.concat([df1, df2, df3], ignore_index=True)
print(df.shape)
df.head()

(26220, 7)


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label
0,4218.0,15432,NEWSFLASH FOR OUR IMPERIAL PRESIDENT: STATES C...,As Barack Hussein Obama tours around the count...,politics,"Jul 23, 2015",FAKE
1,3828.0,19686,Factbox: Reactions to speech by Myanmar's Suu ...,NAYPYITAW (Reuters) - Myanmar leader Aung San ...,worldnews,"September 19, 2017",REAL
2,9288.0,10721,"RADICAL, INTOLERANT Students Held College Admi...",Video from inside Evergreen State College cont...,politics,"Jun 2, 2017",FAKE
3,11577.0,20031,Japan's Suga: government strongly protests lat...,TOKYO (Reuters) - North Korea fired a ballisti...,worldnews,"September 14, 2017",REAL
4,8869.0,10577,CNN’s Jim Acosta Goes Bonkers Waving His Hands...,Watch Jim Acosta wave his hands around and hav...,politics,"Jun 19, 2017",FAKE


In [6]:
# Count the missing values in each column of the combined frame.
df.isnull().sum()

Unnamed: 0.1    8980
Unnamed: 0         0
title              0
text               0
subject            0
date               0
label              0
dtype: int64

In [7]:
# Drop rows whose title repeats an earlier row; the first occurrence is kept.
# inplace=True mutates df itself, so after this cell df no longer holds the
# full concatenated frame.
df.drop_duplicates(subset ="title", keep = 'first', inplace = True)
df.shape

(24051, 7)

In [8]:
# Class-balance check: number of rows for each label value.
df['label'].value_counts()

REAL    12303
FAKE    11748
Name: label, dtype: int64

Text Cleaning

In [9]:
# text normalisation helper
def normalize_document(doc):
    """Return a lower-cased, punctuation-free version of ``doc``.

    Steps, in order:
      1. delete every character that is not an ASCII letter, digit or whitespace;
      2. strip leading/trailing whitespace;
      3. run ``contractions.fix`` on the result;
      4. lower-case.

    NOTE(review): apostrophes are already removed in step 1 by the time
    ``contractions.fix`` runs in step 3 -- confirm this ordering is intended.
    """
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', doc)
    expanded = contractions.fix(alnum_only.strip())
    return expanded.lower()

In [10]:
# stop-word removal
# The stop-word collection is taken from the defaults of the loaded spaCy English pipeline.
nlp = spacy.load('en_core_web_sm')
stopwords = nlp.Defaults.stop_words
def remove_stop(doc):
    """Tokenise ``doc`` with NLTK's ``word_tokenize`` and drop stop words.

    A token is dropped when it is an exact member of ``stopwords``; the
    comparison is done on the token as-is, with no case folding here.

    Returns the remaining tokens as a list, in their original order.
    """
    kept_tokens = []
    for token in word_tokenize(doc):
        if token in stopwords:
            continue
        kept_tokens.append(token)
    return kept_tokens

In [11]:
# translate POS tags into WordNet part-of-speech constants
def pos_tag_wordnet(tagged_tokens):
    """Map each ``(word, tag)`` pair to ``(word, wordnet_pos)``.

    Only the first character of the tag (lower-cased) is looked at:
    'j' -> adjective, 'v' -> verb, 'n' -> noun, 'r' -> adverb.
    Any other first character falls back to noun.

    Returns a new list of ``(word, wordnet_pos)`` tuples.
    """
    first_letter_to_pos = {
        'j': wordnet.ADJ,
        'v': wordnet.VERB,
        'n': wordnet.NOUN,
        'r': wordnet.ADV,
    }
    converted = []
    for word, tag in tagged_tokens:
        wn_pos = first_letter_to_pos.get(tag[0].lower(), wordnet.NOUN)
        converted.append((word, wn_pos))
    return converted

In [12]:
# lemmatizing words
from nltk.stem import WordNetLemmatizer
wnl = WordNetLemmatizer()
def lemmatize(doc):
    """Lemmatise a sequence of tokens and join them into one string.

    ``doc`` is passed to ``nltk.pos_tag``, the resulting tags are converted
    with ``pos_tag_wordnet``, and every word is lemmatised with its converted
    tag by the module-level ``WordNetLemmatizer``.

    Returns the lemmas joined by single spaces.
    """
    lemmas = []
    for word, wn_pos in pos_tag_wordnet(nltk.pos_tag(doc)):
        lemmas.append(wnl.lemmatize(word, wn_pos))
    return ' '.join(lemmas)

In [13]:
# Build clean_title from the raw title: normalise -> drop stop words -> lemmatise.
df['clean_title'] = (
    df['title']
    .apply(normalize_document)
    .apply(remove_stop)
    .apply(lemmatize)
)
df.head()


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label,clean_title
0,4218.0,15432,NEWSFLASH FOR OUR IMPERIAL PRESIDENT: STATES C...,As Barack Hussein Obama tours around the count...,politics,"Jul 23, 2015",FAKE,newsflash imperial president state refuse iran...
1,3828.0,19686,Factbox: Reactions to speech by Myanmar's Suu ...,NAYPYITAW (Reuters) - Myanmar leader Aung San ...,worldnews,"September 19, 2017",REAL,factbox reaction speech myanmar suu kyi violen...
2,9288.0,10721,"RADICAL, INTOLERANT Students Held College Admi...",Video from inside Evergreen State College cont...,politics,"Jun 2, 2017",FAKE,radical intolerant student hold college admini...
3,11577.0,20031,Japan's Suga: government strongly protests lat...,TOKYO (Reuters) - North Korea fired a ballisti...,worldnews,"September 14, 2017",REAL,japan suga government strongly protest late n ...
4,8869.0,10577,CNN’s Jim Acosta Goes Bonkers Waving His Hands...,Watch Jim Acosta wave his hands around and hav...,politics,"Jun 19, 2017",FAKE,cnns jim acosta go bonkers wave hand camera se...


In [14]:
# Build clean_text from the raw article body: normalise -> drop stop words -> lemmatise.
df['clean_text'] = (
    df['text']
    .apply(normalize_document)
    .apply(remove_stop)
    .apply(lemmatize)
)
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label,clean_title,clean_text
0,4218.0,15432,NEWSFLASH FOR OUR IMPERIAL PRESIDENT: STATES C...,As Barack Hussein Obama tours around the count...,politics,"Jul 23, 2015",FAKE,newsflash imperial president state refuse iran...,barack hussein obama tour country try convince...
1,3828.0,19686,Factbox: Reactions to speech by Myanmar's Suu ...,NAYPYITAW (Reuters) - Myanmar leader Aung San ...,worldnews,"September 19, 2017",REAL,factbox reaction speech myanmar suu kyi violen...,naypyitaw reuters myanmar leader aung san suu ...
2,9288.0,10721,"RADICAL, INTOLERANT Students Held College Admi...",Video from inside Evergreen State College cont...,politics,"Jun 2, 2017",FAKE,radical intolerant student hold college admini...,video inside evergreen state college continue ...
3,11577.0,20031,Japan's Suga: government strongly protests lat...,TOKYO (Reuters) - North Korea fired a ballisti...,worldnews,"September 14, 2017",REAL,japan suga government strongly protest late n ...,tokyo reuters north korea fire ballistic missi...
4,8869.0,10577,CNN’s Jim Acosta Goes Bonkers Waving His Hands...,Watch Jim Acosta wave his hands around and hav...,politics,"Jun 19, 2017",FAKE,cnns jim acosta go bonkers wave hand camera se...,watch jim acosta wave hand temper tantrum air ...


In [15]:
# Encode the target as an integer: 1 where label equals 'FAKE', 0 for every other value.
# NOTE(review): 'label' is overwritten in place, so running this cell a second
# time compares integers with 'FAKE' and turns every label into 0.
df['label'] = df['label'].eq('FAKE').astype(int)
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label,clean_title,clean_text
0,4218.0,15432,NEWSFLASH FOR OUR IMPERIAL PRESIDENT: STATES C...,As Barack Hussein Obama tours around the count...,politics,"Jul 23, 2015",1,newsflash imperial president state refuse iran...,barack hussein obama tour country try convince...
1,3828.0,19686,Factbox: Reactions to speech by Myanmar's Suu ...,NAYPYITAW (Reuters) - Myanmar leader Aung San ...,worldnews,"September 19, 2017",0,factbox reaction speech myanmar suu kyi violen...,naypyitaw reuters myanmar leader aung san suu ...
2,9288.0,10721,"RADICAL, INTOLERANT Students Held College Admi...",Video from inside Evergreen State College cont...,politics,"Jun 2, 2017",1,radical intolerant student hold college admini...,video inside evergreen state college continue ...
3,11577.0,20031,Japan's Suga: government strongly protests lat...,TOKYO (Reuters) - North Korea fired a ballisti...,worldnews,"September 14, 2017",0,japan suga government strongly protest late n ...,tokyo reuters north korea fire ballistic missi...
4,8869.0,10577,CNN’s Jim Acosta Goes Bonkers Waving His Hands...,Watch Jim Acosta wave his hands around and hav...,politics,"Jun 19, 2017",1,cnns jim acosta go bonkers wave hand camera se...,watch jim acosta wave hand temper tantrum air ...


Splitting data into train and validation set

In [16]:
# Hold out 20% of the rows as a validation set; random_state fixes the shuffle.
df_train, df_val = train_test_split(df, test_size=0.2, random_state=1)

In [17]:
# Target arrays for the training and validation splits.
y_train = df_train['label'].values
y_val = df_val['label'].values

In [18]:
# Sanity check: the frame and target shapes must agree for each split.
# One print per statement -- joining the calls with commas builds a tuple of
# their None return values, which the notebook then echoes as
# (None, None, None, None).
print(df_train.shape)
print(y_train.shape)
print(df_val.shape)
print(y_val.shape)

(19240, 9)
(19240,)
(4811, 9)
(4811,)


(None, None, None, None)

Encoding text data & feature engineering

In [19]:
# bag-of-words encoding of the cleaned titles
def transform_text(data, vectorizer=None):
    """Encode ``data.clean_title`` as a bag-of-words count matrix.

    Parameters
    ----------
    data
        Object exposing a ``clean_title`` attribute/column of cleaned title
        strings (the notebook passes DataFrames).
    vectorizer : optional
        An already *fitted* vectorizer to reuse. When omitted, a new
        ``CountVectorizer`` is fitted on the module-level
        ``df_train['clean_title']`` on every call, so the vocabulary always
        comes from the training titles regardless of ``data``.

    Returns
    -------
    The matrix returned by ``vectorizer.transform`` for ``data.clean_title``.
    """
    if vectorizer is None:
        # default path: learn the vocabulary from the training titles only
        vectorizer = CountVectorizer()
        vectorizer.fit(df_train['clean_title'])
    return vectorizer.transform(data.clean_title)

In [20]:
# Encode the training titles and print the (rows, vocabulary size) shape of the matrix.
train_data = transform_text(df_train)
print(train_data.shape)

(19240, 15806)


In [21]:
# Encode the validation titles; transform_text fits its vocabulary on df_train,
# so the column count matches the training matrix.
val_data = transform_text(df_val)
print(val_data.shape)

(4811, 15806)


Model training and testing

1. Logistic Regression

In [22]:
from sklearn.linear_model import LogisticRegression
# Logistic regression on the title counts (lbfgs solver, at most 500 iterations).
lr = LogisticRegression(max_iter=500, solver='lbfgs')
lr.fit(train_data, y_train)  # fit on the training split

In [23]:
# Class predictions from the fitted logistic regression for both splits.
y_pred = lr.predict(val_data)  # validation predictions
train_pred = lr.predict(train_data)  # training predictions

In [24]:
# evaluation: per-class report and accuracy, first on the training split,
# then on the validation split
print('train results')
print(classification_report(y_train,train_pred))
print(f'accuracy - {accuracy_score(y_train,train_pred)}')
print(' ')
print('Validation results')
print(classification_report(y_val,y_pred))
print(f'accuracy - {accuracy_score(y_val,y_pred)}')

train results
              precision    recall  f1-score   support

           0       0.97      0.98      0.98      9872
           1       0.98      0.97      0.98      9368

    accuracy                           0.98     19240
   macro avg       0.98      0.98      0.98     19240
weighted avg       0.98      0.98      0.98     19240

accuracy - 0.9765592515592516
 
Validation results
              precision    recall  f1-score   support

           0       0.91      0.95      0.93      2431
           1       0.95      0.90      0.93      2380

    accuracy                           0.93      4811
   macro avg       0.93      0.93      0.93      4811
weighted avg       0.93      0.93      0.93      4811

accuracy - 0.9289129079193514


2. Decision Trees

In [25]:
from sklearn.tree import DecisionTreeClassifier
# Single decision tree on the title counts.
# random_state is pinned (same seed as the train/validation split) because the
# tree randomly permutes candidate features at each split; without a seed the
# fitted tree, and therefore the reported scores, can differ between runs.
dt = DecisionTreeClassifier(min_samples_leaf=1, random_state=1)
dt.fit(train_data, y_train)

In [26]:
# Class predictions from the fitted decision tree for both splits.
# Note: this rebinds y_pred / train_pred, which earlier held the logistic regression predictions.
y_pred = dt.predict(val_data)
train_pred = dt.predict(train_data)

In [27]:
# evaluation: per-class report and accuracy, first on the training split,
# then on the validation split (y_pred / train_pred hold the decision tree's predictions here)
print('train results')
print(classification_report(y_train,train_pred))
print(f'accuracy - {accuracy_score(y_train,train_pred)}')
print(' ')
print('Validation results')
print(classification_report(y_val,y_pred))
print(f'accuracy - {accuracy_score(y_val,y_pred)}')

train results
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9872
           1       1.00      1.00      1.00      9368

    accuracy                           1.00     19240
   macro avg       1.00      1.00      1.00     19240
weighted avg       1.00      1.00      1.00     19240

accuracy - 1.0
 
Validation results
              precision    recall  f1-score   support

           0       0.86      0.88      0.87      2431
           1       0.88      0.86      0.87      2380

    accuracy                           0.87      4811
   macro avg       0.87      0.87      0.87      4811
weighted avg       0.87      0.87      0.87      4811

accuracy - 0.8702972355019747


3. Random Forest

In [28]:
# training
from sklearn.ensemble import RandomForestClassifier
# Forest of 50 trees with a fixed seed.
# NOTE(review): max_depth=1500 is a very loose cap -- confirm the value is intentional.
rf = RandomForestClassifier(n_estimators=50, max_depth=1500, random_state=1)
rf.fit(train_data, y_train)

In [29]:
# Class predictions from the fitted random forest for both splits.
# Note: this rebinds y_pred / train_pred, which earlier held the decision tree predictions.
y_pred = rf.predict(val_data)
train_pred = rf.predict(train_data)

In [30]:
# evaluation: per-class report and accuracy, first on the training split,
# then on the validation split, plus the validation F1 for the positive class (label 1)
print('train results')
print(classification_report(y_train,train_pred))
print(f'accuracy - {accuracy_score(y_train,train_pred)}')
print(' ')
print('Validation results')
print(classification_report(y_val,y_pred))
print(f'accuracy - {accuracy_score(y_val,y_pred)}')
print(f'f1 score - {f1_score(y_val, y_pred)}')

train results
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9872
           1       1.00      1.00      1.00      9368

    accuracy                           1.00     19240
   macro avg       1.00      1.00      1.00     19240
weighted avg       1.00      1.00      1.00     19240

accuracy - 0.9996881496881497
 
Validation results
              precision    recall  f1-score   support

           0       0.87      0.95      0.91      2431
           1       0.95      0.86      0.90      2380

    accuracy                           0.91      4811
   macro avg       0.91      0.91      0.91      4811
weighted avg       0.91      0.91      0.91      4811

accuracy - 0.9064643525254625
f1 score - 0.9009683098591549


Experiment tracking

In [31]:
import mlflow
# Point MLflow at a local SQLite tracking store and select the experiment
# that every run below is recorded under.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("course_project")

<Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='course_project', tags={}>

1. Experiment run for logistic regression

In [32]:
# experiment run for logistic regression
with mlflow.start_run():

    # `lr` is a LogisticRegression instance, so the run is tagged accordingly
    mlflow.set_tag("model", "logistic regression")

    # fitting the train data
    lr.fit(train_data, y_train)

    # hard class predictions feed F1 and accuracy
    y_pred = lr.predict(val_data)
    # ROC AUC is a ranking metric: score it on the predicted probability of the
    # positive class (column 1 = label 1) rather than on thresholded 0/1
    # predictions, which would collapse the curve to a single operating point
    y_score = lr.predict_proba(val_data)[:, 1]

    # evaluation
    f1 = f1_score(y_val, y_pred)
    auc = roc_auc_score(y_val, y_score)
    accuracy = accuracy_score(y_val,y_pred)
    # logging metrics
    mlflow.log_metric("auc score", auc)
    mlflow.log_metric("f1 score", f1)
    mlflow.log_metric("accuracy", accuracy)

2. Hyperparameter tuning for Random Forest and Decision Trees

In [39]:
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from hyperopt.pyll import scope
from hyperopt import space_eval

In [40]:
def run(num_trials):
    """Tune random-forest and decision-tree hyperparameters with hyperopt (TPE).

    Every trial trains one classifier on the module-level ``train_data`` /
    ``y_train``, scores it on ``val_data`` / ``y_val`` and records its
    parameters and metrics in its own MLflow run. The search minimises the
    negative validation F1. The best configuration found is printed.

    Parameters
    ----------
    num_trials : int
        Number of hyperopt evaluations (passed to ``fmin`` as ``max_evals``).

    Raises
    ------
    ValueError
        If a sampled configuration carries a ``'type'`` that is not 'rf' or 'dt'.
    """
    # MLflow tag and estimator class for each value of the 'type' key
    model_registry = {
        'rf': ("random forest", RandomForestClassifier),
        'dt': ("Decision trees", DecisionTreeClassifier),
    }

    def objective(params):
        # work on a copy so the dict handed in by hyperopt is left untouched
        model_params = dict(params)
        classifier_type = model_params.pop('type')
        if classifier_type not in model_registry:
            # fail with a clear message instead of scoring a placeholder prediction
            raise ValueError(f"unknown classifier type: {classifier_type!r}")
        model_tag, model_cls = model_registry[classifier_type]

        with mlflow.start_run():
            mlflow.set_tag("model", model_tag)
            mlflow.log_params(model_params)

            model = model_cls(**model_params)
            model.fit(train_data, y_train)
            y_pred = model.predict(val_data)
            # probability of the positive class (label 1) for the ranking-based AUC
            y_score = model.predict_proba(val_data)[:, 1]

            # evaluation
            f1 = f1_score(y_val, y_pred)
            auc = roc_auc_score(y_val, y_score)
            accuracy = accuracy_score(y_val, y_pred)

            # logging metrics
            metrics = {"auc score": auc, "f1 score": f1, "accuracy": accuracy}
            mlflow.log_metrics(metrics)
        # hyperopt minimises the loss, so the F1 score is negated
        return {'loss': -f1, 'status': STATUS_OK}

    # defining search space: hyperopt first picks a model family, then samples
    # that family's own parameters (labels are kept distinct per family)
    search_space = hp.choice('classifier_type', [
        {
        'type': 'rf',
        'max_depth': scope.int(hp.quniform('max_depth', 100, 800, 100)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 10, 50, 10)),
        'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1)),
        'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 1, 4, 1)),
        'random_state': 42
        },
        {
        'type': 'dt',
        'max_depth': scope.int(hp.quniform('max_depth_dt', 100, 800, 100)),
        'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf_dt', 1, 4, 1)),
        'random_state': 42
        }])

    rstate = np.random.default_rng(42)  # for reproducible results
    # creating the fmin function
    best_result = fmin(
                fn=objective,
                space=search_space,
                algo=tpe.suggest,
                max_evals=num_trials,
                trials=Trials(),
                rstate=rstate
                )
    # fmin returns choice indices / raw values; space_eval maps them back to
    # the concrete configuration
    print(space_eval(search_space, best_result))

# launch the hyperparameter search with 50 trials
run(50)

100%|██████████| 50/50 [02:19<00:00,  2.79s/trial, best loss: -0.9071490845684393]
{'max_depth': 700, 'min_samples_leaf': 1, 'min_samples_split': 9, 'n_estimators': 40, 'random_state': 42, 'type': 'rf'}


Retraining and Autologging with the best model and parameters

In [41]:
# Best configuration printed by the hyperopt search, copied here by hand
# (its 'type' entry is left out; the remaining keys are passed to RandomForestClassifier).
best_params = {
        'max_depth': 700,
        'n_estimators': 40,
        'min_samples_split': 9,
        'min_samples_leaf': 1,
        'random_state': 42
        }

# turn on MLflow's scikit-learn autologging before fitting
mlflow.sklearn.autolog()

rf = RandomForestClassifier(**best_params)
rf.fit(train_data, y_train)

2022/08/19 09:06:24 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'd3a0fff7491647edb5445054e14c8125', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [42]:
# autologging logistic regression
mlflow.sklearn.autolog()

# same settings as the earlier logistic regression fit
lr = LogisticRegression(max_iter=500, solver='lbfgs')
lr.fit(train_data, y_train)

2022/08/19 09:30:27 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '9d7a0e1bd60a4c2d8ce15f46a4e662dd', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Testing the registered models, then promoting the best model to production

In [43]:
# Load the held-out test CSV, print its shape and preview the first rows.
test_df = pd.read_csv('./data/news_b1.csv')
print(test_df.shape)
test_df.head()

(5747, 7)


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label
0,27040,1585,U.S. Senate opposition to Obamacare repeal bil...,WASHINGTON (Reuters) - A proposal by U.S. Repu...,politicsNews,"September 24, 2017",REAL
1,22353,15859,BREAKING: COURAGEOUS FEDERAL JUDGE DENIES OBAM...,"If I were U.S. District Judge Andrew Hanen, I ...",politics,"Apr 8, 2015",FAKE
2,25461,5943,Republican Senator Prays For God To Kill Obam...,The Republican Party is currently holding a jo...,News,"June 10, 2016",FAKE
3,17391,6004,Trump Caught Paying Women Working On His Camp...,It s not surprising that Donald Trump would pa...,News,"June 5, 2016",FAKE
4,5995,15252,HILLARY EXPOSED: WATCH UNCOVERED VIDEO The Hil...,Hillary shows her true colors when a female st...,politics,"Aug 31, 2015",FAKE


In [44]:
# Count the missing values in each column of the test frame.
test_df.isnull().sum()

Unnamed: 0.1    0
Unnamed: 0      0
title           0
text            0
subject         0
date            0
label           0
dtype: int64

In [45]:
# Drop test rows whose title repeats an earlier row (first occurrence kept); mutates test_df in place.
test_df.drop_duplicates(subset ="title", keep = 'first', inplace = True)
test_df.shape

(5623, 7)

In [46]:
# Clean the test titles: normalise -> drop stop words -> lemmatise.
test_df['clean_title'] = (
    test_df['title']
    .apply(normalize_document)
    .apply(remove_stop)
    .apply(lemmatize)
)
# integer target: 1 where label equals 'FAKE', 0 for every other value
test_df['label'] = test_df['label'].eq('FAKE').astype(int)
test_df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label,clean_title
0,27040,1585,U.S. Senate opposition to Obamacare repeal bil...,WASHINGTON (Reuters) - A proposal by U.S. Repu...,politicsNews,"September 24, 2017",0,senate opposition obamacare repeal bill grows
1,22353,15859,BREAKING: COURAGEOUS FEDERAL JUDGE DENIES OBAM...,"If I were U.S. District Judge Andrew Hanen, I ...",politics,"Apr 8, 2015",1,break courageous federal judge denies obamas r...
2,25461,5943,Republican Senator Prays For God To Kill Obam...,The Republican Party is currently holding a jo...,News,"June 10, 2016",1,republican senator prays god kill obama soon p...
3,17391,6004,Trump Caught Paying Women Working On His Camp...,It s not surprising that Donald Trump would pa...,News,"June 5, 2016",1,trump catch pay woman work campaign men
4,5995,15252,HILLARY EXPOSED: WATCH UNCOVERED VIDEO The Hil...,Hillary shows her true colors when a female st...,politics,"Aug 31, 2015",1,hillary expose watch uncovered video hillary c...


In [47]:
# Test targets and bag-of-words features; transform_text fits its vocabulary
# on df_train's titles, not on the test titles.
y_test = test_df.label.values
X_test = transform_text(test_df)

In [48]:
def test_model(name, stage, X_test, y_test):
    """Score a registered MLflow model on the given test data.

    The model is loaded through ``mlflow.pyfunc`` from the URI
    ``models:/<name>/<stage>`` and asked to predict ``X_test``.

    Returns the text produced by ``classification_report(y_test, predictions)``.
    """
    model_uri = f"models:/{name}/{stage}"
    loaded_model = mlflow.pyfunc.load_model(model_uri)
    predictions = loaded_model.predict(X_test)
    report = classification_report(y_test, predictions)
    return report

In [50]:
model_name = "FakeNews_Classifier"
%time test_model(name=model_name, stage="Staging", X_test=X_test, y_test=y_test)

CPU times: user 240 ms, sys: 24 ms, total: 264 ms
Wall time: 263 ms


'              precision    recall  f1-score   support\n\n           0       0.90      0.94      0.92      2702\n           1       0.94      0.90      0.92      2921\n\n    accuracy                           0.92      5623\n   macro avg       0.92      0.92      0.92      5623\nweighted avg       0.92      0.92      0.92      5623\n'

In [51]:
%time test_model(name=model_name, stage="Production", X_test=X_test, y_test=y_test)

CPU times: user 30.6 ms, sys: 4.01 ms, total: 34.6 ms
Wall time: 34.1 ms


'              precision    recall  f1-score   support\n\n           0       0.92      0.96      0.94      2702\n           1       0.96      0.92      0.94      2921\n\n    accuracy                           0.94      5623\n   macro avg       0.94      0.94      0.94      5623\nweighted avg       0.94      0.94      0.94      5623\n'