In [2]:
import nltk
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
import string

In [3]:
stopwords = nltk.corpus.stopwords.words('english')
ps = nltk.PorterStemmer()

data = pd.read_csv("SMSSpamCollection", sep='\t')
data.columns = ['label', 'body_text']

In [4]:
def count_punct(text):
    count = sum([1 for char in text if char in string.punctuation])
    return round(count/(len(text) - text.count(" ")), 3) * 100

data['body_len'] = data['body_text'].apply(lambda x: len(x) - x.count(" "))
data['punct%'] = data['body_text'].apply(lambda x: count_punct(x))

In [5]:
def clean_text(text):
    text = "".join([word.lower() for word in text if word not in string.punctuation])
    tokens = re.split('\W+', text)
    text = [ps.stem(word) for word in tokens if word not in stopwords]
    return text

In [6]:
tfidf_vect = TfidfVectorizer(analyzer=clean_text)
X_tfidf = tfidf_vect.fit_transform(data['body_text'])

X_features = pd.concat([data['body_len'], data['punct%'], pd.DataFrame(X_tfidf.toarray())], axis=1)
X_features.head()

Unnamed: 0,body_len,punct%,0,1,2,3,4,5,6,7,...,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106
0,24,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,128,4.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,39,15.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,49,4.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,116,6.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from sklearn.ensemble import RandomForestClassifier

In [8]:
print(dir(RandomForestClassifier))

['__abstractmethods__', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_check_n_features', '_estimator_type', '_get_param_names', '_get_tags', '_make_estimator', '_more_tags', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_required_parameters', '_set_oob_score', '_validate_X_predict', '_validate_data', '_validate_estimator', '_validate_y_class_weight', 'apply', 'decision_path', 'feature_importances_', 'fit', 'get_params', 'predict', 'predict_log_proba', 'predict_proba', 'score', 'set_params']


# Cross-Validation

In [9]:
from sklearn.model_selection import KFold, cross_val_score

In [10]:
rf = RandomForestClassifier(n_jobs=-1)
k_fold = KFold(n_splits=5)
cross_val_score(rf, X_features, data['label'], cv=k_fold, scoring='accuracy', n_jobs=-1)

array([0.97668161, 0.97666068, 0.97396768, 0.96409336, 0.97307002])

# Holdout Set

In [11]:
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import train_test_split

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X_features, data['label'], test_size=0.2)

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [14]:
rf = RandomForestClassifier(n_estimators=50, max_depth=20, n_jobs=-1)
rf_model = rf.fit(X_train, y_train)

In [15]:
sorted(zip(rf_model.feature_importances_, X_train.columns), reverse=True)[0:10]

[(0.050705260696725624, 1804),
 (0.044561983170848724, 7353),
 (0.043362086830915594, 'body_len'),
 (0.03646690874332596, 3135),
 (0.028209216193123617, 4799),
 (0.02224669021235153, 2032),
 (0.018475540139560886, 5991),
 (0.01758479431756765, 6288),
 (0.017374060509245025, 1361),
 (0.0145274253894216, 6749)]

In [16]:
y_pred = rf_model.predict(X_test)
precision, recall, fscore, support = score(y_test, y_pred, pos_label='spam', average='binary')

In [17]:
print('Precision: {} / Recall: {} / Accuracy: {}'.format(round(precision, 3), round(recall, 3), round((y_pred==y_test).sum() / len(y_pred), 3)))

Precision: 1.0 / Recall: 0.591 / Accuracy: 0.942


# Grid Search

In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import train_test_split

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X_features, data['label'], test_size = 0.2)

In [20]:
def train_RF(n_est, depth):
    rf = RandomForestClassifier(n_estimators = n_est, max_depth=depth, n_jobs=-1)
    rf_model = rf.fit(X_train, y_train)
    y_pred = rf_model.predict(X_test)
    precision, recall, fscore, support = score(y_test, y_pred, pos_label='spam', average='binary')
    print('Est: {} / Depth: {} ----- Precision: {} / Recall: {} / Accuracy: {}'.format(
        n_est, depth, round(precision, 3), round(recall,3),
        round((y_pred==y_test).sum() / len(y_pred), 3)))

In [21]:
for n_est in [10, 50, 100]:
    for depth in [10, 20, 30, None]:
        train_RF(n_est, depth)

Est: 10 / Depth: 10 ----- Precision: 1.0 / Recall: 0.308 / Accuracy: 0.909
Est: 10 / Depth: 20 ----- Precision: 1.0 / Recall: 0.623 / Accuracy: 0.951
Est: 10 / Depth: 30 ----- Precision: 0.981 / Recall: 0.719 / Accuracy: 0.961
Est: 10 / Depth: None ----- Precision: 1.0 / Recall: 0.753 / Accuracy: 0.968
Est: 50 / Depth: 10 ----- Precision: 1.0 / Recall: 0.24 / Accuracy: 0.9
Est: 50 / Depth: 20 ----- Precision: 1.0 / Recall: 0.589 / Accuracy: 0.946
Est: 50 / Depth: 30 ----- Precision: 1.0 / Recall: 0.644 / Accuracy: 0.953
Est: 50 / Depth: None ----- Precision: 1.0 / Recall: 0.795 / Accuracy: 0.973
Est: 100 / Depth: 10 ----- Precision: 1.0 / Recall: 0.185 / Accuracy: 0.893
Est: 100 / Depth: 20 ----- Precision: 1.0 / Recall: 0.562 / Accuracy: 0.943
Est: 100 / Depth: 30 ----- Precision: 1.0 / Recall: 0.651 / Accuracy: 0.954
Est: 100 / Depth: None ----- Precision: 1.0 / Recall: 0.774 / Accuracy: 0.97


 # Random Forest Evaluation

In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer


In [23]:
# TF-IDF
tfidf_vect = TfidfVectorizer(analyzer=clean_text)
X_tfidf = tfidf_vect.fit_transform(data['body_text'])
X_tfidf_feat = pd.concat([data['body_len'], data['punct%'], pd.DataFrame(X_tfidf.toarray())], axis=1)

In [24]:
# CountVectorizer
count_vect = CountVectorizer(analyzer=clean_text)
X_count = count_vect.fit_transform(data['body_text'])
X_count_feat = pd.concat([data['body_len'], data['punct%'], pd.DataFrame(X_count.toarray())], axis=1)

X_count_feat.head()

Unnamed: 0,body_len,punct%,0,1,2,3,4,5,6,7,...,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106
0,24,25.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,128,4.7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,39,15.4,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,49,4.1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,116,6.9,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [26]:
rf = RandomForestClassifier()
param = {'n_estimators': [10, 150, 300],
         'max_depth': [30, 60, 90, None]}

gs = GridSearchCV(rf, param, cv=5, n_jobs=-1)
gs_fit = gs.fit(X_tfidf_feat, data['label'])
pd.DataFrame(gs_fit.cv_results_).sort_values('mean_test_score', ascending = False)[0:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
7,81.073345,8.692352,1.691046,1.694966,90.0,150,"{'max_depth': 90, 'n_estimators': 150}",0.975785,0.977558,0.975763,0.970377,0.972172,0.974331,0.002641,1
11,85.562669,26.23426,0.415009,0.041678,,300,"{'max_depth': None, 'n_estimators': 300}",0.975785,0.978456,0.973968,0.969479,0.971275,0.973792,0.003184,2
8,106.000853,17.393676,1.943905,0.882483,90.0,300,"{'max_depth': 90, 'n_estimators': 300}",0.977578,0.975763,0.974865,0.969479,0.969479,0.973433,0.003344,3
10,53.320802,20.461331,1.521435,1.364875,,150,"{'max_depth': None, 'n_estimators': 150}",0.977578,0.977558,0.97307,0.965889,0.969479,0.972715,0.004567,4
5,90.552154,21.509867,2.018187,1.494016,60.0,300,"{'max_depth': 60, 'n_estimators': 300}",0.977578,0.973968,0.974865,0.966786,0.967684,0.972176,0.004216,5


In [27]:
rf = RandomForestClassifier()
param = {'n_estimators': [10, 150, 300],
         'max_depth': [30, 60, 90, None]}

gs = GridSearchCV(rf, param, cv=5, n_jobs=-1)
gs_fit = gs.fit(X_count_feat, data['label'])
pd.DataFrame(gs_fit.cv_results_).sort_values('mean_test_score', ascending = False)[0:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
11,71.896816,14.403747,1.076919,0.830002,,300,"{'max_depth': None, 'n_estimators': 300}",0.976682,0.975763,0.973968,0.970377,0.968582,0.973074,0.003114,1
8,66.520901,0.240643,0.586031,0.02942,90.0,300,"{'max_depth': 90, 'n_estimators': 300}",0.977578,0.975763,0.973968,0.968582,0.968582,0.972895,0.003702,2
10,36.08013,5.108772,0.39315,0.061268,,150,"{'max_depth': None, 'n_estimators': 150}",0.976682,0.973968,0.973968,0.970377,0.968582,0.972715,0.002878,3
7,34.155736,0.347949,0.404318,0.054673,90.0,150,"{'max_depth': 90, 'n_estimators': 150}",0.978475,0.973968,0.97307,0.970377,0.967684,0.972715,0.003624,4
6,3.999505,0.376613,0.288827,0.112069,90.0,10,"{'max_depth': 90, 'n_estimators': 10}",0.975785,0.977558,0.975763,0.964991,0.964093,0.971638,0.005837,5


# Gradient Boosting


In [28]:
from sklearn.ensemble import GradientBoostingClassifier

In [29]:
print(dir(GradientBoostingClassifier))

['_SUPPORTED_LOSS', '__abstractmethods__', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_check_initialized', '_check_n_features', '_check_params', '_clear_state', '_compute_partial_dependence_recursion', '_estimator_type', '_fit_stage', '_fit_stages', '_get_param_names', '_get_tags', '_init_state', '_is_initialized', '_make_estimator', '_more_tags', '_raw_predict', '_raw_predict_init', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_required_parameters', '_resize_state', '_staged_raw_predict', '_validate_data', '_validate_estimator', '_validate_y', 'apply', 'decision_function', 'fe

In [30]:
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import train_test_split

In [31]:
X_train, X_test, y_train, y_test = train_test_split(X_features, data['label'], test_size=0.2)

In [32]:
def train_GB(est, max_depth, lr):
    gb = GradientBoostingClassifier(n_estimators=est, max_depth=max_depth, learning_rate=lr)
    gb_model = gb.fit(X_train, y_train)
    y_pred = gb_model.predict(X_test)
    precision, recall, fscore, train_support = score(y_test, y_pred, pos_label='spam', average='binary')
    print('Est: {} / Depth: {} / LR: {} ----- Precision: {} / Recall: {} / Accuracy: {}'.format(
            est, max_depth, lr, round(precision, 3), round(recall, 3),
            round((y_pred == y_test).sum()/len(y_pred), 3 )))

In [33]:
for n_est in [50, 100, 150]:
    for max_depth in [3, 7, 11, 15]:
        for lr in [0.01, 0.1, 1]:
            train_GB(n_est, max_depth, lr)

  _warn_prf(average, modifier, msg_start, len(result))


Est: 50 / Depth: 3 / LR: 0.01 ----- Precision: 0.0 / Recall: 0.0 / Accuracy: 0.871
Est: 50 / Depth: 3 / LR: 0.1 ----- Precision: 0.944 / Recall: 0.708 / Accuracy: 0.957
Est: 50 / Depth: 3 / LR: 1 ----- Precision: 0.872 / Recall: 0.757 / Accuracy: 0.954
Est: 50 / Depth: 7 / LR: 0.01 ----- Precision: 0.0 / Recall: 0.0 / Accuracy: 0.87
Est: 50 / Depth: 7 / LR: 0.1 ----- Precision: 0.928 / Recall: 0.806 / Accuracy: 0.967
Est: 50 / Depth: 7 / LR: 1 ----- Precision: 0.926 / Recall: 0.785 / Accuracy: 0.964
Est: 50 / Depth: 11 / LR: 0.01 ----- Precision: 1.0 / Recall: 0.007 / Accuracy: 0.872
Est: 50 / Depth: 11 / LR: 0.1 ----- Precision: 0.921 / Recall: 0.806 / Accuracy: 0.966
Est: 50 / Depth: 11 / LR: 1 ----- Precision: 0.943 / Recall: 0.799 / Accuracy: 0.968


  _warn_prf(average, modifier, msg_start, len(result))


Est: 50 / Depth: 15 / LR: 0.01 ----- Precision: 0.0 / Recall: 0.0 / Accuracy: 0.871
Est: 50 / Depth: 15 / LR: 0.1 ----- Precision: 0.907 / Recall: 0.812 / Accuracy: 0.965
Est: 50 / Depth: 15 / LR: 1 ----- Precision: 0.929 / Recall: 0.819 / Accuracy: 0.969
Est: 100 / Depth: 3 / LR: 0.01 ----- Precision: 0.933 / Recall: 0.486 / Accuracy: 0.929
Est: 100 / Depth: 3 / LR: 0.1 ----- Precision: 0.958 / Recall: 0.785 / Accuracy: 0.968
Est: 100 / Depth: 3 / LR: 1 ----- Precision: 0.869 / Recall: 0.736 / Accuracy: 0.952
Est: 100 / Depth: 7 / LR: 0.01 ----- Precision: 0.97 / Recall: 0.667 / Accuracy: 0.954
Est: 100 / Depth: 7 / LR: 0.1 ----- Precision: 0.93 / Recall: 0.833 / Accuracy: 0.97
Est: 100 / Depth: 7 / LR: 1 ----- Precision: 0.92 / Recall: 0.799 / Accuracy: 0.965
Est: 100 / Depth: 11 / LR: 0.01 ----- Precision: 0.964 / Recall: 0.736 / Accuracy: 0.962
Est: 100 / Depth: 11 / LR: 0.1 ----- Precision: 0.922 / Recall: 0.819 / Accuracy: 0.968
Est: 100 / Depth: 11 / LR: 1 ----- Precision: 0.944

# Parameter Settings with GridSearchCV


In [38]:
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import time

In [35]:
gb = GradientBoostingClassifier()

param = {
    'n_estimators': [100, 150],
    'max_depth': [7, 11, 15],
    'learning_rate': [0.1]
}

clf = GridSearchCV(gb, param, cv=5, n_jobs=-1)
cv_fit = clf.fit(X_tfidf_feat, data['label'])
pd.DataFrame(cv_fit.cv_results_).sort_values('mean_test_score', ascending = False)[0:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
1,413.632026,5.315942,0.491541,0.118583,0.1,7,150,"{'learning_rate': 0.1, 'max_depth': 7, 'n_esti...",0.963229,0.980251,0.971275,0.968582,0.964991,0.969665,0.005985,1
5,718.219414,118.051008,0.378388,0.119598,0.1,15,150,"{'learning_rate': 0.1, 'max_depth': 15, 'n_est...",0.963229,0.975763,0.971275,0.967684,0.969479,0.969486,0.004123,2
0,404.595101,43.631406,2.281031,1.043162,0.1,7,100,"{'learning_rate': 0.1, 'max_depth': 7, 'n_esti...",0.964126,0.978456,0.971275,0.965889,0.966786,0.969306,0.005147,3
3,603.158547,4.455121,0.813333,0.773433,0.1,11,150,"{'learning_rate': 0.1, 'max_depth': 11, 'n_est...",0.963229,0.977558,0.972172,0.965889,0.965889,0.968947,0.005215,4
4,476.486884,13.436054,1.00187,0.983848,0.1,15,100,"{'learning_rate': 0.1, 'max_depth': 15, 'n_est...",0.964126,0.97307,0.967684,0.966786,0.967684,0.96787,0.002909,5


In [36]:
gb = GradientBoostingClassifier()

param = {
    'n_estimators': [50, 100, 150],
    'max_depth': [7, 11, 15],
    'learning_rate': [0.1]
}

clf = GridSearchCV(gb, param, cv=5, n_jobs=-1)
cv_fit = clf.fit(X_count_feat, data['label'])
pd.DataFrame(cv_fit.cv_results_).sort_values('mean_test_score', ascending = False)[0:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
5,522.384134,23.44644,7.656829,12.561122,0.1,11,150,"{'learning_rate': 0.1, 'max_depth': 11, 'n_est...",0.965022,0.978456,0.968582,0.969479,0.971275,0.970563,0.004441,1
8,999.75703,117.913639,1.401895,1.447839,0.1,15,150,"{'learning_rate': 0.1, 'max_depth': 15, 'n_est...",0.965919,0.978456,0.969479,0.966786,0.971275,0.970383,0.004464,2
2,419.561394,8.999212,0.941082,0.865635,0.1,7,150,"{'learning_rate': 0.1, 'max_depth': 7, 'n_esti...",0.966816,0.979354,0.971275,0.966786,0.967684,0.970383,0.004779,3
7,442.512306,10.013614,3.929738,4.181409,0.1,15,100,"{'learning_rate': 0.1, 'max_depth': 15, 'n_est...",0.964126,0.975763,0.967684,0.967684,0.968582,0.968768,0.003818,4
6,202.411612,8.755448,0.863659,1.022643,0.1,15,50,"{'learning_rate': 0.1, 'max_depth': 15, 'n_est...",0.964126,0.973968,0.966786,0.967684,0.969479,0.968409,0.003273,5


In [41]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data[['body_text', 'body_len', 'punct%']], data['label'], test_size=0.2)

In [42]:
tfidf_vect = TfidfVectorizer(analyzer=clean_text)
tfidf_vect_fit = tfidf_vect.fit(X_train['body_text'])

In [43]:
tfidf_train = tfidf_vect_fit.transform(X_train['body_text'])
tfidf_test = tfidf_vect_fit.transform(X_test['body_text'])

In [44]:
X_train_vect = pd.concat([X_train[['body_len', 'punct%']].reset_index(drop=True), 
                         pd.DataFrame(tfidf_train.toarray())], axis=1)

In [45]:
X_test_vect = pd.concat([X_test[['body_len', 'punct%']].reset_index(drop=True),
                        pd.DataFrame(tfidf_test.toarray())], axis=1)

In [46]:
X_train_vect.head()

Unnamed: 0,body_len,punct%,0,1,2,3,4,5,6,7,...,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183
0,43,4.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,116,2.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,34,14.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,21,4.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,50,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [47]:
rf = RandomForestClassifier(n_estimators=150, max_depth=None, n_jobs=-1)

rf_model = rf.fit(X_train_vect, y_train)
y_pred = rf_model.predict(X_test_vect)

precision, recall, fscore, train_support = score(y_test, y_pred, pos_label='spam', average='binary')
print('Precision: {} / Recall: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round((y_pred==y_test).sum() / len(y_pred), 3 )))

Precision: 1.0 / Recall: 0.82 / Accuracy: 0.974


In [48]:
gb = GradientBoostingClassifier(n_estimators=150, max_depth=11)

gb_model = gb.fit(X_train_vect, y_train)
y_pred = gb_model.predict(X_test_vect)

precision, recall, fscore, train_support = score(y_test, y_pred, pos_label='spam', average='binary')
print('Precision: {} / Recall: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round((y_pred==y_test).sum() / len(y_pred), 3 )))

Precision: 0.951 / Recall: 0.839 / Accuracy: 0.97
