# Incremental Model training with NLP

In [101]:
import numpy as np
import pandas as pd

messages = pd.read_csv('SMSSpamCollection',sep='\t',names =['labels','text'])
messages.head()

Unnamed: 0,labels,text
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [102]:
messages.shape

(5572, 2)

In [103]:
from sklearn.model_selection import train_test_split
X_train,X_test = train_test_split(messages)

In [104]:
X_train.shape,X_test.shape

((4179, 2), (1393, 2))

In [105]:
X_train

Unnamed: 0,labels,text
3030,ham,What is your record for one night? :)
5051,ham,I will vote for wherever my heart guides me
3835,ham,Watching tv lor. Nice one then i like lor.
2236,ham,Si.como no?!listened2the plaid album-quite gd&...
4664,ham,No. Yes please. Been swimming?
...,...,...
3694,ham,I was about to do it when i texted. I finished...
703,ham,What is important is that you prevent dehydrat...
5552,ham,Have a safe trip to Nigeria. Wish you happines...
5103,ham,In other news after hassling me to get him wee...


In [106]:
# Converting dataframe into list of tuples as this is the requirement of creme

X_train = X_train.to_records(index=False)
X_test = X_test.to_records(index=False)

print(X_train)
print(X_test)


[('ham', 'What is your record for one night? :)')
 ('ham', 'I will vote for wherever my heart guides me')
 ('ham', 'Watching tv lor. Nice one then i like lor.') ...
 ('ham', 'Have a safe trip to Nigeria. Wish you happiness and very soon company to share moments with')
 ('ham', 'In other news after hassling me to get him weed for a week andres has no money. HAUGHAIGHGTUJHYGUJ')
 ('ham', 'sir, you will receive the account no another 1hr time. Sorry for the delay.')]
[('ham', 'K sure am in my relatives home. Sms me de. Pls:-)')
 ('ham', 'S:-)kallis wont play in first two odi:-)')
 ('ham', 'Dear got train and seat mine lower seat') ...
 ('ham', 'This message is from a great Doctor in India:-): 1) Do not drink APPY FIZZ. It contains Cancer causing age')
 ('spam', 'HMV BONUS SPECIAL 500 pounds of genuine HMV vouchers to be won. Just answer 4 easy questions. Play Now! Send HMV to 86688 More info:www.100percent-real.com')
 ('spam', "I want some cock! My hubby's away, I need a real man 2 satisf

In [107]:
# Creating the Pipeline
# 1st function creating TFIDF
# 2nd function creating naive bayes predictor
import math
from creme import compose
from creme import feature_extraction
from creme import naive_bayes
import creme

model = compose.Pipeline(
                        ('tokenize',feature_extraction.TFIDF(lowercase=False)),
                        ('nb',naive_bayes.MultinomialNB(alpha=1))
)


In [108]:
from creme import metrics
metric = metrics.Accuracy()

# Training the model with the Training Data
for label,sentence in X_train:
    model.fit_one(sentence,label)
    y_pred = model.predict_one(sentence)
    metric = metric.update(label,y_pred)


In [109]:
print(metric) # This is for the Training Data!

Accuracy: 95.91%


In [110]:
from creme import metrics
test_metric = metrics.Accuracy()
for label,sentence in X_test:
    y_pred = model.predict_one(sentence)
    test_metric = metric.update(label,y_pred)

In [111]:
print(test_metric) # This is our Test Accuracy!

Accuracy: 95.94%


### Training with some new data again

In [112]:
model.fit_one("This is a new message after some time","ham")
model.fit_one("Hurray!!! you won a mercedes benz!! ","spam")

Pipeline (
  TFIDF (
    normalize=True
    on=None
    strip_accents=True
    lowercase=False
    preprocessor=None
    tokenizer=<built-in method findall of re.Pattern object at 0x0000016B4E47A370>
    ngram_range=(1, 1)
  ),
  MultinomialNB (
    alpha=1
  )
)

In [114]:
model.predict_one("Welcome to UK-mobile-date this msg is FREE giving you free calling")

'spam'