In [1]:
# Import necessary libraries from River
import river

In [2]:
from river import compose
from river.feature_extraction import BagOfWords
from river.naive_bayes import MultinomialNB

In [3]:
# Sample data for training (text, label pairs)
data = [
    ('Chinese Beijing Chinese', 'yes'),
    ('Chinese Chinese Shanghai', 'yes'),
    ('Chinese Macao', 'yes'),
    ('Tokyo Japan Chinese', 'no'),
]

In [4]:
### This is just an example
corpus = [
         'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        'Is this the first document?',
    ]
bow = river.feature_extraction.TFIDF()
for sentence in corpus:
    print(bow.transform_one(sentence))

{'this': 0.4472135954999579, 'is': 0.4472135954999579, 'the': 0.4472135954999579, 'first': 0.4472135954999579, 'document': 0.4472135954999579}
{'this': 0.35355339059327373, 'document': 0.7071067811865475, 'is': 0.35355339059327373, 'the': 0.35355339059327373, 'second': 0.35355339059327373}
{'and': 0.408248290463863, 'this': 0.408248290463863, 'is': 0.408248290463863, 'the': 0.408248290463863, 'third': 0.408248290463863, 'one': 0.408248290463863}
{'is': 0.4472135954999579, 'this': 0.4472135954999579, 'the': 0.4472135954999579, 'first': 0.4472135954999579, 'document': 0.4472135954999579}


In [5]:
from river.compose import Pipeline

In [6]:
# Initialize the pipeline with a bag-of-words vectorizer and a Multinomial Naive Bayes classifier
pipe_nb = Pipeline(
    ('vectorizer', BagOfWords(lowercase=True)),
    ('nb', MultinomialNB())
)

In [7]:
# Train the pipeline with the data
for sentence, label in data:
    pipe_nb.learn_one(sentence, label)

In [8]:
new_unseen_text = 'Tokyo india'

In [9]:
pipe_nb.predict_one(new_unseen_text)

'no'

### Training on a new data and new category

In [10]:
pipe_nb.learn_one('India USA', 'may be')

In [11]:
pipe_nb.predict_one('India Germany')

'may be'

### River with logistic regression

In [20]:
from river import compose, datasets, linear_model, metrics, preprocessing
from river.compose import Pipeline

X_y = datasets.Phishing()

model = Pipeline(
    preprocessing.StandardScaler(),
    linear_model.LogisticRegression()
)
metric = metrics.Accuracy()

for x, y in X_y:
    y_pred = model.predict_one(x)
    metric.update(y, y_pred)
    model.learn_one(x, y)

metric

Accuracy: 89.28%