In [1]:
import math
import river
from river import compose
from river import feature_extraction
from river import linear_model

In [2]:
model = compose.Pipeline(
    ('vectorizer', feature_extraction.TFIDF()),
    ('classifier', linear_model.LogisticRegression())
)

In [3]:
# Mapping string labels to integers
label_mapping = {'yes': 1, 'no': 0, 'unknown': 2}

In [4]:
docs = [ ('Chinese Beijing Chinese', 'yes'),
        ('Chinese Chinese Shanghai', 'yes'),
        ('Chinese Macao', 'yes'),
        ('Tokyo Japan Chinese', 'no') ]

In [5]:
docs

[('Chinese Beijing Chinese', 'yes'),
 ('Chinese Chinese Shanghai', 'yes'),
 ('Chinese Macao', 'yes'),
 ('Tokyo Japan Chinese', 'no')]

In [6]:
# for sentence, label in docs:
#     model = model.learn_one(sentence, label)

# Loop through the documents and learn one instance at a time
for sentence, label in docs:
    # Convert label from string to integer
    int_label = label_mapping[label]
    
    # Learn the model
    model.learn_one(sentence, int_label)

In [7]:
new_unseen_text = 'Tokyo india'

In [8]:
# model.predict_one(new_unseen_text)
print(f"Prediction for '{new_unseen_text}': {model.predict_one(new_unseen_text)}")

Prediction for 'Tokyo india': True


In [9]:
model.learn_one('Tokyo India USA', label_mapping['unknown'])

In [10]:
# Check the model state after learning the new instance
print(model)

TFIDF | LogisticRegression


In [11]:
# Predict again after learning the new instance 
new_unseen_text_2 = 'Perhaps Japan'
print(f"Prediction for '{new_unseen_text_2}': {model.predict_one(new_unseen_text_2)}")

Prediction for 'Perhaps Japan': True


In [12]:
# Pipeline (
#     TFIDF (
#         on = None
#         strip_accents = True
#         lowercase = False
#         preprocessor = None
#         vectorizer = <build-in method findall of re.Pattern object at 0x00000236CF37D5E0>
#         ngram_range = (1,1)
#     ),
#     MultinominalNB (
#         alpha = 1
#     )
# )

In [13]:
# model.predict_one("India Germany")