# Importing libraries

In [1]:
from sklearn.linear_model import Perceptron 
from sklearn.datasets import fetch_20newsgroups
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

Setting Categories

In [2]:
# The two newsgroups the classifier has to tell apart; passed to the
# dataset loader to restrict which posts are fetched.
categories = [
    'alt.atheism',
    'sci.med',
]

Getting Data

In [3]:
# Load only the training split, limited to the newsgroups listed in `categories`.
train = fetch_20newsgroups(
    subset='train',
    categories=categories,
)

Define The Model Pipeline

In [4]:
# Text-classification pipeline: raw documents go in, a class prediction comes out.
model_pipeline = Pipeline(
    steps=[
        # Stage 1: turn each document into token counts.
        ('count_vectorizer', CountVectorizer()),
        # Stage 2: re-weight the counts with TF-IDF.
        ('tfidf_vectorizer', TfidfTransformer()),
        # Stage 3: linear classifier; the seed is fixed so reruns are repeatable.
        ('perceptron_model', Perceptron(max_iter=100, random_state=42)),
    ]
)

In [5]:
# Fit all pipeline stages on the training documents and their labels.
# The fit call stays the last expression so the cell still displays the estimator.
train_texts, train_labels = train.data, train.target
model_pipeline.fit(train_texts, train_labels)

In [6]:
# Hand-written probe sentences, each paired with its expected class index
# (0 -> alt.atheism, 1 -> sci.med). Keeping text and label side by side
# prevents the two lists from drifting out of alignment.
labelled_examples = [
    ('Religion is widespread, even in modern times', 0),
    ('His kidney failed', 1),
    ('The pope is a controversial leader', 0),
    ('White blood cells fightoff infections', 1),
    ('The reverend had a heart attack in church', 0),
]
test_docs = [text for text, _ in labelled_examples]
test_target = [label for _, label in labelled_examples]

In [7]:
# Predict one class index per probe sentence using the fitted pipeline.
pred = model_pipeline.predict(X=test_docs)

In [8]:
# Show each probe sentence next to the newsgroup name predicted for it.
for text, label_idx in zip(test_docs, pred):
    label_name = train.target_names[label_idx]  # class index -> newsgroup name
    print(f'{text} => {label_name}')

Religion is widespread, even in modern times => alt.atheism
His kidney failed => sci.med
The pope is a controversial leader => sci.med
White blood cells fightoff infections => sci.med
The reverend had a heart attack in church => sci.med


In [9]:
# Fraction of the hand-written probe sentences classified as expected.
# NOTE(review): five sentences is a very small sample; treat this figure as a
# smoke test rather than a reliable estimate of generalisation.
test_accuracy = model_pipeline.score(test_docs, test_target)
f'Test Accuracy => {test_accuracy * 100}%'

'Test Accuracy => 60.0%'

In [10]:
# Accuracy on the same documents the pipeline was fitted on.
# NOTE(review): this measures how well the training data was fitted, not how
# well the model generalises; compare against held-out data before drawing conclusions.
train_accuracy = model_pipeline.score(train.data, train.target)
f'Train Accuracy => {train_accuracy * 100}%'

'Train Accuracy => 100.0%'