In [16]:
# Tiny hand-written training corpus: two headlines about the TV show
# 'Mere Sai', followed by two sentences about an air-fryer steak recipe.
# Order matters: class labels are paired with these documents positionally.
corpus = [
    # -- 'Mere Sai' television items --
    "Hemant Pandey on upcoming track, his role in 'Mere Sai: Shraddha aur Saburi",
    "Television actor Hemant Pandey will be seen essaying the role of 'Prasad' in 'Mere Sai",
    # -- air-fryer steak items --
    "I tried Gordon Ramsay's recipe for air-fryer steak and it only took 20 minutes for a perfect result",
    (
        "I never even thought you could - or should cook a nice steak in the air fryer, "
        "but Gordon Ramsay's coffee-and-chili-rubbed rib-eye recipe has totally proved me wrong."
    ),
]

In [2]:
import numpy as np
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

In [17]:
# Step 1 - bag of words: learn the vocabulary from the training corpus, then
# encode each training document as a row of raw token counts.
count_vect = CountVectorizer()
count_vect.fit(corpus)
X_train_counts = count_vect.transform(corpus)

# Step 2 - tf-idf: learn the idf weights from the count matrix, then rescale
# the counts into a normalized tf-idf representation.
tfidf_transformer = TfidfTransformer()
tfidf_transformer.fit(X_train_counts)
X_train_tfidf = tfidf_transformer.transform(X_train_counts)

In [22]:
# One class label per training document, in the same order as `corpus`.
target = [1, 0, 2, 2]

# Fit a 2-nearest-neighbours classifier on the tf-idf training matrix.
knn = KNeighborsClassifier(n_neighbors=2)
clf = knn.fit(X_train_tfidf, target)

# Unseen documents whose class we want to predict.
docs_new = [
    'I have a Harley Davidson and Yamaha.',
    'I have a GTX 1050 GPU',
    'Television discovery increased entertainment',
    (
        "Sports Minister Anurag Thakur on Sunday came hard on Hockey India for "
        "unilaterally deciding to pull out of next year's Commonwealth Games, "
        "saying the national federation is bound to consult with the government "
        "before taking any such step."
    ),
    "should cook a nice steak in the air fryer",
    "Pavan Likes datascience",
]

# Encode the new documents with the already-fitted vectorizer and tf-idf
# transformer: only `transform` is called here, so the vocabulary and idf
# weights learned from the training corpus are reused rather than re-learned.
X_new_counts = count_vect.transform(docs_new)
X_new_tfidf = tfidf_transformer.transform(X_new_counts)

In [23]:
# predicting the category of our input text: Will give out number for category
predicted = clf.predict(X_new_tfidf)

for doc, category in zip(docs_new, predicted):
    print('%r => %s' % (doc, target[category]))

'I have a Harley Davidson and Yamaha.' => 2
'I have a GTX 1050 GPU' => 1
'Television discovery increased entertainment' => 1
"Sports Minister Anurag Thakur on Sunday came hard on Hockey India for unilaterally deciding to pull out of next year's Commonwealth Games, saying the national federation is bound to consult with the government before taking any such step." => 1
'should cook a nice steak in the air fryer' => 2
'Pavan Likes datascience' => 1
