Importing dependencies 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

Read the dataset

In [None]:
# Load the news articles from the CSV file and preview the first five rows.
data = pd.read_csv(filepath_or_buffer='News.csv')
print(data.head(n=5))

Visualizing the dataset

In [None]:
# Count how many articles fall into each news category.
categories = data["News Category"].value_counts()
label, count = categories.index, categories.values

# Bar chart: one bar per category, bar height = number of articles.
plt.bar(x=label, height=count)

plt.xlabel('Category')
plt.ylabel('Number of Values')
plt.title('Types of News Categories')

# Rotate the x tick labels by 60 degrees.
plt.xticks(rotation=60)


Finding the cosine similarity between the titles of articles

In [None]:
# Build TF-IDF vectors for the article titles and compare every pair of titles.
# Missing titles are replaced with empty strings so that every row of `data`
# keeps a matching row in the similarity matrix.
feature = data["Title"].fillna("").astype(str).tolist()
# `input` only accepts 'filename', 'file' or 'content' (the default); the
# documents themselves are handed to fit_transform, not to the constructor.
tfidf = text.TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(feature)
# similarity[i][j] is the cosine similarity between title i and title j.
similarity = cosine_similarity(tfidf_matrix)

Removing duplicates to avoid confusion during recommendation

In [None]:
# Map each title to the row label of its first occurrence.
# Duplicates are removed by title (the Series index): Series.drop_duplicates()
# compares the values, which here are the row labels of `data`, so it would
# leave repeated titles in place.
indices = pd.Series(data.index, index=data['Title'])
indices = indices[~indices.index.duplicated(keep='first')]

Recommending articles based on the similarities

In [None]:
def news_recommendation(Title, similarity = similarity, top_n = 10):
    """Return the titles most similar to the given article title.

    Parameters
    ----------
    Title : str
        Title to look up in the module-level `indices` mapping.
    similarity : 2-D array-like, optional
        Pairwise similarity matrix indexed by row position. Defaults to the
        module-level `similarity` as it was when this function was defined.
    top_n : int, optional
        Number of titles to return (default 10).

    Returns
    -------
    pandas.Series
        The `top_n` entries of ``data['Title']`` with the highest similarity
        score, best match first. The queried article is not filtered out of
        the ranking.

    Raises
    ------
    KeyError
        If `Title` is not present in `indices`.
    """
    index = indices[Title]
    # A title that occurs more than once makes the lookup return a Series of
    # row labels; use the first occurrence so exactly one row is selected.
    if isinstance(index, pd.Series):
        index = index.iloc[0]
    # sorted() is stable, so equally scored titles keep their original order.
    similarity_scores = sorted(
        enumerate(similarity[index]), key=lambda pair: pair[1], reverse=True
    )
    newsindices = [position for position, _ in similarity_scores[:top_n]]
    return data['Title'].iloc[newsindices]

Printing the recommended titles for an input title

In [None]:
# Print the titles recommended for one sample article.
print(news_recommendation(Title="Walmart Slashes Prices on Last-Generation iPads"))