# Recommendation system

### Problem statement.
Build a recommender system by using cosine simillarties score.
DATASET: book


In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
#Load the data
df = pd.read_excel("C:\\Users\\SHUBHAM GARKAL\\Downloads\\book (RS).xlsx")

In [10]:
# Convert 'Book.Title' to string type and ensure 'Book.Rating' is numeric
df['Book.Title'] = df['Book.Title'].astype(str)
df['Book.Rating'] = pd.to_numeric(df['Book.Rating'], errors='coerce')
df

Unnamed: 0,User.ID,Book.Title,Book.Rating
0,8,Ancient Celtic Romances,5.0
1,8,Keepers of the Earth Teachers Guide,6.0
2,8,The Art Of Celtia,7.0
3,8,The Celts Activity Book,6.0
4,8,The Western way: A practical guide to the West...,5.0
...,...,...,...
9988,278854,A corrente de Trewis Scott,7.0
9989,278854,As valkÃ­rias,7.0
9990,278854,Blast From the Past,7.0
9991,278854,Celtic Mythology (Library of the World's Myths...,8.0


In [4]:
# Handle duplicate entries by taking the mean value of ratings
df = df.groupby(['User.ID', 'Book.Title'], as_index=False)['Book.Rating'].mean()

In [11]:
# Pivot the dataframe to get it in the format suitable for cosine similarity calculation
pivot_df = df.pivot(index='User.ID', columns='Book.Title', values='Book.Rating').fillna(0)
pivot_df

Book.Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",...,Zora Hurston and the Chinaberry Tree (Reading Rainbow Book),\Even Monkeys Fall from Trees\ and Other Japanese Proverbs,\I Won't Learn from You\: And Other Thoughts on Creative Maladjustment,"\More More More,\ Said the Baby",\O\ Is for Outlaw,"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character","\Well, there's your problem\: Cartoons",iI Paradiso Degli Orchi,stardust,Ã?Â?bermorgen.
User.ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0
278852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Calculate TF-IDF matrix
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(pivot_df)

In [7]:
# Calculate cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [8]:
# Function to get recommendations based on cosine similarity scores
def get_recommendations(book_title, cosine_sim=cosine_sim):
    # Get the index of the book
    book_index = pivot_df.columns.get_loc(book_title)
    # Get similarity scores of the book with all other books
    sim_scores = list(enumerate(cosine_sim[book_index]))
    # Sort the books based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    # Get the top 5 most similar books
    top_books = sim_scores[1:6]
    # Get the titles of the top books
    top_books_titles = [pivot_df.columns[i[0]] for i in top_books]
    return top_books_titles

In [9]:
# Test the function
book_title = 'Classical Mythology'
recommendations = get_recommendations(book_title)
print("Recommendations for '{}':".format(book_title))
for book in recommendations:
    print(book)

Recommendations for 'Classical Mythology':
Mythology 101 (Questar Fantasy)
Celtic Mythology (Library of the World's Myths and Legends)
 Jason, Madison &amp
 Other Stories;Merril;1985;McClelland &amp
 Repairing PC Drives &amp
