In [46]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Load the dataset



In [47]:
data = pd.read_csv('/kaggle/input/rs-book/book.csv', encoding='latin1')

In [48]:
data

Unnamed: 0.1,Unnamed: 0,User.ID,Book.Title,Book.Rating
0,1,276726,Classical Mythology,5
1,2,276729,Clara Callan,3
2,3,276729,Decision in Normandy,6
3,4,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,5,276737,The Mummies of Urumchi,6
...,...,...,...,...
9995,9996,162121,American Fried: Adventures of a Happy Eater.,7
9996,9997,162121,Cannibal In Manhattan,9
9997,9998,162121,How to Flirt: A Practical Guide,7
9998,9999,162121,Twilight,8


In [49]:
print(data.head())

   Unnamed: 0  User.ID                                         Book.Title  \
0           1   276726                                Classical Mythology   
1           2   276729                                       Clara Callan   
2           3   276729                               Decision in Normandy   
3           4   276736  Flu: The Story of the Great Influenza Pandemic...   
4           5   276737                             The Mummies of Urumchi   

   Book.Rating  
0            5  
1            3  
2            6  
3            8  
4            6  


# Display the dimensions of the dataset


In [50]:
print(data.shape)

(10000, 4)


# Display the summary statistics of the dataset


In [51]:
print(data.describe())

        Unnamed: 0        User.ID  Book.Rating
count  10000.00000   10000.000000  10000.00000
mean    5000.50000   95321.249800      7.56630
std     2886.89568  117645.703609      1.82152
min        1.00000       8.000000      1.00000
25%     2500.75000    2103.000000      7.00000
50%     5000.50000    3757.000000      8.00000
75%     7500.25000  162052.000000      9.00000
max    10000.00000  278854.000000     10.00000


In [52]:
data = data.drop(['Unnamed: 0'], axis=1)

In [53]:
data

Unnamed: 0,User.ID,Book.Title,Book.Rating
0,276726,Classical Mythology,5
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6
3,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,276737,The Mummies of Urumchi,6
...,...,...,...
9995,162121,American Fried: Adventures of a Happy Eater.,7
9996,162121,Cannibal In Manhattan,9
9997,162121,How to Flirt: A Practical Guide,7
9998,162121,Twilight,8


### Renaming Columns

In [54]:
data.rename(columns = {'User.ID':'USER_ID', 'Book.Title':'book_title','Book.Rating':'book_rating'}, inplace = True)

In [55]:
data.head()

Unnamed: 0,USER_ID,book_title,book_rating
0,276726,Classical Mythology,5
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6
3,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,276737,The Mummies of Urumchi,6


# Create a bag of words model


In [63]:
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(data['book_title'])

# Compute the cosine similarity matrix

In [67]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)

# Define a function to recommend books based on cosine similarity score

In [69]:
def get_recommendations(book_title, cosine_sim=cosine_sim, df=data):
    # Get the index of the book that matches the title
    idx = df[df['book_title'] == book_title].index[0]
    
    # Get the pairwise similarity scores of all books with that book
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort the books based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the scores of the 10 most similar books
    sim_scores = sim_scores[1:11]
    
    # Get the indices of the similar books
    book_indices = [i[0] for i in sim_scores]
    
    # Return the top 10 most similar books
    return df['book_title'].iloc[book_indices]



# Test the function by recommending books similar to "Decision in Normandy"


In [72]:
print(get_recommendations('Decision in Normandy'))

3560            Currahee!:  A Screaming Eagle at Normandy
3294    Band of Brothers : E Company, 506th Regiment, ...
3559    If You Survive: From Normandy to the Battle of...
7425    The Journal of Scott Pendleton Collins: A Worl...
0                                     Classical Mythology
1                                            Clara Callan
3       Flu: The Story of the Great Influenza Pandemic...
4                                  The Mummies of Urumchi
5                                  The Kitchen God's Wife
6       What If?: The World's Foremost Military Histor...
Name: book_title, dtype: object
