In [1]:
import numpy as np 
import pandas as pd

In [2]:
import pandas as pd

# Read the file with the assumed correct encoding
try:
    # Reading with ISO-8859-1 encoding
    data = pd.read_csv('./Book_Details.csv', encoding='ISO-8859-1')
except UnicodeDecodeError:
    # Handle potential encoding errors
    print("Error reading file. Check the encoding.")

# Define the function to correct encoding
def correct_encoding(text):
    try:
        # Try to correct the encoding
        return text.encode('ISO-8859-1').decode('utf-8')
    except UnicodeDecodeError:
        # If an error occurs, return the original text
        return text

# Apply the encoding correction to the DataFrame column
data['Description'] = data['Description'].apply(correct_encoding)

# Print or inspect the corrected data
print(data['Description'])


0       John Berger’s Classic Text on Art\n\nWays of S...
1       Renowned not only as the best concise introduc...
2       You don’t need to be a genius, you just need t...
3       When Drawing on the Right Side of the Brain wa...
4       The Artist’s Way is the seminal book on the su...
                              ...                        
4875    For fans of Madeline Miller's Circe, a stunnin...
4876    Euripides' classic drama about the often morti...
4877    In The Persian Expedition, Xenophon, a young A...
4878    The House of Atreus is cursed. A bloodline tai...
4879    The Greek myths are one of the most important ...
Name: Description, Length: 4880, dtype: object


In [3]:
data.head()

Unnamed: 0,Title,Author,Description,Image_Url,Average_Rating,Total_Rating_Count
0,Ways of Seeing,John Berger,John Berger’s Classic Text on Art\n\nWays of S...,https://images-na.ssl-images-amazon.com/images...,3.92,378573
1,The Story of Art,E.H. Gombrich,Renowned not only as the best concise introduc...,https://images-na.ssl-images-amazon.com/images...,3.96,417875
2,Steal Like an Artist: 10 Things Nobody Told Yo...,Austin Kleon,"You don’t need to be a genius, you just need t...",https://images-na.ssl-images-amazon.com/images...,3.96,293826
3,The New Drawing on the Right Side of the Brain,Betty Edwards,When Drawing on the Right Side of the Brain wa...,https://images-na.ssl-images-amazon.com/images...,3.87,356503
4,The Artist's Way: A Spiritual Path to Higher C...,Julia Cameron,The Artist’s Way is the seminal book on the su...,https://images-na.ssl-images-amazon.com/images...,3.95,112017


In [4]:
books = data[['Title','Author','Description']]

In [5]:
books.head()

Unnamed: 0,Title,Author,Description
0,Ways of Seeing,John Berger,John Berger’s Classic Text on Art\n\nWays of S...
1,The Story of Art,E.H. Gombrich,Renowned not only as the best concise introduc...
2,Steal Like an Artist: 10 Things Nobody Told Yo...,Austin Kleon,"You don’t need to be a genius, you just need t..."
3,The New Drawing on the Right Side of the Brain,Betty Edwards,When Drawing on the Right Side of the Brain wa...
4,The Artist's Way: A Spiritual Path to Higher C...,Julia Cameron,The Artist’s Way is the seminal book on the su...


In [6]:
def remove_spaces(text):
    return text.replace(" ", "")

# Apply the function to the DataFrame column
books['Author'] = books['Author'].apply(remove_spaces)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  books['Author'] = books['Author'].apply(remove_spaces)


In [7]:
books.head()

Unnamed: 0,Title,Author,Description
0,Ways of Seeing,JohnBerger,John Berger’s Classic Text on Art\n\nWays of S...
1,The Story of Art,E.H.Gombrich,Renowned not only as the best concise introduc...
2,Steal Like an Artist: 10 Things Nobody Told Yo...,AustinKleon,"You don’t need to be a genius, you just need t..."
3,The New Drawing on the Right Side of the Brain,BettyEdwards,When Drawing on the Right Side of the Brain wa...
4,The Artist's Way: A Spiritual Path to Higher C...,JuliaCameron,The Artist’s Way is the seminal book on the su...


In [8]:
books['tags'] = books['Description'] +" "+ books['Author'] 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  books['tags'] = books['Description'] +" "+ books['Author']


In [9]:
books.head()

Unnamed: 0,Title,Author,Description,tags
0,Ways of Seeing,JohnBerger,John Berger’s Classic Text on Art\n\nWays of S...,John Berger’s Classic Text on Art\n\nWays of S...
1,The Story of Art,E.H.Gombrich,Renowned not only as the best concise introduc...,Renowned not only as the best concise introduc...
2,Steal Like an Artist: 10 Things Nobody Told Yo...,AustinKleon,"You don’t need to be a genius, you just need t...","You don’t need to be a genius, you just need t..."
3,The New Drawing on the Right Side of the Brain,BettyEdwards,When Drawing on the Right Side of the Brain wa...,When Drawing on the Right Side of the Brain wa...
4,The Artist's Way: A Spiritual Path to Higher C...,JuliaCameron,The Artist’s Way is the seminal book on the su...,The Artist’s Way is the seminal book on the su...


In [10]:
new = books.drop(columns=['Author','Description'])

In [11]:
new.head()

Unnamed: 0,Title,tags
0,Ways of Seeing,John Berger’s Classic Text on Art\n\nWays of S...
1,The Story of Art,Renowned not only as the best concise introduc...
2,Steal Like an Artist: 10 Things Nobody Told Yo...,"You don’t need to be a genius, you just need t..."
3,The New Drawing on the Right Side of the Brain,When Drawing on the Right Side of the Brain wa...
4,The Artist's Way: A Spiritual Path to Higher C...,The Artist’s Way is the seminal book on the su...


In [12]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000,stop_words='english')

In [13]:
vector = cv.fit_transform(new['tags']).toarray()

In [16]:
vector.shape

(4880, 5000)

In [17]:
from sklearn.metrics.pairwise import cosine_similarity

In [18]:
similarity = cosine_similarity(vector)

In [20]:
similarity[0]

array([1.        , 0.29731473, 0.0586883 , ..., 0.01470747, 0.        ,
       0.01273705])

In [21]:
new[new['Title'] == 'Ways of Seeing'].index[0]

0

In [26]:
def recommend(movie):
    index = new[new['Title'] == movie].index[0]
    distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])
    for i in distances[1:6]:
        print(new.iloc[i[0]].Title)

In [28]:
recommend('50 Reasons People Give for Believing in a God')

The God Argument: The Case against Religion and for Humanism
Atheism: The Case Against God
The Reason for God: Belief in an Age of Skepticism
The Reason for God: Belief in an Age of Skepticism
Breaking the Spell: Religion as a Natural Phenomenon
