In [1]:
import numpy as np 
import pandas as pd

In [2]:
import pandas as pd

# Read the file with the assumed correct encoding
try:
    # Reading with ISO-8859-1 encoding
    data = pd.read_csv('./Book_Details.csv', encoding='ISO-8859-1')
except UnicodeDecodeError:
    # Handle potential encoding errors
    print("Error reading file. Check the encoding.")

# Define the function to correct encoding
def correct_encoding(text):
    try:
        # Try to correct the encoding
        return text.encode('ISO-8859-1').decode('utf-8')
    except UnicodeDecodeError:
        # If an error occurs, return the original text
        return text

# Apply the encoding correction to the DataFrame column
data['Description'] = data['Description'].apply(correct_encoding)
data['Title'] = data['Title'].apply(correct_encoding)

# Print or inspect the corrected data
print(data['Description'])


0       John Berger’s Classic Text on Art\n\nWays of S...
1       Renowned not only as the best concise introduc...
2       You don’t need to be a genius, you just need t...
3       When Drawing on the Right Side of the Brain wa...
4       The Artist’s Way is the seminal book on the su...
                              ...                        
4152    For fans of Madeline Miller's Circe, a stunnin...
4153    Euripides' classic drama about the often morti...
4154    In The Persian Expedition, Xenophon, a young A...
4155    The House of Atreus is cursed. A bloodline tai...
4156    The Greek myths are one of the most important ...
Name: Description, Length: 4157, dtype: object


In [3]:
data.head()

Unnamed: 0,Title,Author,Description,Image_Url,Average_Rating,Total_Rating_Count
0,Ways of Seeing,John Berger,John Berger’s Classic Text on Art\n\nWays of S...,https://images-na.ssl-images-amazon.com/images...,3.92,378573
1,The Story of Art,E.H. Gombrich,Renowned not only as the best concise introduc...,https://images-na.ssl-images-amazon.com/images...,3.96,417875
2,Steal Like an Artist: 10 Things Nobody Told Yo...,Austin Kleon,"You don’t need to be a genius, you just need t...",https://images-na.ssl-images-amazon.com/images...,3.96,293826
3,The New Drawing on the Right Side of the Brain,Betty Edwards,When Drawing on the Right Side of the Brain wa...,https://images-na.ssl-images-amazon.com/images...,3.87,356503
4,The Artist's Way: A Spiritual Path to Higher C...,Julia Cameron,The Artist’s Way is the seminal book on the su...,https://images-na.ssl-images-amazon.com/images...,3.95,112017


In [4]:
books = data[['Title','Author','Description']]

In [5]:
books.head()

Unnamed: 0,Title,Author,Description
0,Ways of Seeing,John Berger,John Berger’s Classic Text on Art\n\nWays of S...
1,The Story of Art,E.H. Gombrich,Renowned not only as the best concise introduc...
2,Steal Like an Artist: 10 Things Nobody Told Yo...,Austin Kleon,"You don’t need to be a genius, you just need t..."
3,The New Drawing on the Right Side of the Brain,Betty Edwards,When Drawing on the Right Side of the Brain wa...
4,The Artist's Way: A Spiritual Path to Higher C...,Julia Cameron,The Artist’s Way is the seminal book on the su...


In [6]:
def remove_spaces(text):
    return text.replace(" ", "")

# Apply the function to the DataFrame column
books['Author'] = books['Author'].apply(remove_spaces)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  books['Author'] = books['Author'].apply(remove_spaces)


In [7]:
books.head()

Unnamed: 0,Title,Author,Description
0,Ways of Seeing,JohnBerger,John Berger’s Classic Text on Art\n\nWays of S...
1,The Story of Art,E.H.Gombrich,Renowned not only as the best concise introduc...
2,Steal Like an Artist: 10 Things Nobody Told Yo...,AustinKleon,"You don’t need to be a genius, you just need t..."
3,The New Drawing on the Right Side of the Brain,BettyEdwards,When Drawing on the Right Side of the Brain wa...
4,The Artist's Way: A Spiritual Path to Higher C...,JuliaCameron,The Artist’s Way is the seminal book on the su...


In [8]:
books['tags'] = books['Description'] +" "+ books['Author'] 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  books['tags'] = books['Description'] +" "+ books['Author']


In [9]:
books.head()

Unnamed: 0,Title,Author,Description,tags
0,Ways of Seeing,JohnBerger,John Berger’s Classic Text on Art\n\nWays of S...,John Berger’s Classic Text on Art\n\nWays of S...
1,The Story of Art,E.H.Gombrich,Renowned not only as the best concise introduc...,Renowned not only as the best concise introduc...
2,Steal Like an Artist: 10 Things Nobody Told Yo...,AustinKleon,"You don’t need to be a genius, you just need t...","You don’t need to be a genius, you just need t..."
3,The New Drawing on the Right Side of the Brain,BettyEdwards,When Drawing on the Right Side of the Brain wa...,When Drawing on the Right Side of the Brain wa...
4,The Artist's Way: A Spiritual Path to Higher C...,JuliaCameron,The Artist’s Way is the seminal book on the su...,The Artist’s Way is the seminal book on the su...


In [10]:
new = books.drop(columns=['Author','Description'])

In [11]:
new.head()

Unnamed: 0,Title,tags
0,Ways of Seeing,John Berger’s Classic Text on Art\n\nWays of S...
1,The Story of Art,Renowned not only as the best concise introduc...
2,Steal Like an Artist: 10 Things Nobody Told Yo...,"You don’t need to be a genius, you just need t..."
3,The New Drawing on the Right Side of the Brain,When Drawing on the Right Side of the Brain wa...
4,The Artist's Way: A Spiritual Path to Higher C...,The Artist’s Way is the seminal book on the su...


In [12]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000,stop_words='english')

In [13]:
vector = cv.fit_transform(new['tags']).toarray()

In [14]:
vector.shape

(4157, 5000)

In [15]:
from sklearn.metrics.pairwise import cosine_similarity

In [16]:
similarity = cosine_similarity(vector)

In [17]:
similarity[0]

array([1.        , 0.29955865, 0.05913124, ..., 0.01492704, 0.        ,
       0.01297498])

In [18]:
new['Title'] == 'Ways of Seeing'

0        True
1       False
2       False
3       False
4       False
        ...  
4152    False
4153    False
4154    False
4155    False
4156    False
Name: Title, Length: 4157, dtype: bool

In [19]:
new[new['Title'] == 'Ways of Seeing']

Unnamed: 0,Title,tags
0,Ways of Seeing,John Berger’s Classic Text on Art\n\nWays of S...


In [20]:
index=new[new['Title'] == 'Ways of Seeing'].index[0]

In [21]:
list(enumerate(similarity[index]))

[(0, 0.9999999999999996),
 (1, 0.29955864532492316),
 (2, 0.05913123959890826),
 (3, 0.08772689266130253),
 (4, 0.11149893466761208),
 (5, 0.1651445647689541),
 (6, 0.15231794896123557),
 (7, 0.211950261485958),
 (8, 0.034567534539301475),
 (9, 0.270801280154532),
 (10, 0.34952331447758644),
 (11, 0.03126526997403612),
 (12, 0.11488690226654374),
 (13, 0.05372153093502535),
 (14, 0.08280260997940048),
 (15, 0.30410838947835145),
 (16, 0.009358472761968363),
 (17, 0.23709558638633974),
 (18, 0.09782319760890369),
 (19, 0.09819304088496758),
 (20, 0.10889310129609417),
 (21, 0.225912997889962),
 (22, 0.05133270023393453),
 (23, 0.09756156783416058),
 (24, 0.2699352029174866),
 (25, 0.14360673947588817),
 (26, 0.2581092840218365),
 (27, 0.16527373535168174),
 (28, 0.10879853497231114),
 (29, 0.17849555373113413),
 (30, 0.09075643069909582),
 (31, 0.1762673980835868),
 (32, 0.07872958216222169),
 (33, 0.055395498883116824),
 (34, 0.29398429835072426),
 (35, 0.11032175315897381),
 (36, 0.03

In [22]:
 distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])

In [23]:
def recommend(movie):
    index = new[new['Title'] == movie].index[0]
    distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])
    items = []
    for i in distances[1:6]:
        
        item = []
        item.append(data.iloc[i[0]].Title)
        item.append(data.iloc[i[0]].Author)
        item.append(data.iloc[i[0]].Image_Url)
        items.append(item)
    return items
    
        
        
    
    

In [24]:
distances

[(0, 0.9999999999999996),
 (10, 0.34952331447758644),
 (15, 0.30410838947835145),
 (1, 0.29955864532492316),
 (34, 0.29398429835072426),
 (9, 0.270801280154532),
 (24, 0.2699352029174866),
 (26, 0.2581092840218365),
 (17, 0.23709558638633974),
 (21, 0.225912997889962),
 (7, 0.211950261485958),
 (3702, 0.1951317922258849),
 (3767, 0.18286012835299778),
 (2831, 0.18282740951546758),
 (29, 0.17849555373113413),
 (31, 0.1762673980835868),
 (2848, 0.17588161767036214),
 (3722, 0.17560871143254547),
 (2815, 0.17446578944971328),
 (1818, 0.17431926281167376),
 (1906, 0.1725528587906171),
 (883, 0.17236256333167305),
 (44, 0.16898159235484367),
 (27, 0.16527373535168174),
 (3456, 0.16514456476895412),
 (5, 0.1651445647689541),
 (2222, 0.16448792373994225),
 (1795, 0.16188893170327057),
 (815, 0.15950063016128296),
 (3220, 0.15745916432444337),
 (2099, 0.15542413245411338),
 (6, 0.15231794896123557),
 (3215, 0.15194743527951726),
 (750, 0.1515470460848165),
 (423, 0.15114173098063566),
 (2589, 

In [25]:
items = []
for i in distances[1:6]:
    print (data.iloc[i[0]])
    
    
    item = []
    
        
        
    
    item.append(data.iloc[i[0]].Title)
    item.append(data.iloc[i[0]].Author)
    item.append(data.iloc[i[0]].Image_Url)
    items.append(item)



Title                                       Seven Days in the Art World
Author                                                   Sarah Thornton
Description           Named one of the best art books of 2008 by The...
Image_Url             https://images-na.ssl-images-amazon.com/images...
Average_Rating                                                     3.62
Total_Rating_Count                                               65,780
Name: 10, dtype: object
Title                                              Art Through the Ages
Author                                                  Fred S. Kleiner
Description           The market-leading text for the art history su...
Image_Url             https://images-na.ssl-images-amazon.com/images...
Average_Rating                                                     3.84
Total_Rating_Count                                               41,641
Name: 15, dtype: object
Title                                                  The Story of Art
Author          

In [26]:
item

['Concerning the Spiritual in Art',
 'Wassily Kandinsky',
 'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1405982387i/857502.jpg']

In [27]:
items

[['Seven Days in the Art World',
  'Sarah Thornton',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1358748500i/6988014.jpg'],
 ['Art Through the Ages',
  'Fred S. Kleiner',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1168391513i/32525.jpg'],
 ['The Story of Art',
  'E.H. Gombrich',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1389274650i/222078.jpg'],
 ['The Power of Art',
  'Simon Schama',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1385316815i/19797.jpg'],
 ['Concerning the Spiritual in Art',
  'Wassily Kandinsky',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1405982387i/857502.jpg']]

In [28]:
recommend('Harry Potter and the Deathly Hallows')

[['Harry Potter and the Chamber of Secrets',
  'J.K. Rowling',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1474169725i/15881.jpg'],
 ['Harry Potter and the Goblet of Fire',
  'J.K. Rowling',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1554006152i/6.jpg'],
 ['Harry Potter and the Order of the Phoenix',
  'J.K. Rowling',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1546910265i/2.jpg'],
 ['Harry Potter and the Sorcerer’s Stone',
  'J.K. Rowling',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1598823299i/42844155.jpg'],
 ['Harry Potter and the Half-Blood Prince',
  'J.K. Rowling',
  'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1587697303i/1.jpg']]

In [30]:
import pickle
pickle.dump(data,open('data.pkl','wb'))
pickle.dump(new,open('new.pkl','wb'))
pickle.dump(similarity,open('similarity.pkl','wb'))