In [2]:
import pandas as pd
import numpy as np
import csv
books = pd.read_csv("book_data.csv",encoding='ISO-8859-1')

In [3]:
books.head()

Unnamed: 0,Id,ISBN,Book_id,Best_Book_Id,Book_Title,Book_Author,Year_of_Publication,Publisher,Original_Title,Avg_Rating,Books_Count,Rating_Count,Description
0,1.0,195153448,13624209,210834,Kim,Rudyard Kipling,1901,Wilco publishing house,Kim,3.72,445,31684,Kim is set in an imperialistic world; a world ...
1,2.0,2005018,846427,846429,Classical Mythology,Mark P.O. Morford,1977,"Oxford University Press, USA",Classical Mythology,4.11,44,1177,"Featuring the authors' extensive, clear, and f..."
2,3.0,60973129,969335,241664,Clara Callan,Richard B. Wright,2001,,Clara Callan,3.87,32,8456,"In a small town in Canada, Clara Callan reluct..."
3,4.0,374157065,3767482,1508654,Decision in Normandy,Carlo D'Este,1983,Harper Perennial,Decision in Normandy,4.03,18,692,"Here, for the first time in paperback, is an o..."
4,5.0,393045218,893390,763331,Flu: The Story Of The Great Influenza Pandemic...,Gina Kolata,1999,Farrar Straus Giroux,Flu: The Story of the Great Influenza Pandemic,3.9,25,5450,"The fascinating, true story of the world's dea..."


In [4]:
books = books.drop_duplicates()

In [5]:
books.head(4)['Description']

0    Kim is set in an imperialistic world; a world ...
1    Featuring the authors' extensive, clear, and f...
2    In a small town in Canada, Clara Callan reluct...
3    Here, for the first time in paperback, is an o...
Name: Description, dtype: object

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfv=TfidfVectorizer(min_df=3, max_features = None,
                    strip_accents = 'unicode', analyzer = 'word', token_pattern=r'\w{1,}',
                    ngram_range = (1,3),
                    stop_words = 'english')

books['Description']=books['Description'].fillna('')

In [7]:
tfv_matrix = tfv.fit_transform(books['Description'])

In [8]:
tfv_matrix

<9872x47567 sparse matrix of type '<class 'numpy.float64'>'
	with 647082 stored elements in Compressed Sparse Row format>

In [9]:
tfv_matrix.shape

(9872, 47567)

In [10]:
from sklearn.metrics.pairwise import sigmoid_kernel

sig = sigmoid_kernel(tfv_matrix, tfv_matrix)

In [11]:
sig[0]

array([0.76160298, 0.76159443, 0.76159442, ..., 0.76159417, 0.76159432,
       0.76159435])

In [12]:
indices = pd.Series(books.index, index = books['Original_Title']).drop_duplicates()

In [13]:
indices

Original_Title
Kim                                                                                          0
Classical Mythology                                                                          1
Clara Callan                                                                                 2
Decision in Normandy                                                                         3
Flu: The Story of the Great Influenza Pandemic                                               4
                                                                                          ... 
Astronomy Made Simple                                                                     9867
Everything Cat: What Kids Really Want to Know About Cats (Kids' FAQs)                     9868
Got Issues Much?: Celebrities Share Their Traumas and Triumphs                            9869
Friend or Foe?  (Outernet #1)                                                             9870
In the Land of the Lawn Weenies and

In [14]:
indices['Two for Texas']

1020

In [15]:
list(enumerate(sig[indices['Two for Texas']]))

[(0, 0.7615942084840417),
 (1, 0.7615941910954955),
 (2, 0.7615942691761798),
 (3, 0.7615941559557649),
 (4, 0.7615942123919917),
 (5, 0.7615941559557649),
 (6, 0.7615941722620643),
 (7, 0.7615942300347445),
 (8, 0.7615941559557649),
 (9, 0.7615941559557649),
 (10, 0.7615942151593654),
 (11, 0.7615942324707873),
 (12, 0.7615941559557649),
 (13, 0.7615942244455701),
 (14, 0.7615941559557649),
 (15, 0.7615942050085664),
 (16, 0.7615942300427466),
 (17, 0.7615941559557649),
 (18, 0.7615942577693458),
 (19, 0.761594185425447),
 (20, 0.7615944258860493),
 (21, 0.7615941806305528),
 (22, 0.7615942174769514),
 (23, 0.7615943918820552),
 (24, 0.7615941650571021),
 (25, 0.7615941559557649),
 (26, 0.7615942659826559),
 (27, 0.7615941559557649),
 (28, 0.7615942136042423),
 (29, 0.7615942798398542),
 (30, 0.7615941559557649),
 (31, 0.7615941559557649),
 (32, 0.7615941559557649),
 (33, 0.7615942638312411),
 (34, 0.7615942815140911),
 (35, 0.7615941559557649),
 (36, 0.7615943232966572),
 (37, 0.7615

In [16]:
sorted(list(enumerate(sig[indices['Two for Texas']])), key=lambda x: x[1], reverse=True)

[(1020, 0.7616029849257958),
 (7000, 0.7615956149660751),
 (8245, 0.7615951598448726),
 (1803, 0.761595097458989),
 (7800, 0.7615950861734839),
 (2420, 0.7615950307041842),
 (5309, 0.7615950205787294),
 (955, 0.761595017407911),
 (1551, 0.7615949585989589),
 (4007, 0.7615949493357533),
 (5212, 0.7615949493357533),
 (4295, 0.7615949490492576),
 (4966, 0.7615949405141125),
 (9601, 0.7615949354123651),
 (1405, 0.7615949259908841),
 (7681, 0.7615948735423387),
 (6231, 0.7615948721522079),
 (6931, 0.7615948158183468),
 (4239, 0.7615948049588143),
 (3409, 0.7615947869949884),
 (3706, 0.7615947854798534),
 (983, 0.7615947846011923),
 (9424, 0.7615947738244507),
 (7502, 0.7615947729350141),
 (4074, 0.7615947687795166),
 (8387, 0.7615947682903705),
 (8946, 0.7615947660487029),
 (7128, 0.7615947649507725),
 (5581, 0.761594759165677),
 (7509, 0.7615947450030697),
 (2084, 0.7615947400120306),
 (3072, 0.761594728208146),
 (778, 0.7615947249442926),
 (9361, 0.7615947233587393),
 (9405, 0.76159472279

In [27]:
def give_rec(title, sig=sig):
    idx=indices[title]
    sig_scores = list(enumerate(sig[idx]))
    sig_scores = sorted(sig_scores, key=lambda x: x[1], reverse=True)
    sig_scores = (sig_scores[1:11])
    book_indices = [i[0] for i in sig_scores]
    return books['Original_Title'].iloc[book_indices]
x=input('Enter book name:')
give_rec(x)

Enter book name:Kim


1017                                                Toxin
9536                             101 Wacky Computer Jokes
1546                                    The Farmer's Wife
7339                                      The Summer Tree
4000                                                  NaN
7046                      Demons Don't Dream (Xanth, #16)
4099                In Rajasthan (Lonely Planet Journeys)
2332    What Every American Should Know About the Rest...
7017    The New York Diaries: Too-True Tales of Urban ...
4488                                  Midnight's Children
Name: Original_Title, dtype: object