In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv("kindle_data-v2.csv")
df.head()

Unnamed: 0,asin,title,author,soldBy,imgUrl,productURL,stars,reviews,price,isKindleUnlimited,category_id,isBestSeller,isEditorsPick,isGoodReadsChoice,publishedDate,category_name
0,B00TZE87S4,Adult Children of Emotionally Immature Parents...,Lindsay C. Gibson,Amazon.com Services LLC,https://m.media-amazon.com/images/I/713KZTsaYp...,https://www.amazon.com/dp/B00TZE87S4,4.8,0,9.99,False,6,True,False,False,2015-06-01,Parenting & Relationships
1,B08WCKY8MB,"From Strength to Strength: Finding Success, Ha...",Arthur C. Brooks,Penguin Group (USA) LLC,https://m.media-amazon.com/images/I/A1LZcJFs9E...,https://www.amazon.com/dp/B08WCKY8MB,4.4,0,16.99,False,6,False,False,False,2022-02-15,Parenting & Relationships
2,B09KPS84CJ,Good Inside: A Guide to Becoming the Parent Yo...,Becky Kennedy,HarperCollins Publishers,https://m.media-amazon.com/images/I/71RIWM0sv6...,https://www.amazon.com/dp/B09KPS84CJ,4.8,0,16.99,False,6,False,True,False,2022-09-13,Parenting & Relationships
3,B07S7QPG6J,Everything I Know About Love: A Memoir,Dolly Alderton,HarperCollins Publishers,https://m.media-amazon.com/images/I/71QdQpTiKZ...,https://www.amazon.com/dp/B07S7QPG6J,4.2,0,9.95,True,6,False,True,False,2020-02-25,Parenting & Relationships
4,B00N6PEQV0,The Seven Principles for Making Marriage Work:...,John Gottman,Random House LLC,https://m.media-amazon.com/images/I/813o4WOs+w...,https://www.amazon.com/dp/B00N6PEQV0,4.7,0,13.99,False,6,False,False,False,2015-05-05,Parenting & Relationships


In [3]:
df.dropna(inplace=True)

In [4]:
df['tags'] = df['author']+df['soldBy']+df['category_name']
df['tags']

0         Lindsay C. GibsonAmazon.com Services LLCParent...
1         Arthur C. BrooksPenguin Group (USA) LLCParenti...
2         Becky KennedyHarperCollins PublishersParenting...
3         Dolly AldertonHarperCollins PublishersParentin...
4         John GottmanRandom House LLCParenting & Relati...
                                ...                        
133097                       Leo TolstoyDe MarqueNonfiction
133098           Mo GawdatAmazon.com Services LLCNonfiction
133099                   Alexandre DumasDe MarqueNonfiction
133100                       Leo TolstoyDe MarqueNonfiction
133101                       Jane AustenDe MarqueNonfiction
Name: tags, Length: 77800, dtype: object

In [5]:
df['tags'] = df['tags'].str.replace("(","").str.replace(")","").str.replace("[","").str.replace(".","").str.replace("]","").str.replace(",","").str.replace("|","").str.lower()

  df['tags'] = df['tags'].str.replace("(","").str.replace(")","").str.replace("[","").str.replace(".","").str.replace("]","").str.replace(",","").str.replace("|","").str.lower()


In [6]:
df['tags']

0         lindsay c gibsonamazoncom services llcparentin...
1         arthur c brookspenguin group usa llcparenting ...
2         becky kennedyharpercollins publishersparenting...
3         dolly aldertonharpercollins publishersparentin...
4         john gottmanrandom house llcparenting & relati...
                                ...                        
133097                       leo tolstoyde marquenonfiction
133098            mo gawdatamazoncom services llcnonfiction
133099                   alexandre dumasde marquenonfiction
133100                       leo tolstoyde marquenonfiction
133101                       jane austende marquenonfiction
Name: tags, Length: 77800, dtype: object

In [7]:
data = df[['title','tags']]
data.head()

Unnamed: 0,title,tags
0,Adult Children of Emotionally Immature Parents...,lindsay c gibsonamazoncom services llcparentin...
1,"From Strength to Strength: Finding Success, Ha...",arthur c brookspenguin group usa llcparenting ...
2,Good Inside: A Guide to Becoming the Parent Yo...,becky kennedyharpercollins publishersparenting...
3,Everything I Know About Love: A Memoir,dolly aldertonharpercollins publishersparentin...
4,The Seven Principles for Making Marriage Work:...,john gottmanrandom house llcparenting & relati...


## Text Vectorization

In [8]:
cv = CountVectorizer(max_features = 500, stop_words = 'english')

In [9]:
vector = cv.fit_transform(data['tags'])
vector

<77800x500 sparse matrix of type '<class 'numpy.int64'>'
	with 295219 stored elements in Compressed Sparse Row format>

In [10]:
vector = vector.toarray()
vector

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

### checking stop_words

In [11]:
print(cv.get_stop_words())

frozenset({'latterly', 'go', 'will', 'might', 'inc', 'fifteen', 'show', 'fill', 'hasnt', 'next', 'whereafter', 'each', 'when', 'across', 'therefore', 'what', 'alone', 'ltd', 'until', 'by', 'whose', 'more', 'describe', 'former', 'together', 'via', 'fire', 'am', 'must', 'three', 'well', 'afterwards', 'behind', 'noone', 'nine', 'thru', 'enough', 'those', 'done', 'whole', 'our', 'ourselves', 'between', 'such', 'one', 'find', 'why', 'several', 'off', 'nowhere', 'sixty', 'to', 'ie', 'thereupon', 'hereupon', 'up', 'has', 'yours', 'couldnt', 'formerly', 'out', 'almost', 'fifty', 'can', 'made', 'herein', 'give', 'however', 'becomes', 'me', 'ever', 'their', 'of', 'first', 'who', 'an', 'nor', 'eg', 'top', 'further', 'should', 'seems', 'thin', 'this', 'through', 'i', 'onto', 'where', 'hundred', 'hers', 'etc', 'would', 'namely', 'none', 'every', 'detail', 'sincere', 'some', 'around', 'system', 'besides', 'forty', 'less', 'was', 'then', 'into', 'please', 'same', 'above', 'last', 'over', 'there', 'de

### getting features names

In [12]:
list(cv.get_feature_names_out())[:5]

['aaron', 'adam', 'adamsamazoncom', 'adult', 'al']

### Stemming words

In [13]:
ps = PorterStemmer()

In [14]:
def stem(txt):
    lst = []
    for i in txt.split():
        lst.append(ps.stem(i))
        
    return " ".join(lst)    

In [15]:
stem(data['tags'][0])

'lindsay c gibsonamazoncom servic llcparent & relationship'

In [16]:
data['tags'] = data['tags'].apply(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tags'] = data['tags'].apply(stem)


In [17]:
data.head(2)

Unnamed: 0,title,tags
0,Adult Children of Emotionally Immature Parents...,lindsay c gibsonamazoncom servic llcparent & r...
1,"From Strength to Strength: Finding Success, Ha...",arthur c brookspenguin group usa llcparent & r...


## Calculate Vectors

In [18]:
similarity = cosine_similarity(vector[:1000])

In [19]:
similarity

array([[1.        , 0.51639778, 0.40824829, ..., 0.57735027, 0.8660254 ,
        0.8660254 ],
       [0.51639778, 1.        , 0.31622777, ..., 0.4472136 , 0.4472136 ,
        0.4472136 ],
       [0.40824829, 0.31622777, 1.        , ..., 0.35355339, 0.35355339,
        0.35355339],
       ...,
       [0.57735027, 0.4472136 , 0.35355339, ..., 1.        , 0.5       ,
        0.5       ],
       [0.8660254 , 0.4472136 , 0.35355339, ..., 0.5       , 1.        ,
        0.75      ],
       [0.8660254 , 0.4472136 , 0.35355339, ..., 0.5       , 0.75      ,
        1.        ]])

# Recommendation

In [20]:
data['title'][1]

'From Strength to Strength: Finding Success, Happiness, and Deep Purpose in the Second Half of Life'

In [21]:
book_index = data[data['title']=='From Strength to Strength: Finding Success, Happiness, and Deep Purpose in the Second Half of Life'].index[0]
book_index

1

### Checking similarity of index 1 book

In [22]:
distances = similarity[book_index]
distances

array([0.51639778, 1.        , 0.31622777, 0.31622777, 0.4472136 ,
       0.2       , 0.8       , 0.51639778, 0.4472136 , 0.31622777,
       0.25819889, 0.18257419, 0.4472136 , 0.4472136 , 0.4472136 ,
       0.8       , 0.4472136 , 0.18257419, 0.89442719, 0.2       ,
       0.2236068 , 0.16903085, 0.89442719, 0.25819889, 0.51639778,
       0.51639778, 0.25819889, 0.8       , 0.25819889, 0.4472136 ,
       0.4472136 , 0.8       , 0.31622777, 0.89442719, 0.51639778,
       0.2       , 0.2236068 , 0.31622777, 0.4472136 , 0.25819889,
       0.4472136 , 0.51639778, 0.31622777, 0.4472136 , 0.31622777,
       0.4472136 , 0.89442719, 0.25819889, 0.4472136 , 0.4472136 ,
       0.25819889, 0.4472136 , 0.2236068 , 0.2236068 , 0.2236068 ,
       0.31622777, 0.31622777, 0.31622777, 0.2       , 0.31622777,
       0.4472136 , 0.8       , 0.51639778, 0.51639778, 0.89442719,
       0.51639778, 0.31622777, 0.4472136 , 0.25819889, 0.25819889,
       0.31622777, 0.18257419, 0.2       , 0.31622777, 0.44721

In [23]:
index_list = list(enumerate(distances))
index_list[:10]

[(0, 0.5163977794943223),
 (1, 0.9999999999999999),
 (2, 0.3162277660168379),
 (3, 0.3162277660168379),
 (4, 0.4472135954999579),
 (5, 0.19999999999999998),
 (6, 0.7999999999999999),
 (7, 0.5163977794943223),
 (8, 0.4472135954999579),
 (9, 0.3162277660168379)]

In [24]:
similar_books = sorted(index_list,reverse = True , key = lambda x: x[1])[1:11]
similar_books

[(18, 0.8944271909999159),
 (22, 0.8944271909999159),
 (33, 0.8944271909999159),
 (46, 0.8944271909999159),
 (64, 0.8944271909999159),
 (75, 0.8944271909999159),
 (104, 0.8944271909999159),
 (156, 0.8944271909999159),
 (230, 0.8944271909999159),
 (311, 0.8944271909999159)]

### Getting books title

In [25]:
for i in similar_books:
    print(data.iloc[i[0]].title)

Why Does He Do That?: Inside the Minds of Angry and Controlling Men
Scattered Minds: The Origins and Healing of Attention Deficit Disorder
The Bullet Journal Method: Track the Past, Order the Present, Design the Future
Drama Free: A Guide to Managing Unhealthy Family Relationships
Too Good to Leave, Too Bad to Stay: A Step-by-Step Guide to Help You Decide Whether to Stay In or Get Out of Your Relationship
The Book You Wish Your Parents Had Read: (And Your Children Will Be Glad That You Did)
There Was a Little Girl: The Real Story of My Mother and Me
Twelve Hours' Sleep by Twelve Weeks Old: A Step-by-Step Plan for Baby Sleep Success
Psychopath Free (Expanded Edition): Recovering from Emotionally Abusive Relationships With Narcissists, Sociopaths, and Other Toxic People
The Baby Sleep Solution: A Proven Program to Teach Your Baby to Sleep Twelve Hours aNight


### Making Function

In [26]:
def recommendation(book_name):
    book_index = data[data['title']==book_name].index[0]
    distances = similarity[book_index]
    index_list = list(enumerate(distances))
    similar_books = sorted(index_list,reverse=True,key = lambda x: x[1])[1:11]
    for i in similar_books:
        print(data.iloc[i[0]].title)

In [27]:
recommendation('From Strength to Strength: Finding Success, Happiness, and Deep Purpose in the Second Half of Life')

Why Does He Do That?: Inside the Minds of Angry and Controlling Men
Scattered Minds: The Origins and Healing of Attention Deficit Disorder
The Bullet Journal Method: Track the Past, Order the Present, Design the Future
Drama Free: A Guide to Managing Unhealthy Family Relationships
Too Good to Leave, Too Bad to Stay: A Step-by-Step Guide to Help You Decide Whether to Stay In or Get Out of Your Relationship
The Book You Wish Your Parents Had Read: (And Your Children Will Be Glad That You Did)
There Was a Little Girl: The Real Story of My Mother and Me
Twelve Hours' Sleep by Twelve Weeks Old: A Step-by-Step Plan for Baby Sleep Success
Psychopath Free (Expanded Edition): Recovering from Emotionally Abusive Relationships With Narcissists, Sociopaths, and Other Toxic People
The Baby Sleep Solution: A Proven Program to Teach Your Baby to Sleep Twelve Hours aNight


In [28]:
recommendation("Drama Free: A Guide to Managing Unhealthy Family Relationships")

Unmasking Autism: Discovering the New Faces of Neurodiversity
The Happiest Baby on the Block; Fully Revised and Updated Second Edition: The New Way to Calm Crying and Help Your Newborn Baby Sleep Longer
The Return (Pulitzer Prize Winner): Fathers, Sons and the Land in Between
Travelers to Unimaginable Lands: Stories of Dementia, the Caregiver, and the Human Brain
The Happiest Toddler on the Block: How to Eliminate Tantrums and Raise a Patient, Respectful and Cooperative One- to Four-Year-Old: Revised Edition
The Womanly Art of Breastfeeding: Completely Revised and Updated 8th Edition
The Reason I Jump: The Inner Voice of a Thirteen-Year-Old Boy with Autism
Queen Bees and Wannabes, 3rd Edition: Helping Your Daughter Survive Cliques, Gossip, Boys, and the New Realities of Girl World
We Should Not Be Friends: The Story of a Friendship
Dear Girls: Intimate Tales, Untold Secrets & Advice for Living Your Best Life


In [29]:
recommendation("The Baby Sleep Solution: A Proven Program to Teach Your Baby to Sleep Twelve Hours aNight")

The Happiest Baby on the Block; Fully Revised and Updated Second Edition: The New Way to Calm Crying and Help Your Newborn Baby Sleep Longer
The Return (Pulitzer Prize Winner): Fathers, Sons and the Land in Between
Travelers to Unimaginable Lands: Stories of Dementia, the Caregiver, and the Human Brain
The Happiest Toddler on the Block: How to Eliminate Tantrums and Raise a Patient, Respectful and Cooperative One- to Four-Year-Old: Revised Edition
The Womanly Art of Breastfeeding: Completely Revised and Updated 8th Edition
The Reason I Jump: The Inner Voice of a Thirteen-Year-Old Boy with Autism
Queen Bees and Wannabes, 3rd Edition: Helping Your Daughter Survive Cliques, Gossip, Boys, and the New Realities of Girl World
We Should Not Be Friends: The Story of a Friendship
Dear Girls: Intimate Tales, Untold Secrets & Advice for Living Your Best Life
Sweet Sleep: Nighttime and Naptime Strategies for the Breastfeeding Family


## Getting Image Url and Product URL of Recommended Books

In [30]:
def img_url(book_name):
    url = df[df['title'] == book_name]
    return url.iloc[0][['imgUrl','productURL']]
    

In [31]:
img_url('From Strength to Strength: Finding Success, Happiness, and Deep Purpose in the Second Half of Life')

imgUrl        https://m.media-amazon.com/images/I/A1LZcJFs9E...
productURL                 https://www.amazon.com/dp/B08WCKY8MB
Name: 1, dtype: object

In [32]:
def recommendation(book_name):
    book_index = data[data['title']==book_name].index[0]
    distances = similarity[book_index]
    index_list = list(enumerate(distances))
    similar_books = sorted(index_list,reverse=True,key = lambda x: x[1])[1:11]
    
    recomended_books = []
    books_poster = []
    
    for i in similar_books:
        recomended_books.append(data.iloc[i[0]].title)
        books_poster.append(img_url(book_name))
    return recomended_books,books_poster    

In [33]:
recommendation('From Strength to Strength: Finding Success, Happiness, and Deep Purpose in the Second Half of Life')

(['Why Does He Do That?: Inside the Minds of Angry and Controlling Men',
  'Scattered Minds: The Origins and Healing of Attention Deficit Disorder',
  'The Bullet Journal Method: Track the Past, Order the Present, Design the Future',
  'Drama Free: A Guide to Managing Unhealthy Family Relationships',
  'Too Good to Leave, Too Bad to Stay: A Step-by-Step Guide to Help You Decide Whether to Stay In or Get Out of Your Relationship',
  'The Book You Wish Your Parents Had Read: (And Your Children Will Be Glad That You Did)',
  'There Was a Little Girl: The Real Story of My Mother and Me',
  "Twelve Hours' Sleep by Twelve Weeks Old: A Step-by-Step Plan for Baby Sleep Success",
  'Psychopath Free (Expanded Edition): Recovering from Emotionally Abusive Relationships With Narcissists, Sociopaths, and Other Toxic People',
  'The Baby Sleep Solution: A Proven Program to Teach Your Baby to Sleep Twelve Hours aNight'],
 [imgUrl        https://m.media-amazon.com/images/I/A1LZcJFs9E...
  productURL  

In [34]:
recommendation("The Seven Principles for Making Marriage Work: A Practical Guide from the Country's Foremost Relationship Expert")

(['Homecoming: Reclaiming and Healing Your Inner Child',
  'Delivered from Distraction: Getting the Most out of Life with Attention Deficit Disorder',
  'And Baby Makes Three: The Six-Step Plan for Preserving Marital Intimacy and Rekindling Romance After Baby Arrives',
  'Unmasking Autism: Discovering the New Faces of Neurodiversity',
  'The Happiest Baby on the Block; Fully Revised and Updated Second Edition: The New Way to Calm Crying and Help Your Newborn Baby Sleep Longer',
  'The Return (Pulitzer Prize Winner): Fathers, Sons and the Land in Between',
  'Travelers to Unimaginable Lands: Stories of Dementia, the Caregiver, and the Human Brain',
  'The Happiest Toddler on the Block: How to Eliminate Tantrums and Raise a Patient, Respectful and Cooperative One- to Four-Year-Old: Revised Edition',
  'The Womanly Art of Breastfeeding: Completely Revised and Updated 8th Edition',
  'The Reason I Jump: The Inner Voice of a Thirteen-Year-Old Boy with Autism'],
 [imgUrl        https://m.med