In [None]:
# Mount Google Drive inside the Colab runtime so that files under
# /content/drive/ (where the dataset and pickle outputs live) are reachable.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [3]:
# Load the book catalogue from the mounted Drive folder. The file is read as
# comma-separated text encoded in Windows-1252 (cp1252).
data = pd.read_csv(
    '/content/drive/My Drive/Colab Notebooks/Data/Book Dataset.csv',
    sep=",",
    encoding='cp1252',
)

In [4]:
# Preview the first five rows of the freshly loaded frame.
data.head(5)

Unnamed: 0,title,author,genre
0,The Noonday Demon: An Atlas of Depression,Andrew Solomon,Biography Non-Fiction
1,"First, We make the Beast Beautiful",Sarah Wilson,Autobiography Non-Fiction
2,Feeling Good: The new mood therapy,David Burns,Non-Fiction
3,The Happiness Trap: How to stop struggling and...,Russ Harris,Non-Fiction
4,Self-Compassion: The proven power of being Kin...,Kristin Neff,Non-Fiction


In [5]:
# Check which dtype pandas inferred for each column.
data.dtypes

title     object
author    object
genre     object
dtype: object

In [6]:
# Summarise the row count, per-column non-null counts and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   title   63 non-null     object
 1   author  63 non-null     object
 2   genre   63 non-null     object
dtypes: object(3)
memory usage: 1.6+ KB


In [7]:
# Coerce the genre column to text, then break every entry on whitespace so each
# cell becomes a list of tokens. Multi-word labels are split as well
# (e.g. 'Young Adult Fiction' becomes ['Young', 'Adult', 'Fiction']).
data['genre'] = data['genre'].astype('str').str.split()

In [8]:
# Show the first ten rows after the split; each genre cell is now a list of tokens.
data.head(10)

Unnamed: 0,title,author,genre
0,The Noonday Demon: An Atlas of Depression,Andrew Solomon,"[Biography, Non-Fiction]"
1,"First, We make the Beast Beautiful",Sarah Wilson,"[Autobiography, Non-Fiction]"
2,Feeling Good: The new mood therapy,David Burns,[Non-Fiction]
3,The Happiness Trap: How to stop struggling and...,Russ Harris,[Non-Fiction]
4,Self-Compassion: The proven power of being Kin...,Kristin Neff,[Non-Fiction]
5,"Happy, Okay?",M.J. Fievre,[Poetic]
6,F*ck Feelings,Michael I. Bennett and Sarah Bennett,[Non-Fiction]
7,It's Kind of a Funny Story,Ned Vizzini,"[Young, Adult, Fiction]"
8,Reasons to Stay Alive,Matt Haig,"[Non-Fiction, Autobiography, Memoir]"
9,Depression Hates a Moving Target,Nita Sweeney,"[Biography, Non-Fiction]"


In [9]:
# Expand the list-valued 'genre' column so that every (book, genre token) pair
# gets its own row. Series.explode does this in one vectorised step instead of
# building a pd.Series per row through apply() and stacking the result.
# The original index label is repeated for each token of the same book.
s = data['genre'].explode()
s.name = 'genre'
# Swap the list column for the exploded one: the index-based left join repeats
# title/author once per genre token of the book.
data = data.drop('genre', axis=1).join(s)

In [10]:
# Show the first ten rows after the expansion: one row per (book, genre token),
# with the index label repeated for books that have several tokens.
data.head(10)

Unnamed: 0,title,author,genre
0,The Noonday Demon: An Atlas of Depression,Andrew Solomon,Biography
0,The Noonday Demon: An Atlas of Depression,Andrew Solomon,Non-Fiction
1,"First, We make the Beast Beautiful",Sarah Wilson,Autobiography
1,"First, We make the Beast Beautiful",Sarah Wilson,Non-Fiction
2,Feeling Good: The new mood therapy,David Burns,Non-Fiction
3,The Happiness Trap: How to stop struggling and...,Russ Harris,Non-Fiction
4,Self-Compassion: The proven power of being Kin...,Kristin Neff,Non-Fiction
5,"Happy, Okay?",M.J. Fievre,Poetic
6,F*ck Feelings,Michael I. Bennett and Sarah Bennett,Non-Fiction
7,It's Kind of a Funny Story,Ned Vizzini,Young


In [11]:
# Keep each row's current index label in an ordinary column named 'index'.
# After the genre expansion that label repeats for every genre token of the
# same book, so the column records which source row a line came from.
data['index']=data.index
data.head(10)

Unnamed: 0,title,author,genre,index
0,The Noonday Demon: An Atlas of Depression,Andrew Solomon,Biography,0
0,The Noonday Demon: An Atlas of Depression,Andrew Solomon,Non-Fiction,0
1,"First, We make the Beast Beautiful",Sarah Wilson,Autobiography,1
1,"First, We make the Beast Beautiful",Sarah Wilson,Non-Fiction,1
2,Feeling Good: The new mood therapy,David Burns,Non-Fiction,2
3,The Happiness Trap: How to stop struggling and...,Russ Harris,Non-Fiction,3
4,Self-Compassion: The proven power of being Kin...,Kristin Neff,Non-Fiction,4
5,"Happy, Okay?",M.J. Fievre,Poetic,5
6,F*ck Feelings,Michael I. Bennett and Sarah Bennett,Non-Fiction,6
7,It's Kind of a Funny Story,Ned Vizzini,Young,7


In [12]:
# Renumber the rows 0..n-1 so that the frame's index matches row positions
# (the similarity matrix built later is addressed by position). The per-book
# label stays available in the 'index' column.
# reset_index returns a new frame and gives the same result when the cell is
# executed again, unlike rebuilding the frame around the repeated 'index'
# labels, which would reindex (and scramble) rows on a second run.
data = data.reset_index(drop=True)

In [13]:
# (rows, columns) after the per-genre expansion and the added 'index' column.
data.shape

(102, 4)

In [14]:
# Turn the genre column into TF-IDF vectors, dropping scikit-learn's built-in
# English stop words.
# NOTE(review): the default tokenizer splits on punctuation, so a hyphenated
# label such as 'Non-Fiction' is presumably broken into separate terms and may
# share a term with 'Fiction' - confirm this is intended.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['genre'])
# (number of rows, number of vocabulary terms)
tfidf_matrix.shape

(102, 17)

In [15]:
# Pairwise dot products between all TF-IDF rows. TfidfVectorizer L2-normalises
# each row by default, so these dot products are the cosine similarities.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [16]:
# Reverse lookup from genre label to row number in `data`. Genre labels repeat,
# so indexing with a label shared by several rows returns a Series of row
# numbers rather than a single integer.
indices = pd.Series(data.index, index=data['genre'])

In [17]:
def get_recommendations(genre,cosine_sim=cosine_sim,num=10):
    """Return a random selection of the books most similar to ``genre``.

    The rows of ``data`` are ranked by their similarity to the first row
    carrying ``genre``; the ``num + 1`` best-ranked rows are kept and up to
    ``num`` of them are drawn at random.

    Parameters
    ----------
    genre : str
        Genre label to look up in ``indices``.
    cosine_sim : array-like, optional
        Square similarity matrix addressed by row number of ``data``.
        Defaults to the module-level ``cosine_sim``.
    num : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame
        ``title`` and ``author`` columns for the sampled rows, in random order.

    Raises
    ------
    KeyError
        If ``genre`` is not a label in ``indices``.
    """
    # A label shared by several rows yields a Series of row numbers, a label
    # that occurs once yields a bare integer; normalise both to a 1-D array.
    positions = np.atleast_1d(indices[genre])
    # Rank every row against the first matching row, most similar first.
    scores = cosine_sim[positions[0]]
    ranked = np.argsort(scores)[::-1]
    top_similar = ranked[:num + 1]
    ans = data[['title','author']].iloc[top_similar]
    # Never request more rows than were selected, and honour ``num`` instead
    # of a fixed sample size of 10.
    sample_size = max(0, min(num, len(ans)))
    return ans.sample(n=sample_size)

In [18]:
# Example call. The function returns a random sample of the top-ranked rows,
# so the rows shown differ from run to run.
get_recommendations('Biography')

Unnamed: 0,title,author
15,Depression Hates a Moving Target,Nita Sweeney
84,I've never been (un)happier,Shaheen Bhatt
22,The Crying Book,Heather Christle
47,Love You Like the Sky: Surviving the Suicide o...,Sarah Neustadter
59,This Close to Happy: A Reckoning with Depression,Daphne Merkin
18,Body Full of Stars,Molly Caro May
61,Darkness Visible: A Memoir of Madness,William Styron
30,Hello I Want to Die Please Fix Me: Depression ...,Anna Mehler Paperny
35,Depression the Comedy: A Tale of Perseverance,Jessica Holmes
0,The Noonday Demon: An Atlas of Depression,Andrew Solomon


In [19]:
def get_recommendation(genre):
    """Return books whose genre equals ``genre`` exactly.

    Looks up the rows of the module-level ``data`` frame with a matching
    ``genre`` value and returns their ``title`` and ``author`` columns.
    When fewer than 10 rows match, all of them are returned in their original
    order; otherwise 10 of them are drawn at random.
    """
    matches = data.loc[data['genre'] == genre, ['title', 'author']]
    return matches.sample(n=10) if len(matches) >= 10 else matches

In [20]:
# Example call. With 10 or more matching rows the result is a random sample,
# so the rows shown differ from run to run.
get_recommendation('Autobiography')

Unnamed: 0,title,author
85,I've never been (un)happier,Shaheen Bhatt
62,Darkness Visible: A Memoir of Madness,William Styron
26,Prozac Nation: Young and Depressed in America,Elizabeth Wurtzel
13,Reasons to Stay Alive,Matt Haig
40,The Valedictorian of Being Dead,Heather B. Armstrong
53,An Unquiet Mind: A Memoir of Moods and Madness,Kay Redfield Jamison
56,Furiously Happy: A Funny Book About Horrible T...,Jenny Lawson
31,Hello I Want to Die Please Fix Me: Depression ...,Anna Mehler Paperny
2,"First, We make the Beast Beautiful",Sarah Wilson
23,The Crying Book,Heather Christle


In [21]:
import pickle

# Persist the expanded book table and the similarity matrix to Drive.
# The `with` blocks close each file as soon as it is written, so the data is
# flushed to disk and no file handle is left open in the kernel.
with open('/content/drive/My Drive/Colab Notebooks/pkl/book_list.pkl','wb') as book_file:
    pickle.dump(data, book_file)
with open('/content/drive/My Drive/Colab Notebooks/pkl/cosine_sim_book.pkl','wb') as sim_file:
    pickle.dump(cosine_sim, sim_file)