In [1]:
import pickle
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

warnings.filterwarnings("ignore")

In [2]:
# Book catalogue (ISBN, title, author, publication year, publisher, cover-image URLs).
books = pd.read_csv(filepath_or_buffer="Books.csv")
books.head(n=5)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [3]:
# User table (User-ID, Location, Age).
users = pd.read_csv(filepath_or_buffer="Users.csv")
users.head(n=5)

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [4]:
# Rating events (User-ID, ISBN, Book-Rating).
ratings = pd.read_csv(filepath_or_buffer="Ratings.csv")
ratings.head(n=5)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


## Popularity-Based Recommender System

In [5]:
# Attach book metadata to every rating. `merge` defaults to an inner join, so
# ratings whose ISBN is missing from `books` are dropped.
ratings_with_name = ratings.merge(books, on="ISBN")
# Fixed seed so the preview shows the same rows on every run.
ratings_with_name.sample(3, random_state=42)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
603583,163973,553293915,7,Kat's Cradle (Kat Colorado Mysteries),Karen Kijewski,1992,Bantam,http://images.amazon.com/images/P/0553293915.0...,http://images.amazon.com/images/P/0553293915.0...,http://images.amazon.com/images/P/0553293915.0...
951010,252865,440208513,0,The Evening News,Arthur Hailey,1991,Dell,http://images.amazon.com/images/P/0440208513.0...,http://images.amazon.com/images/P/0440208513.0...,http://images.amazon.com/images/P/0440208513.0...
200428,52574,385494343,10,The Gift of Peace: Personal Reflections (Illin...,Joseph Cardinal Bernardin,1998,IMAGE,http://images.amazon.com/images/P/0385494343.0...,http://images.amazon.com/images/P/0385494343.0...,http://images.amazon.com/images/P/0385494343.0...


In [6]:
# Number of ratings each title received.
# Selecting "Book-Rating" before counting avoids counting every other column
# only to throw the results away; naming the Series replaces the in-place rename.
num_rating_df = (
    ratings_with_name.groupby("Book-Title")["Book-Rating"]
    .count()
    .rename("num_rating")
    .reset_index()
)
num_rating_df.head()

Unnamed: 0,Book-Title,num_rating
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [7]:
# Mean rating per title.
# Coerce ratings to numbers first; anything unparsable becomes NaN and is ignored by mean().
# NOTE(review): ratings of 0 are included in the mean — if 0 denotes "no explicit
# rating" in this dataset, the averages are pulled down; confirm against the data source.
ratings_with_name["Book-Rating"] = pd.to_numeric(
    ratings_with_name["Book-Rating"], errors="coerce"
)
avg_rating_df = (
    ratings_with_name.groupby("Book-Title")["Book-Rating"]
    .mean()
    .rename("avg_rating")
    .reset_index()
)
avg_rating_df

Unnamed: 0,Book-Title,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,2.250000
1,Always Have Popsicles,0.000000
2,Apple Magic (The Collector's series),0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,0.000000
...,...,...
241066,Ã?Â?lpiraten.,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,5.250000
241068,Ã?Â?sterlich leben.,7.000000
241069,Ã?Â?stlich der Berge.,2.666667


In [8]:
# One row per title carrying both its rating count and its mean rating.
popular_df = pd.merge(num_rating_df, avg_rating_df, on="Book-Title")
popular_df

Unnamed: 0,Book-Title,num_rating,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Always Have Popsicles,1,0.000000
2,Apple Magic (The Collector's series),1,0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
...,...,...,...
241066,Ã?Â?lpiraten.,2,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
241068,Ã?Â?sterlich leben.,1,7.000000
241069,Ã?Â?stlich der Berge.,3,2.666667


In [9]:
# Keep titles with at least 250 ratings, then take the 50 with the highest mean rating.
popular_df = (
    popular_df.loc[popular_df["num_rating"] >= 250]
    .sort_values(by="avg_rating", ascending=False)
    .head(50)
)
popular_df.head(n=5)

Unnamed: 0,Book-Title,num_rating,avg_rating
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289
80441,Harry Potter and the Sorcerer's Stone (Book 1),278,5.73741
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453


In [10]:
# The catalogue lists the same title under several ISBNs, so joining on the
# title repeats each popular book once per matching catalogue row.
pd.merge(popular_df, books, on="Book-Title").head(n=5)

Unnamed: 0,Book-Title,num_rating,avg_rating,ISBN,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,439136350,J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...
1,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,439136369,J. K. Rowling,2001,Scholastic,http://images.amazon.com/images/P/0439136369.0...,http://images.amazon.com/images/P/0439136369.0...,http://images.amazon.com/images/P/0439136369.0...
2,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,786222743,J. K. Rowling,2000,Thorndike Press,http://images.amazon.com/images/P/0786222743.0...,http://images.amazon.com/images/P/0786222743.0...,http://images.amazon.com/images/P/0786222743.0...
3,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,439139597,J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...
4,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,439139600,J. K. Rowling,2002,Scholastic Paperbacks,http://images.amazon.com/images/P/0439139600.0...,http://images.amazon.com/images/P/0439139600.0...,http://images.amazon.com/images/P/0439139600.0...


In [11]:
# Top 50 books by average rating, enriched with author, publication year and cover image.
# Selecting the three metric columns before merging keeps this cell re-runnable:
# merging an already-enriched frame with `books` again would create suffixed
# duplicates (Book-Author_x / Book-Author_y, ...) and break the final column selection.
popular_df = (
    popular_df[["Book-Title", "num_rating", "avg_rating"]]
    .merge(books, on="Book-Title")
    # Several ISBNs share a title; keep the first catalogue row for each.
    .drop_duplicates("Book-Title")
    .reset_index(drop=True)
    .loc[:, ["Book-Title", "Book-Author", "Year-Of-Publication", "Image-URL-M", "num_rating", "avg_rating"]]
)

In [12]:
# Display the popularity table (title, author, year, cover URL, rating count, mean rating).
popular_df

Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Image-URL-M,num_rating,avg_rating
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,http://images.amazon.com/images/P/0439136350.0...,428,5.852804
1,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,http://images.amazon.com/images/P/0439139597.0...,387,5.824289
2,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,http://images.amazon.com/images/P/0590353403.0...,278,5.73741
3,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,http://images.amazon.com/images/P/043935806X.0...,347,5.501441
4,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,http://images.amazon.com/images/P/0439064872.0...,556,5.183453
5,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,1986,http://images.amazon.com/images/P/0345339681.0...,281,5.007117
6,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,1986,http://images.amazon.com/images/P/0345339703.0...,368,4.94837
7,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999,http://images.amazon.com/images/P/059035342X.0...,575,4.895652
8,"The Two Towers (The Lord of the Rings, Part 2)",J.R.R. TOLKIEN,1986,http://images.amazon.com/images/P/0345339711.0...,260,4.880769
9,To Kill a Mockingbird,Harper Lee,1988,http://images.amazon.com/images/P/0446310786.0...,510,4.7


# Collaborative Filtering

In [22]:
# Keep only users who have given more than 200 ratings.
ratings_per_user = ratings_with_name.groupby("User-ID")["Book-Rating"].count()
# A dedicated name is used here so the `users` DataFrame loaded from Users.csv
# is not silently replaced by an Index of user IDs.
active_user_ids = ratings_per_user[ratings_per_user > 200].index

# Ratings made by those users.
filtered_rating = ratings_with_name[ratings_with_name["User-ID"].isin(active_user_ids)]
# Fixed seed so the preview shows the same rows on every run.
filtered_rating.sample(5, random_state=42)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
200753,52584,0345370724,0,Killer Diller,Clyde Edgerton,1992,Ballantine Books,http://images.amazon.com/images/P/0345370724.0...,http://images.amazon.com/images/P/0345370724.0...,http://images.amazon.com/images/P/0345370724.0...
49877,11676,285940659X,0,Cap sur la gloire,Kent,2000,PhÃ?Â©bus,http://images.amazon.com/images/P/285940659X.0...,http://images.amazon.com/images/P/285940659X.0...,http://images.amazon.com/images/P/285940659X.0...
854124,230522,0893752770,0,Friendly Snowman (First-Start Easy Readers (Pa...,Sharon Gordon,1980,Troll Communications,http://images.amazon.com/images/P/0893752770.0...,http://images.amazon.com/images/P/0893752770.0...,http://images.amazon.com/images/P/0893752770.0...
262816,69697,0505525216,0,Bait &amp; Switch (Love Spell Contemporary Rom...,Darlene Gardner,2002,Love Spell,http://images.amazon.com/images/P/0505525216.0...,http://images.amazon.com/images/P/0505525216.0...,http://images.amazon.com/images/P/0505525216.0...
60894,14521,080411840X,0,Malice in London (Erskine Powell Mysteries),Graham Thomas,2000,Fawcett Books,http://images.amazon.com/images/P/080411840X.0...,http://images.amazon.com/images/P/080411840X.0...,http://images.amazon.com/images/P/080411840X.0...


In [32]:
# Number of ratings per title within `filtered_rating`; titles with 50 or more are kept next.
filtered_rating.groupby("Book-Title")["Book-Rating"].count()

Book-Title
 A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)    2
 Always Have Popsicles                                                                                        1
 Apple Magic (The Collector's series)                                                                         1
 Beyond IBM: Leadership Marketing and Finance for the 1990s                                                   1
 Clifford Visita El Hospital (Clifford El Gran Perro Colorado)                                                1
                                                                                                             ..
Ã?Â?ber das Fernsehen.                                                                                        2
Ã?Â?ber die Pflicht zum Ungehorsam gegen den Staat.                                                           3
Ã?Â?lpiraten.                                                                                

In [36]:
# Boolean flag per title: True when the title has at least 50 ratings in `filtered_rating`.
y = filtered_rating.groupby("Book-Title")["Book-Rating"].count().ge(50)
print(y.head())
# Titles whose flag is True.
famous_books = y.loc[y].index

Book-Title
A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)    False
Always Have Popsicles                                                                                        False
Apple Magic (The Collector's series)                                                                         False
Beyond IBM: Leadership Marketing and Finance for the 1990s                                                   False
Clifford Visita El Hospital (Clifford El Gran Perro Colorado)                                                False
Name: Book-Rating, dtype: bool


In [41]:
# Restrict the ratings to titles that have at least 50 ratings (`famous_books`).
final_ratings = filtered_rating.loc[
    lambda frame: frame["Book-Title"].isin(famous_books)
]
final_ratings

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
1150,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...
1163,277427,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999,Perennial,http://images.amazon.com/images/P/0060930535.0...,http://images.amazon.com/images/P/0060930535.0...,http://images.amazon.com/images/P/0060930535.0...
1165,277427,0060934417,0,Bel Canto: A Novel,Ann Patchett,2002,Perennial,http://images.amazon.com/images/P/0060934417.0...,http://images.amazon.com/images/P/0060934417.0...,http://images.amazon.com/images/P/0060934417.0...
1168,277427,0061009059,9,One for the Money (Stephanie Plum Novels (Pape...,Janet Evanovich,1995,HarperTorch,http://images.amazon.com/images/P/0061009059.0...,http://images.amazon.com/images/P/0061009059.0...,http://images.amazon.com/images/P/0061009059.0...
1174,277427,006440188X,0,The Secret Garden,Frances Hodgson Burnett,1998,HarperTrophy,http://images.amazon.com/images/P/006440188X.0...,http://images.amazon.com/images/P/006440188X.0...,http://images.amazon.com/images/P/006440188X.0...
...,...,...,...,...,...,...,...,...,...,...
1029196,275970,1400031354,0,Tears of the Giraffe (No.1 Ladies Detective Ag...,Alexander McCall Smith,2002,Anchor,http://images.amazon.com/images/P/1400031354.0...,http://images.amazon.com/images/P/1400031354.0...,http://images.amazon.com/images/P/1400031354.0...
1029197,275970,1400031362,0,Morality for Beautiful Girls (No.1 Ladies Dete...,Alexander McCall Smith,2002,Anchor,http://images.amazon.com/images/P/1400031362.0...,http://images.amazon.com/images/P/1400031362.0...,http://images.amazon.com/images/P/1400031362.0...
1029270,275970,1573229725,0,Fingersmith,Sarah Waters,2002,Riverhead Books,http://images.amazon.com/images/P/1573229725.0...,http://images.amazon.com/images/P/1573229725.0...,http://images.amazon.com/images/P/1573229725.0...
1029309,275970,1586210661,9,Me Talk Pretty One Day,David Sedaris,2001,Time Warner Audio Major,http://images.amazon.com/images/P/1586210661.0...,http://images.amazon.com/images/P/1586210661.0...,http://images.amazon.com/images/P/1586210661.0...


In [47]:
# Book x user rating grid: rows are titles with at least 50 ratings, columns are
# users with more than 200 ratings. A user's multiple ratings of one title
# (e.g. via different ISBNs) are averaged; unrated cells are NaN.
pt = pd.pivot_table(
    final_ratings,
    index="Book-Title",
    columns="User-ID",
    values="Book-Rating",
    aggfunc="mean",
)
pt

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,10.0,,,,,,0.0,,,
1st to Die: A Novel,,,,,,,,,,9.0,...,,,,,,,,,,
2nd Chance,,10.0,,,,,,,,0.0,...,,,,,,0.0,,,0.0,
4 Blondes,,,,,,,,0.0,,,...,,,,,,,,,,
A Bend in the Road,0.0,,7.0,,,,,,,,...,,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,,,,7.0,,,,,,0.0,...,,9.0,,,,,0.0,,,
You Belong To Me,,,,,,,,,0.0,,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,,0.0,,,0.0,,,...,,,,,,,0.0,,,
Zoya,,,,,,,,,,,...,,0.0,,,,,,,,


In [49]:
# Treat "not rated" as 0 so every book has a complete numeric vector.
# Rebinding instead of `inplace=True` avoids mutating the frame displayed by the previous cell.
# NOTE(review): after this step an unrated book and a book rated 0 are indistinguishable.
pt = pt.fillna(0)
pt

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
# Pairwise cosine similarity between the rows of `pt` (one row per book), giving a
# square books-by-books matrix. This is cosine similarity, not Euclidean distance.
similarity_score = cosine_similarity(pt)
similarity_score  # entry [i, j] is the similarity between book i and book j

array([[1.        , 0.10255025, 0.01220856, ..., 0.12110367, 0.07347567,
        0.04316046],
       [0.10255025, 1.        , 0.2364573 , ..., 0.07446129, 0.16773875,
        0.14263397],
       [0.01220856, 0.2364573 , 1.        , ..., 0.04558758, 0.04938579,
        0.10796119],
       ...,
       [0.12110367, 0.07446129, 0.04558758, ..., 1.        , 0.07085128,
        0.0196177 ],
       [0.07347567, 0.16773875, 0.04938579, ..., 0.07085128, 1.        ,
        0.10602962],
       [0.04316046, 0.14263397, 0.10796119, ..., 0.0196177 , 0.10602962,
        1.        ]])

In [99]:
def recommend(book_name):
    """Print the titles of the 10 books most similar to ``book_name``.

    Similarity is read from the module-level ``similarity_score`` matrix; its
    rows and columns are addressed by position in ``pt.index``.

    Parameters
    ----------
    book_name : str
        Title exactly as it appears in ``pt.index``.

    Raises
    ------
    IndexError
        If ``book_name`` is not in ``pt.index``. The exception type is the one
        the bare positional lookup used to raise; it now carries a message.
    """
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f"Book {book_name!r} not found in the pivot table.")
    index = matches[0]

    # Remove the query book by position instead of assuming it sorts first:
    # when another book ties for the top score, slicing off element 0 could
    # drop the wrong book and leave the query itself in the results.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_score[index])
        if position != index
    ]
    similar_items = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:10]

    for position, _score in similar_items:
        print(pt.index[position])

In [100]:
# Print the 10 titles most similar to this book.
recommend("You Belong To Me")

Loves Music, Loves to Dance
I'll Be Seeing You
Before I Say Good-Bye
Daddy's Little Girl
All Around the Town
While My Pretty One Sleeps
My Gal Sunday
Moonlight Becomes You
The Cradle Will Fall
Let Me Call You Sweetheart


In [117]:
def recommend(book_name):
    """Return details of the 4 books most similar to ``book_name``.

    NOTE: this redefines the print-only ``recommend`` from an earlier cell;
    once this cell has run, the earlier version is shadowed.

    Similarity is read from the module-level ``similarity_score`` matrix
    (addressed by position in ``pt.index``); details come from ``books``.

    Parameters
    ----------
    book_name : str
        Title exactly as it appears in ``pt.index``.

    Returns
    -------
    list[list]
        One ``[title, author, image_url_m]`` list per recommended book, most
        similar first. An entry is an empty list if the title has no row in
        ``books``.

    Raises
    ------
    IndexError
        If ``book_name`` is not in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f"Book {book_name!r} not found in the pivot table.")
    index = matches[0]

    # Remove the query book by position instead of assuming it sorts first:
    # when another book ties for the top score, slicing off element 0 could
    # drop the wrong book and leave the query itself in the results.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_score[index])
        if position != index
    ]
    similar_items = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:4]

    data = []
    for position, _score in similar_items:
        title = pt.index[position]
        # Several ISBNs can share a title; use the first matching catalogue row,
        # looked up once instead of de-duplicating three times.
        first_match = books.loc[
            books["Book-Title"] == title,
            ["Book-Title", "Book-Author", "Image-URL-M"],
        ].head(1)
        data.append(first_match.iloc[0].tolist() if not first_match.empty else [])

    return data

In [118]:
# Fetch title, author and cover URL for the 4 titles most similar to this book.
recommend("You Belong To Me")

[['Loves Music, Loves to Dance',
  'Mary Higgins Clark',
  'http://images.amazon.com/images/P/0671758896.01.MZZZZZZZ.jpg'],
 ["I'll Be Seeing You",
  'Mary Higgins Clark',
  'http://images.amazon.com/images/P/0671888587.01.MZZZZZZZ.jpg'],
 ['Before I Say Good-Bye',
  'Mary Higgins Clark',
  'http://images.amazon.com/images/P/0671004573.01.MZZZZZZZ.jpg'],
 ["Daddy's Little Girl",
  'Mary Higgins Clark',
  'http://images.amazon.com/images/P/0743206045.01.MZZZZZZZ.jpg']]

In [105]:
# Persist the popularity table. The context manager closes the file handle
# as soon as the dump finishes instead of leaving it open.
with open("popular.pkl", "wb") as popular_file:
    pickle.dump(popular_df, popular_file)

In [119]:
# Persist the pivot table, the book catalogue and the book-book similarity matrix.
# Each file is opened in a context manager so its handle is closed after the dump.
for filename, artifact in (
    ("pt.pkl", pt),
    ("books.pkl", books),
    ("similarity_score.pkl", similarity_score),
):
    with open(filename, "wb") as artifact_file:
        pickle.dump(artifact, artifact_file)

# User-User Recommendation

In [101]:
# `pt` is books x users; transposing makes each row one user's rating vector.
user_item_matrix = pt.T

# Cosine similarity between every pair of users: a (num_users x num_users) matrix.
user_similarity_scores = cosine_similarity(user_item_matrix)

# 3. Build a recommendation function based on user similarity
def recommend_for_user(user_id):
    """Print up to 10 books liked by the user most similar to ``user_id``.

    The nearest neighbour is the other user with the highest score in the
    module-level ``user_similarity_scores`` matrix. A book counts as "liked" or
    "seen" when its rating in ``user_item_matrix`` is greater than 0. Books are
    printed in the column order of ``user_item_matrix``, not ranked by rating.

    Parameters
    ----------
    user_id
        A label from ``user_item_matrix.index``.

    Returns
    -------
    None
        Results, or a message when the user is unknown or has no similar
        user, are printed.
    """
    matches = np.where(user_item_matrix.index == user_id)[0]
    if matches.size == 0:
        print(f"User {user_id} not found in the dataset.")
        return
    user_index = matches[0]

    # Work on a copy and mask the user's own entry, rather than assuming the
    # user sorts first: on a tie at the top score, taking "element 1" of the
    # sorted list could select the user as their own neighbour.
    scores = np.array(user_similarity_scores[user_index], dtype=float)
    scores[user_index] = -np.inf

    # No other user, or nobody with positive similarity, means no taste twin.
    if not np.any(scores > 0):
        print("Could not find a similar user to generate recommendations.")
        return

    # argmax returns the first maximum, i.e. the lowest-index user among ties.
    top_neighbor_index = int(np.argmax(scores))

    neighbor_ratings = user_item_matrix.iloc[top_neighbor_index]
    target_ratings = user_item_matrix.iloc[user_index]

    # Books the neighbour rated above 0 that the target user has not rated above 0.
    liked_but_unseen = (neighbor_ratings > 0) & ~(target_ratings > 0)
    recommendations = list(neighbor_ratings.index[liked_but_unseen.to_numpy()])

    print(f"Recommendations for User {user_id} based on their 'taste twin':\n" + "-"*50)
    for book in recommendations[:10]:  # first 10 in column order
        print(book)

# Example usage: user IDs are the column labels of `pt`
# (equivalently, the row labels of `user_item_matrix`).
test_user_id = pt.columns[10] # the 11th user ID, picked as an example
recommend_for_user(test_user_id)

Recommendations for User 6563 based on their 'taste twin':
--------------------------------------------------
Chicken Soup for the Soul (Chicken Soup for the Soul)
Harry Potter and the Chamber of Secrets (Book 2)
Harry Potter and the Prisoner of Azkaban (Book 3)
Harry Potter and the Sorcerer's Stone (Book 1)
