# Recommendation System - Collaborative filtering

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the reader-by-book matrix from disk and preview its first five rows.
df = pd.read_csv('book.csv')
df.head(n=5)

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt,Florence
0,0,1,0,1,0,0,1,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,1,1,1,0,1,0,1,0,0,0,0
4,0,0,1,0,0,0,1,0,0,0,0


In [3]:
# The user-item matrix has one row per reader and one column per book
# (2000 readers and 11 books). Each cell records whether the reader has
# rated that book or not.
N, M = df.shape  # N = number of readers, M = number of books

### User-user collaborative filtering

In [4]:
# Measure how alike the readers' tastes are: compute the cosine similarity
# between every pair of reader rows so that readers with similar books can be
# grouped together.

from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine, correlation

# pairwise_distances returns cosine *distance*; similarity is its complement.
reader_sim = 1 - pairwise_distances(df.values, metric='cosine')

# Wrap the matrix in a DataFrame whose rows and columns are labelled with the
# reader ids taken from df's index.
reader_ids = df.index.unique()
reader_sim_df = pd.DataFrame(reader_sim, index=reader_ids, columns=reader_ids)

# Top-left corner of the similarity matrix
reader_sim_df.iloc[0:5, 0:5]

Unnamed: 0,0,1,2,3,4
0,1.0,0.0,0.0,0.516398,0.408248
1,0.0,1.0,0.0,0.447214,0.0
2,0.0,0.0,1.0,0.0,0.0
3,0.516398,0.447214,0.0,1.0,0.632456
4,0.408248,0.0,0.0,0.632456,1.0


In [5]:
# Ask for the reader to build recommendations for. Ids outside the matrix are
# rejected here: the following cells index df positionally with i, so a bad id
# would otherwise fail later or be misreported as a cold-start reader.
i = int(input('Enter reader id: (an integer between 0 and 1999) ')) # Accepting reader id
if not 0 <= i < len(df):
    raise ValueError(f'Reader id {i} is out of range; expected 0 to {len(df) - 1}')

Enter reader id: (an integer between 0 and 1999) 9


In [6]:
# Profile of the selected reader: the row of df at position i, one flag per book
df.iloc[i]

ChildBks     1
YouthBks     1
CookBks      1
DoItYBks     0
RefBks       0
ArtBks       0
GeogBks      1
ItalCook     0
ItalAtlas    0
ItalArt      0
Florence     0
Name: 9, dtype: int64

In [7]:
N = df.shape[0]  # Total number of readers
K = 30           # Limit for number of similar readers/neighbours
neighbors = []   # Filled with (similarity score, reader id, number of books read)

# Number of books read by every reader, counted once up front instead of
# calling value_counts() on two rows for each candidate.
books_read = df.eq(1).sum(axis=1).to_numpy()

# A reader that is not in the matrix, or that has not read any book, has no
# profile to compare against and is treated as a cold start. This is tested
# explicitly instead of relying on a bare `except` that would hide any error.
n_read = int(books_read[i]) if 0 <= i < N else 0

if n_read == 0:
    print(f'\n\n\n Reader {i:<{7}} New user, Cold start issue, Popular book recommended') # try reader id 2
else:
    print(f'\n\n\n Reader {i:<{7}} Number of books read:', n_read)

    # Similarity of reader i to every reader. reader_sim_df was built from
    # df.values, so its rows are in the same order as the rows of df.
    sims = reader_sim_df.iloc[:, i].to_numpy()

    # Keep readers that are similar enough (> 0.5), are not an exact match
    # (similarity rounding to 1) and have read more books than reader i.
    is_candidate = (sims > 0.5) & (np.round(sims, 2) != 1) & (books_read > n_read)
    candidates = np.flatnonzero(is_candidate)

    # Stable sort by descending similarity so ties stay in ascending reader-id
    # order, then keep the K most similar readers.
    ranked = candidates[np.argsort(-sims[candidates], kind='stable')][:K]
    neighbors = [(float(sims[j]), int(j), int(books_read[j])) for j in ranked]





 Reader 9       Number of books read: 4


In [8]:
# Number of neighbours kept for reader i (the search above caps the list at K)
len(neighbors)

30

In [9]:
# Each neighbour is a tuple (similarity score, reader id, number of books read).
# The second field is the neighbouring reader's id, not a book id, so the
# printed header is labelled accordingly.
print('\n(similarity score),(reader id),(number of books read)\n')
neighbors


(similarity score),(bookid),(number of books read)



[(0.8944271909999159, 3, 5),
 (0.8944271909999159, 98, 5),
 (0.8944271909999159, 158, 5),
 (0.8944271909999159, 362, 5),
 (0.8944271909999159, 442, 5),
 (0.8944271909999159, 503, 5),
 (0.8944271909999159, 540, 5),
 (0.8944271909999159, 629, 5),
 (0.8944271909999159, 659, 5),
 (0.8944271909999159, 698, 5),
 (0.8944271909999159, 851, 5),
 (0.8944271909999159, 897, 5),
 (0.8944271909999159, 977, 5),
 (0.8944271909999159, 1000, 5),
 (0.8944271909999159, 1004, 5),
 (0.8944271909999159, 1035, 5),
 (0.8944271909999159, 1059, 5),
 (0.8944271909999159, 1079, 5),
 (0.8944271909999159, 1150, 5),
 (0.8944271909999159, 1167, 5),
 (0.8944271909999159, 1188, 5),
 (0.8944271909999159, 1285, 5),
 (0.8944271909999159, 1290, 5),
 (0.8944271909999159, 1302, 5),
 (0.8944271909999159, 1348, 5),
 (0.8944271909999159, 1414, 5),
 (0.8944271909999159, 1455, 5),
 (0.8944271909999159, 1515, 5),
 (0.8944271909999159, 1545, 5),
 (0.8944271909999159, 1653, 5)]

In [10]:
# `neighbors` is a flat list of tuples, so it can never equal [[]] and the old
# test never fired. An empty list alone does not mean "read all" either: it is
# also empty for a cold-start reader or when nobody is similar enough. Print
# the message only when reader i really has every book flagged.
if not neighbors and df.iloc[i, :].eq(1).all():
    print('Read all') # try reader id 793

In [11]:
# Ids of the readers found similar to reader i (second field of each tuple)
n_idx = [reader_id for _, reader_id, _ in neighbors]
n_idx

[3,
 98,
 158,
 362,
 442,
 503,
 540,
 629,
 659,
 698,
 851,
 897,
 977,
 1000,
 1004,
 1035,
 1059,
 1079,
 1150,
 1167,
 1188,
 1285,
 1290,
 1302,
 1348,
 1414,
 1455,
 1515,
 1545,
 1653]

In [12]:
# Books already read by reader i: the column labels flagged 1 in that row
reader_profile = df.iloc[i, :]
reader_books = reader_profile[reader_profile == 1].index.tolist()
reader_books

['ChildBks', 'YouthBks', 'CookBks', 'GeogBks']

In [13]:
# Recommend the books that the readers similar to reader i have rated most
# often, leaving out the books reader i already has.
neighbor_profiles = df[df.index.isin(n_idx)]
unread_by_reader = neighbor_profiles.drop(columns=reader_books)

# Index the table by how many neighbours have each remaining book, so that
# sorting the index in descending order puts the most common books first.
neighbor_counts = unread_by_reader.sum(axis=0).values
books = pd.DataFrame(unread_by_reader.columns, neighbor_counts, columns=['Books Recommended'])
books = books.sort_index(ascending=False)
books[:3]

Unnamed: 0,Books Recommended
10,DoItYBks
7,RefBks
6,ArtBks


In [14]:
# Cold-start fallback: recommend the books with the highest totals across all readers
book_totals = df.sum(axis=0)
book_totals.sort_values(ascending=False).head(3)

# CookBks and ChildBks are the most popular

CookBks     862
ChildBks    846
DoItYBks    564
dtype: int64

### Item-item collaborative filtering

In [15]:
# Refer to assignment 14 (Association rules)

# Thank you!