In [1]:
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
ratings = pd.read_csv("kitaps.csv", index_col=0)

In [3]:
ratings

Unnamed: 0_level_0,action,romantic,history,science
users,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
user1,5.0,2.0,1.0,4.0
user2,2.0,1.0,5.0,5.0
user3,1.0,4.0,4.0,1.0
user4,1.0,5.0,3.0,2.0


In [4]:
def standardize(row):
    new_row = (row- row.mean())/(row.max()-row.min())
    return new_row

ratings_std = ratings.apply(standardize)
ratings_std

Unnamed: 0_level_0,action,romantic,history,science
users,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
user1,0.6875,-0.25,-0.5625,0.25
user2,-0.0625,-0.5,0.4375,0.5
user3,-0.3125,0.25,0.1875,-0.5
user4,-0.3125,0.5,-0.0625,-0.25


In [5]:
item_similarity = cosine_similarity(ratings_std.T)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.float


In [6]:
item_similarity

array([[ 1.00000000e+00, -5.78691387e-01, -7.47531930e-01,
         5.78691387e-01],
       [-5.78691387e-01,  1.00000000e+00, -1.06904497e-01,
        -9.00000000e-01],
       [-7.47531930e-01, -1.06904497e-01,  1.00000000e+00,
        -3.29066027e-17],
       [ 5.78691387e-01, -9.00000000e-01, -3.29066027e-17,
         1.00000000e+00]])

In [7]:
item_similarity_df = pd.DataFrame(item_similarity, index=ratings.columns, columns=ratings.columns)

In [8]:
item_similarity_df

Unnamed: 0,action,romantic,history,science
action,1.0,-0.578691,-0.7475319,0.5786914
romantic,-0.578691,1.0,-0.1069045,-0.9
history,-0.747532,-0.106904,1.0,-3.2906600000000003e-17
science,0.578691,-0.9,-3.2906600000000003e-17,1.0


In [14]:
def get_similar_book(book_name,user_rating):
    similar_score = item_similarity_df[book_name]*(user_rating-2.5)
    similar_score = similar_score.sort_values(ascending=False)
    
    return similar_score

In [15]:
get_similar_book("action",5)

action      2.500000
science     1.446728
romantic   -1.446728
history    -1.868830
Name: action, dtype: float64

In [16]:
get_similar_book("science",5)

science     2.500000e+00
action      1.446728e+00
history    -8.226651e-17
romantic   -2.250000e+00
Name: science, dtype: float64

In [17]:
get_similar_book("science",1)

romantic    1.350000e+00
history     4.935990e-17
action     -8.680371e-01
science    -1.500000e+00
Name: science, dtype: float64

In [18]:
action_lover = [("action", 5),("romantic",1),("history",2),("science",3)]


In [20]:
similar_books = pd.DataFrame()

In [24]:
for book,rating in action_lover:
    similar_books = similar_books.append(get_similar_book(book,rating), ignore_index=True)

In [25]:
similar_books.head()

Unnamed: 0,action,science,romantic,history
0,2.5,1.446728,-1.446728,-1.86883
1,0.868037,1.35,-1.5,0.1603567
2,0.373766,1.6453300000000002e-17,0.053452,-0.5
3,0.289346,0.5,-0.45,-1.6453300000000002e-17
4,2.5,1.446728,-1.446728,-1.86883


In [27]:
similar_books.sum().sort_values(ascending=False)

action      8.062297
science     6.593457
history    -4.416946
romantic   -6.686552
dtype: float64