## Recommendation Systems

### Using KNN

In [100]:
# Importing libraries

import numpy as np
import pandas as pd

In [101]:
# Loading jokes dataset

jokes_df = pd.read_csv("jester_items.csv")
jokes_df.head()

Unnamed: 0,jokeId,jokeText
0,1,"A man visits the doctor. The doctor says ""I ha..."
1,2,This couple had an excellent relationship goin...
2,3,Q. What's 200 feet long and has 4 teeth? \n\nA...
3,4,Q. What's the difference between a man and a t...
4,5,Q.\tWhat's O. J. Simpson's Internet address? \...


In [102]:
# Loading joke ratings dataset

rating_df = pd.read_csv("jester_ratings.csv")
rating_df.head()

Unnamed: 0,userId,jokeId,rating
0,1,5,0.219
1,1,7,-9.281
2,1,8,-9.281
3,1,13,-6.781
4,1,15,0.875


In [103]:
# Merging datasets

df_new = rating_df.merge(jokes_df, on='jokeId')

In [104]:
df_new.head()

Unnamed: 0,userId,jokeId,rating,jokeText
0,1,5,0.219,Q.\tWhat's O. J. Simpson's Internet address? \...
1,2,5,-9.688,Q.\tWhat's O. J. Simpson's Internet address? \...
2,3,5,-9.844,Q.\tWhat's O. J. Simpson's Internet address? \...
3,4,5,-5.812,Q.\tWhat's O. J. Simpson's Internet address? \...
4,5,5,6.906,Q.\tWhat's O. J. Simpson's Internet address? \...


In [106]:
# Data Cleaning

df_clean = df_new.dropna(axis=0, subset=['jokeText'])
df_clean.head()

Unnamed: 0,userId,jokeId,rating,jokeText
0,1,5,0.219,Q.\tWhat's O. J. Simpson's Internet address? \...
1,2,5,-9.688,Q.\tWhat's O. J. Simpson's Internet address? \...
2,3,5,-9.844,Q.\tWhat's O. J. Simpson's Internet address? \...
3,4,5,-5.812,Q.\tWhat's O. J. Simpson's Internet address? \...
4,5,5,6.906,Q.\tWhat's O. J. Simpson's Internet address? \...


In [107]:
df_clean.isna().sum()

userId      0
jokeId      0
rating      0
jokeText    0
dtype: int64

In [108]:
# Total rating count for each joke

joke_rating_count = (df_clean.groupby(by=['jokeText'])['rating'].count().reset_index().
                rename(columns={'rating':'rating_count'})[['jokeText','rating_count']])
joke_rating_count.head()

Unnamed: 0,jokeText,rating_count
0,President Clinton looks up from his desk in t...,230
1,"""May I take your order?"" the waiter asked. \n\...",8342
2,(A) The Japanese eat very little fat and suffe...,11268
3,"A Briton, a Frenchman and a Russian are viewin...",16525
4,A Czechoslovakian man felt his eyesight was gr...,9385


In [109]:
# Merging datasets

final_data = joke_rating_count.merge(df_clean, left_on='jokeText', right_on='jokeText', how='left')

In [110]:
final_data.head()

Unnamed: 0,jokeText,rating_count,userId,jokeId,rating
0,President Clinton looks up from his desk in t...,230,1,31,8.781
1,President Clinton looks up from his desk in t...,230,2,31,5.5
2,President Clinton looks up from his desk in t...,230,5,31,3.219
3,President Clinton looks up from his desk in t...,230,6,31,5.969
4,President Clinton looks up from his desk in t...,230,7,31,-0.969


In [111]:
# Mean

final_data['rating_count'].mean()

23693.53717443522

In [112]:
# Mode

final_data['rating_count'].mode()

0    59122
Name: rating_count, dtype: int64

In [113]:
# Median

final_data['rating_count'].median()

16093.0

In [114]:
# Considering rows with rating_count above the popularity threshold

popularity_threshold = 20000   # popular jokes

popular_joke = final_data[final_data.rating_count > popularity_threshold]

In [115]:
popular_joke.head()

Unnamed: 0,jokeText,rating_count,userId,jokeId,rating
99936,A couple of hunters are out in the woods in th...,21822,1,105,2.0
99937,A couple of hunters are out in the woods in th...,21822,3,105,1.25
99938,A couple of hunters are out in the woods in th...,21822,8,105,0.875
99939,A couple of hunters are out in the woods in th...,21822,10,105,0.219
99940,A couple of hunters are out in the woods in th...,21822,13,105,7.031


In [116]:
# Pivot table

jokes_rating_pt = popular_joke.pivot_table(index='jokeText', columns='userId', values='rating', fill_value=0)
jokes_rating_pt.head()

userId,1,2,3,4,5,6,7,8,9,10,...,63969,63970,63971,63972,63973,63974,63975,63976,63977,63978
jokeText,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"A couple of hunters are out in the woods in the deep south when one of them falls to the ground. He doesn't seem to be breathing, and his eyes are rolled back in his head. The other guy whips out his cell phone and calls 911. He gasps to the operator, ""My friend is dead! What can I do?"" The operator, in a calm and soothing voice, says, ""Alright, take it easy. I can help. First, let's make sure he's dead."" There is silence, and then a gun shot is heard. The hunter comes back on the line. ""Okay. Now what??""",2.0,0.0,1.25,0.0,0.0,0.0,0.0,0.875,0.0,0.219,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.938
"A dog walks into Western Union and asks the clerk to send a telegram. He fills out a form on which he\nwrites down the telegram he wishes to send: ""Bow wow wow, Bow wow wow.""\n\nThe clerk says, ""You can add another 'Bow wow' for the same price.""\n\nThe dog responded, ""Now wouldn't that sound a little silly?"" \n",-7.469,-2.688,-9.812,0.0,5.656,-9.156,0.406,-2.219,8.844,-6.312,...,0.656,-9.062,0.0,0.0,5.219,3.75,-9.188,0.0,-2.75,-6.375
"A little boy goes to his dad and asks, ""What is politics?"" His dad says, ""Well son, let me try to explain it this way: I'm the breadwinner of the family, so let's call me capitalism. Your Mom, she's the administrator of the money, so we'll call her the government. We're here to take care of your needs, so we'll call you the people. The nanny, we'll consider her the working class. And your baby brother, we'll call him the future. Now, think about that and see if that makes sense."" So the little boy goes off to bed thinking about what dad had said. Later that night, he hears his baby brother crying, so he gets up to check on him. He finds that the baby has severely soiled his diaper. So the little boy goes to his parents' room and finds his mother sound asleep. Not wanting to wake her, he goes to the nanny's room. Finding the door locked, he peeks in the keyhole and sees his father in bed with the nanny. He gives up and goes back to bed. The next morning, the little boy says to his father, ""Dad, I think I understand the concept of politics now."" The father says, ""Good, son. Tell me in your own words what you think politics is all about."" The little boy replies, ""Well, while capitalism is screwing the working class, the government is sound asleep, the people are being ignored and the future is in deep shit.""",8.688,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.5
"A man arrives at the gates of heaven. St. Peter asks, ""Religion?"" \nThe man says, ""Methodist."" St. Peter looks down his list, and says, \n""Go to room 24, but be very quiet as you pass room 8."" \n\nAnother man arrives at the gates of heaven. ""Religion?""\n""Baptist."" ""Go to room 18, but be very quiet as you pass room 8."" \n\nA third man arrives at the gates. ""Religion?"" ""Jewish.""\n""Go to room 11, but be very quiet as you pass room 8."" \nThe man says, ""I can understand there being different rooms for different religions, but why\nmust I be quiet when I pass room 8?"" St. Peter tells him, ""Well the Catholics are in room 8, \nand they think they're the only ones here.\n",8.781,-4.25,0.0,0.0,4.906,0.0,1.969,-0.875,0.0,9.531,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.531
"A radio conversation of a US naval \nship with Canadian authorities ... \n\nAmericans: Please divert your course 15 degrees to the North to avoid a\ncollision.\n\nCanadians: Recommend you divert YOUR course 15 degrees to the South to \navoid a collision.\n\nAmericans: This is the Captain of a US Navy ship. I say again, divert \nYOUR course.\n\nCanadians: No. I say again, you divert YOUR course.\n\nAmericans: This is the aircraft carrier USS LINCOLN, the second largest ship in the United States' Atlantic Fleet. We are accompanied by three destroyers, three cruisers and numerous support vessels. I demand that you change your course 15 degrees north, that's ONE FIVE DEGREES NORTH, or counter-measures will be undertaken to ensure the safety of this ship.\n\nCanadians: This is a lighthouse. Your call.\n",9.812,9.562,0.0,0.0,6.0,-5.094,3.281,3.75,0.0,6.688,...,0.0,0.0,0.0,0.0,0.0,8.188,0.0,0.0,0.0,7.438


In [117]:
from scipy.sparse import csr_matrix

In [118]:
# Cosine Similarity matrix

jokes_rating_pt_matrix = csr_matrix(jokes_rating_pt.values)

In [119]:
from sklearn.neighbors import NearestNeighbors

In [120]:
# Fitting K-nearest nearest neighbours

knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
knn_model.fit(jokes_rating_pt_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

In [121]:
# Randomly picking a value

query_index = np.random.choice(jokes_rating_pt.shape[0])
print(query_index)

11


In [122]:
# Obtaining the distances

distances, indices = knn_model.kneighbors(jokes_rating_pt.iloc[query_index,:].values.reshape(1, -1), n_neighbors=6)

In [123]:
# Print Output

for i in range(0, len(distances.flatten())):   # flatten distances
    if i == 0:
        print('Recommendations for {0}:\n'.format(jokes_rating_pt.index[query_index]))
    else: 
        print('{0}:{1}, with distance of {2}:'. format(i, jokes_rating_pt.index[indices.flatten()[i]], distances.flatten()[i]))
        

Recommendations for Q. What is orange and sounds like a parrot?  

A. A carrot.
:

1:Q:  What did the blind person say when given some matzah?

A:  Who the hell wrote this?
, with distance of 0.6338044215243199:
2:A dog walks into Western Union and asks the clerk to send a telegram. He fills out a form on which he
writes down the telegram he wishes to send: "Bow wow wow, Bow wow wow."

The clerk says, "You can add another 'Bow wow' for the same price."

The dog responded, "Now wouldn't that sound a little silly?" 
, with distance of 0.6424599683969734:
3:Q. Did you hear about the dyslexic devil worshiper? 

A. He sold his soul to Santa.
, with distance of 0.7125070068647992:
4:How many feminists does it take to screw in a light bulb?
That's not funny.
, with distance of 0.7138175990274462:
5:They asked the Japanese visitor if they have elections in his
country.  
"Every Morning" he answers.
, with distance of 0.7398406975910854:


### SVD

In [131]:
#!pip install surprise

In [142]:
from surprise import SVD, Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split

In [143]:
# Range of ratings 

print('Minimum rating:', final_data['rating'].min())
print('Maximum rating:', final_data['rating'].max())

Minimum rating: -10.0
Maximum rating: 10.0


In [144]:
# Reader based on the scale of ratings

reader = Reader(rating_scale=(-10, 10))

In [145]:
# Considering the dataframe in user_id, product and rating order

dataset_instance = final_data[['userId', 'jokeText', 'rating']]
dataset_instance.head()

Unnamed: 0,userId,jokeText,rating
0,1,President Clinton looks up from his desk in t...,8.781
1,2,President Clinton looks up from his desk in t...,5.5
2,5,President Clinton looks up from his desk in t...,3.219
3,6,President Clinton looks up from his desk in t...,5.969
4,7,President Clinton looks up from his desk in t...,-0.969


In [146]:
data = Dataset.load_from_df(dataset_instance, reader)

In [147]:
# Train Test Split

trainset, testset = train_test_split(data, test_size=0.25)

In [148]:
# Fitting the Model

svd_model = SVD(n_factors=100)
svd_model.fit(trainset)

In [149]:
# shape of matrix

svd_model.qi.shape

(140, 100)

In [150]:
# Predicting using the model

user_id = dataset_instance['userId'].loc[dataset_instance.index[50]]
joke = dataset_instance['userId'].loc[dataset_instance.index[50]]

result_matrix.predict(user_id, joke)

Prediction(uid=151, iid=151, r_ui=None, est=2.2305683751210936, details={'was_impossible': False})