In [1]:
import pandas as pd
import numpy as np
import heapq
from math import floor

In [2]:
from surprise import Reader, Dataset
from surprise import KNNWithMeans

In [3]:
recipes_df = pd.read_csv('rr-recipes.csv')
users_df = pd.read_csv('rr-users.csv')
master_ratings_df = pd.read_csv('rr-ratings.csv')

In [4]:
recipes_df.columns

Index(['recipe_id', 'title', 'prep_time', 'cook_time', 'ready_time',
       'ingredients', 'directions', 'url', 'photo_url'],
      dtype='object')

In [5]:
recipes_df.head()

Unnamed: 0,recipe_id,title,prep_time,cook_time,ready_time,ingredients,directions,url,photo_url
0,7000,Golden Crescent Rolls Recipe,25,15,190,"yeast,water,white sugar,salt,egg,butter,flour,...","Dissolve yeast in warm water.**Stir in sugar, ...",https://www.allrecipes.com/recipe/7000,https://images.media-allrecipes.com/userphotos...
1,7001,Poppy Seed Bread with Glaze Recipe,15,60,80,"flour,salt,baking powder,poppy,butter,vegetabl...",'Preheat oven to 350 degrees F (175 degrees C)...,https://www.allrecipes.com/recipe/7001,https://images.media-allrecipes.com/userphotos...
2,7003,Applesauce Bread I Recipe,10,80,90,"flour,egg,white sugar,vegetable oil,applesauce...",Preheat oven to 350 degrees F (175 degrees C)....,https://www.allrecipes.com/recipe/7003,https://images.media-allrecipes.com/userphotos...
3,7006,Apple Raisin Bread Recipe,15,60,75,"flour,baking powder,baking soda,salt,cinnamon,...",Preheat oven to 350 degrees F (175 degrees C)....,https://www.allrecipes.com/recipe/7006,https://images.media-allrecipes.com/userphotos...
4,7007,Buttermilk Oatmeal Bread Recipe,20,60,100,"oat,buttermilk,vegetable oil,egg,brown sugar,f...",Mix oats with buttermilk. Let stand for 1/2 h...,https://www.allrecipes.com/recipe/7007,https://images.media-allrecipes.com/userphotos...


In [6]:
users_df.head()

Unnamed: 0.1,Unnamed: 0,user_id,AR_id,link
0,0,1,naples34102,https://www.allrecipes.com/cook/naples34102
1,1,2,1207425,https://www.allrecipes.com/cook/1207425
2,2,3,183259,https://www.allrecipes.com/cook/183259
3,3,4,162514,https://www.allrecipes.com/cook/162514
4,4,5,1159515,https://www.allrecipes.com/cook/1159515


In [7]:
master_ratings_df.head()

Unnamed: 0,user,item,rating
0,675719,7000,5
1,1478626,7000,5
2,608663,7000,5
3,2785736,7000,5
4,594474,7000,5


In [8]:
ratings_df = master_ratings_df.copy()
ratings_df.columns = ['user', 'item', 'rating']

In [9]:
ratings_df['user'].value_counts()

2043209     1177
268713       949
1153011      922
303790       746
379190       721
            ... 
3823917        1
7182005        1
16423221       1
5959992        1
1599757        1
Name: user, Length: 618190, dtype: int64

In [10]:
ratings_df.head()

Unnamed: 0,user,item,rating
0,675719,7000,5
1,1478626,7000,5
2,608663,7000,5
3,2785736,7000,5
4,594474,7000,5


In [11]:
ratings_df.value_counts()

user      item   rating
24896382  24956  5         1
1540603   20242  5         1
          8890   5         1
          10574  4         1
          13571  4         1
                          ..
3874630   14533  5         1
          20038  5         1
          23788  5         1
          24566  5         1
16        7375   5         1
Length: 1555581, dtype: int64

In [12]:
#ratings_df.drop_duplicates(subset =["user","item"],keep = "first", inplace = True)

In [13]:
reader = Reader(rating_scale=(1,5)) # This just defines the rating scale
data = Dataset.load_from_df(ratings_df[['user', 'item', 'rating']], reader=reader)

In [14]:
def build_recommender(user_based=False, sim_type='cosine'):
    sim_options = {
        "name": sim_type,
        "user_based": user_based
    }

    return KNNWithMeans(sim_options=sim_options)

In [15]:
trainset = data.build_full_trainset()

# user_based_recommender = build_recommender(user_based=True)
item_based_recommender = build_recommender()

# User based seems to give a memory error when fit, due to the much larger amount of users than recipes.
# user_based_recommender.fit(trainset)
item_based_recommender.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x10a0f2f70>

In [16]:
i = 1
for i in range(150):
    prediction = item_based_recommender.predict(i,167)
    print(round(prediction.est,2), end=', ')
    i = i + 1

4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 4.51, 

In [17]:
def get_r(user_id):
    recommender_system = item_based_recommender
    N = 200
    rated_items = list(set(ratings_df.loc[ratings_df['user'] == user_id]['item'].tolist()))
    all_item_ids = list(set(ratings_df['item'].tolist()))
    new_items = [x for x in all_item_ids if x not in rated_items]

    predicted_ratings = {}
    for item_id in new_items:
        predicted_ratings[item_id] = recommender_system.predict(user_id, item_id).est
        pass

    recommended_ids = heapq.nlargest(N, predicted_ratings, key=predicted_ratings.get)
    recommended_ids = sorted(recommended_ids)

    recommended_df = recipes_df.loc[recipes_df['recipe_id'].isin(recommended_ids)].copy()
    recommended_df.set_index('recipe_id', inplace=True)
    recommended_df.insert(1, 'pred_rating', np.zeros(len(recommended_ids)))

    for idx,item_id in enumerate(recommended_ids):
        recommended_df.iloc[idx, recommended_df.columns.get_loc('pred_rating')] =predicted_ratings[item_id]
        pass

    return recommended_df.head(N).sort_values('pred_rating', ascending=False)

In [18]:
# ask the user for input
# get their ID number
user_id = int(input('Enter user id: '))

# get them to list some ingredients, currently it breaks if the second or next ingredient is not there
ingredient_list = input('Enter the ingredients separated by commas that you have on hand: ')

items = np.array(ingredient_list.split(','))

rating = int(input('Enter the lowest rating you\'ll accept: '))

user_name = users_df.loc[users_df['user_id'] == user_id]

print('\nuser: ',user_name.iloc[0,1])
print(ingredient_list)
print('\nHere are your recommendations.')
test = get_r(user_id)
for item in items:
    test = test[test['ingredients'].str.contains(item)]
test = test[test['pred_rating'] >= rating]

Enter user id:  1478626
Enter the ingredients separated by commas that you have on hand:  chocolate
Enter the lowest rating you'll accept:  3


IndexError: single positional indexer is out-of-bounds

In [None]:
rec_df = pd.read_csv('rr-recipes.csv')
u_df = pd.read_csv('rr-users.csv')
r_df = pd.read_csv('rr-ratings.csv')

In [None]:
a = 99
r_df.loc[master_ratings_df['reviewer_id'] == a]