# Capstone Term 2 - Project

* Marcos Bittencourt
---
* Contributors
    * Todd McCullough [Git](https://github.com/tamccullough)
    * Savya Sharma [Git](https://github.com/SavyaSharma)
    * Marko Topitch [Git](https://github.com/TopMarko)
---

### Load the Model and Data

##### Import the Needed Modules

In [1]:
import pandas as pd
import numpy as np
import heapq
import pickle

##### Load the production ready model

In [2]:
# Path to the serialized, production-ready recommender model.
filename = 'recipes_recommender_model.sav'
# NOTE: unpickling can execute arbitrary code stored in the file, so only load
# a model file that comes from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open(filename, 'rb') as model_file:
    rr_model = pickle.load(model_file)

##### Import Data

In [3]:
# Load the recipes, users and ratings tables (in that order) from the
# datasets directory.
recipes_df, users_df, ratings_df = map(
    pd.read_csv,
    (
        'datasets/rr-recipes.csv',
        'datasets/rr-users.csv',
        'datasets/rr-ratings.csv',
    ),
)

### Recommendation Functions

In [4]:
def get_r(user_id, n=200):
    """Return the top-``n`` predicted recipes for a user, best first.

    Every item in ``ratings_df`` that ``user_id`` has not rated is scored with
    ``rr_model.predict(user_id, item_id).est``. The ``n`` highest-scoring item
    ids are then looked up in ``recipes_df`` by ``recipe_id``.

    Args:
        user_id: Value matched against the ``user`` column of ``ratings_df``.
        n: Maximum number of recipes to return. Defaults to 200.

    Returns:
        A copy of the matching ``recipes_df`` rows, indexed by ``recipe_id``,
        with a ``pred_rating`` column inserted at position 1, sorted by
        ``pred_rating`` in descending order.
    """
    # Select which system to use. Due to memory constraints, item based is
    # the only viable option.
    recommender_system = rr_model

    # A set de-duplicates repeated ratings and makes the membership test
    # below O(1) per item instead of a scan over a list.
    rated_items = set(
        ratings_df.loc[ratings_df['user'] == user_id]['item'].tolist()
    )
    all_item_ids = list(set(ratings_df['item'].tolist()))
    # Items the user has not rated yet.
    new_items = [x for x in all_item_ids if x not in rated_items]

    # Estimate a rating for every unrated item.
    predicted_ratings = {
        item_id: recommender_system.predict(user_id, item_id).est
        for item_id in new_items
    }

    # Item ids with the highest predicted ratings.
    recommended_ids = heapq.nlargest(
        n, predicted_ratings, key=predicted_ratings.get
    )

    recommended_df = recipes_df.loc[
        recipes_df['recipe_id'].isin(recommended_ids)
    ].copy()
    recommended_df.set_index('recipe_id', inplace=True)

    # Look each rating up by recipe id rather than by row position, so the
    # result is correct even when recipes_df is not sorted by recipe_id, and
    # the column length always matches the rows actually found (some ids may
    # be absent from, or repeated in, recipes_df).
    ratings = np.array(
        [predicted_ratings[recipe_id] for recipe_id in recommended_df.index],
        dtype=float,
    )
    # Position 1 is kept because mk_tbl reads the prediction from row[1].
    recommended_df.insert(1, 'pred_rating', ratings)

    return recommended_df.sort_values('pred_rating', ascending=False).head(n)

def set_up_rr(user_id, ingredient_list):
    """Return the user's recommendations that contain every listed ingredient.

    Args:
        user_id: Passed through to ``get_r`` to obtain the recommendations.
        ingredient_list: Comma-separated ingredient names, e.g.
            ``'chocolate,cheese'``. Whitespace around each name is ignored
            and empty entries are skipped.

    Returns:
        The rows of ``get_r(user_id)`` whose ``ingredients`` text contains
        every requested ingredient as a case-sensitive substring.
    """
    rr_list = get_r(user_id)
    for raw_item in ingredient_list.split(','):
        item = raw_item.strip()
        if not item:
            # An empty entry (e.g. a trailing comma) carries no constraint.
            continue
        # regex=False: the text is user input, so characters such as '(' or
        # '+' must be matched literally instead of being parsed as a pattern.
        # na=False: rows with missing ingredients count as non-matching
        # rather than producing an invalid boolean mask.
        matches = rr_list['ingredients'].str.contains(
            item, regex=False, na=False
        )
        rr_list = rr_list[matches]
    return rr_list

def mk_tbl(rows):
    """Build the rows of a dynamic table from positional recommendation rows.

    For each input row, the values at positions 1, 2, 7, 5, 6 and 10 are
    picked, in that order, into a new six-element list.

    Args:
        rows: Iterable of indexable rows with at least 11 positions each.

    Returns:
        A list with one six-element list per input row.
    """
    # Output order: prediction, title, then three time fields and the URL
    # (presumably ready/prep/cook time -- confirm against the recipes columns).
    column_order = (1, 2, 7, 5, 6, 10)
    return [[record[pos] for pos in column_order] for record in rows]

### Get a Recommendation Based on Ingredients

This is the final code; it will be implemented in a cleaner fashion through the browser interface.

In [5]:
# Example request: the user to recommend for and the comma-separated
# ingredients that every returned recipe must contain.
user_id, ingredient_list = 35, 'chocolate,cheese'
table_list = set_up_rr(user_id=user_id, ingredient_list=ingredient_list)

In [6]:
# Convert the filtered recommendations to a NumPy array so that mk_tbl can
# index each row by position.
# NOTE: this rebinds table_list from the DataFrame returned by set_up_rr to an
# array, so re-running this cell without re-running the previous one fails
# (arrays have no .to_numpy()).
table_list = table_list.to_numpy()
# Build the display table from the selected columns; no column names are
# given, so pandas labels them 0-5.
rr_recipes = pd.DataFrame(mk_tbl(table_list))
# Bare expression: shows the table as the notebook cell output.
rr_recipes

Unnamed: 0,0,1,2,3,4,5
0,5.0,Easy Chocolate Cream Cheese Frosting,,,,https://www.allrecipes.com/recipe/10342/easy-c...
1,4.714286,Baked Chocolate Caramel Cheesecake,8 h 45 m,1 Hour,1 Hour 45 Minutes,https://www.allrecipes.com/recipe/100827/baked...
