In [165]:
import json
import math

def get_recipes_sections(user_id):
    """Return the recipe sections to present to ``user_id``.

    When ``_needs_cold_start`` reports that the user needs a cold start, only
    the cold-start sections are returned, in the order they were built.
    Otherwise the personalised sections are merged with the cold-start
    sections and ordered by ascending ``'rank'``.

    Args:
        user_id: identifier forwarded to the helper functions.

    Returns:
        list: section dicts (each carrying at least a ``'rank'`` key).
    """
    if _needs_cold_start(user_id):
        return _get_cold_start_recipes(user_id)

    sections = _recommend_recipes(user_id) + _get_cold_start_recipes(user_id)
    # list.sort() sorts in place and returns None, so returning its result
    # handed callers None; sorted() returns the ordered list (stable for ties).
    return sorted(sections, key=get_rank)

def get_rank(element):
    """Sort key for sections: the value stored under the ``'rank'`` key."""
    rank = element['rank']
    return rank


def _needs_cold_start(user_id):
    df = get_df_from(COUNT_USER_RATINGS_QUERY.format(user_id), ['count'])

    return df[df['count'] >= COLD_START_RATING_AMOUNT].empty

    # TODO: 1. get the overall most popular (vote count + rating)
    #       2. sort the categories by popularity
    #           2.1 for each category -> get the most popular (vote count + rating)
    #       3. if there are other users in the DB
    #           3.1 do Count Vectorizer for the user metadata to find similar users
    #           3.2 get the other user's top rated recipes
    #       4. recommend recipes by the time of the day (0800 -> breakfast)


def _get_cold_start_recipes(user_id):
    conn = connect()
    most_popular_df = execute_select(conn, MOST_POPULAR_QUERY, RECIPE_COLUMNS)
    top_categories_df = execute_select(conn, TOP_CATEGORIES_QUERY,
                                       TOP_CATEGORIES_COLUMNS)
    category_sections = [_create_sections_of(category.category, _get_rank(top_categories_df.__len__(), category.row_num), conn) for category in top_categories_df.itertuples()]
    recipes_json = json.loads(most_popular_df.to_json(orient='records'))
    popular_section = [{'name': 'Popular On Eatin', 'recipes': recipes_json, 'rank': 0}]
    conn.close()

    return popular_section + category_sections

def _get_rank(length, row):
    chunk = math.ceil(length / 2.99)
    return (row / chunk) // 1 + 1

def _create_sections_of(category, rank, conn):
    df = execute_select(conn, TOP_RECIPES_FOR_CATEGORY_QUERY.format(category), RECIPE_COLUMNS)
    recipes_json = json.loads(df.to_json(orient='records'))
    return {'name': category, 'recipes': recipes_json, 'rank': int(rank)}


def _recommend_recipes(user_id):
    return generate_tf_idf_recommendations(user_id)

def connect():
    """Open a PostgreSQL connection through psycopg2.

    The connection settings default to the values that were previously
    hardcoded here and can be overridden with the environment variables
    EATIN_DB_HOST, EATIN_DB_NAME, EATIN_DB_USER and EATIN_DB_PASSWORD, so real
    credentials do not have to live in the notebook.

    Returns:
        The psycopg2 connection, or None when connecting failed (the error is
        printed rather than raised, so callers must handle None).
    """
    import os  # local import keeps this block self-contained

    conn = None
    try:
        print('Connecting...')
        conn = psycopg2.connect(
            host=os.environ.get('EATIN_DB_HOST', '10.10.248.108'),
            database=os.environ.get('EATIN_DB_NAME', 'postgres'),
            user=os.environ.get('EATIN_DB_USER', 'eatin'),
            password=os.environ.get('EATIN_DB_PASSWORD', 'eatin'))
    except Exception as error:  # covers psycopg2.DatabaseError as well
        print(error)
    else:
        # Only report success when the connection was actually established.
        print('Connection Successful')

    return conn


def execute_select(conn, query, column_names):
    """Run ``query`` on ``conn`` and return the rows as a DataFrame.

    Args:
        conn: open DB-API connection (a psycopg2 connection in this notebook).
        query: SQL text to execute as-is.
        column_names: column labels for the resulting DataFrame.

    Returns:
        pandas.DataFrame: the fetched rows; an empty frame with
        ``column_names`` when the query fails (the error is printed, not
        raised).
    """
    rows = []
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        rows = cursor.fetchall()
    except Exception as error:  # covers psycopg2.DatabaseError as well
        print(error)
        # A failed statement leaves the connection's transaction aborted, and
        # every later query on the same connection would fail too. Roll back
        # so callers that reuse the connection can keep going.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(rollback_error)
    finally:
        cursor.close()

    return pd.DataFrame(rows, columns=column_names)


def get_df_from(query, columns):
    """Run ``query`` on a fresh connection and return the result as a DataFrame.

    Opens a connection with ``connect``, delegates to ``execute_select`` and
    closes the connection afterwards, including when ``execute_select`` raises.

    Args:
        query: SQL text to execute.
        columns: column labels for the resulting DataFrame.

    Returns:
        Whatever ``execute_select`` returns for the query.
    """
    conn = connect()
    try:
        return execute_select(conn, query, columns)
    finally:
        # connect() returns None on failure; do not mask the original error
        # with an AttributeError from None.close().
        if conn is not None:
            conn.close()

# Number of a user's top-rated recipes used as seeds; passed as the LIMIT of
# GET_USER_TOP_RATED_RECIPES_QUERY.
RECIPE_AMOUNT = 3
# Rating-count threshold compared against in _needs_cold_start: users whose
# count does not reach it are treated as cold-start users.
COLD_START_RATING_AMOUNT = 3

# NOTE(review): the '{}' placeholders in the queries below are filled with
# str.format, so the values are spliced into the SQL text unescaped. Consider
# parameterised queries if these values can come from untrusted input.
# Placeholder: user id. The single result column is read as 'count'.
COUNT_USER_RATINGS_QUERY = "select count(*) from ratings where user_id = '{}'"
# Loads every recipe row; the result is labelled with RECIPE_COLUMNS.
ALL_RECIPES_QUERY = "select * from recipes"
# NOTE(review): not referenced in the visible part of the notebook, and it
# names 'recipe_id' while the query below joins on ratings.recipe_index;
# confirm which one matches the ratings table.
USER_RATINGS_COLUMNS = ['user_id', 'recipe_id', 'rating']
# Placeholders: user id, row limit. Returns recipe titles ordered by the
# user's rating, highest first.
GET_USER_TOP_RATED_RECIPES_QUERY = "select recipes.recipe_title from ratings, recipes \
                                        where ratings.user_id = '{}' \
                                        and ratings.recipe_index = recipes.index \
                                        order by ratings.rating desc \
                                        limit {}"

# The 20 recipes with the highest vote_count, ties broken by rating.
MOST_POPULAR_QUERY = "select * from recipes order by vote_count desc, rating desc limit 20;"

# Top 10 categories by popularity_score, a weighted mix of total votes (0.4),
# average rating (0.4) and recipe count (0.2). row_num numbers the categories
# in the same descending score order; the result is labelled with
# TOP_CATEGORIES_COLUMNS.
TOP_CATEGORIES_QUERY = "SELECT \
  category, \
  COUNT(*) AS recipe_count, \
  SUM(vote_count) AS total_votes, \
  AVG(rating) AS average_rating, \
  (0.4 * SUM(vote_count) + 0.4 * AVG(rating) + 0.2 * COUNT(*)) AS popularity_score, \
  ROW_NUMBER () OVER (ORDER BY (0.4 * SUM(vote_count) + 0.4 * AVG(rating) + 0.2 * COUNT(*)) desc) as row_num \
FROM recipes \
GROUP BY category \
ORDER BY popularity_score DESC \
LIMIT 10;"

# Placeholder: category name. The 20 recipes of that category with the highest
# vote_count * rating product.
TOP_RECIPES_FOR_CATEGORY_QUERY = "select * from recipes \
where category = '{}' \
order by (vote_count * rating) desc \
limit 20;"


# Column labels for TOP_CATEGORIES_QUERY results, in SELECT order.
TOP_CATEGORIES_COLUMNS = ['category', 'recipe_count', 'total_votes', 'average_rating', 'popularity_score', 'row_num']
# Column labels applied to "select * from recipes" results.
# NOTE(review): this relies on the table's physical column order matching this
# list; confirm against the schema.
RECIPE_COLUMNS = ['index',
                  'recipe_title',
                  'url',
                  'record_health',
                  'vote_count',
                  'rating',
                  'description',
                  'cuisine',
                  'course',
                  'diet',
                  'prep_time',
                  'cook_time',
                  'ingredients',
                  'instructions',
                  'author',
                  'tags',
                  'category',
                  'image',
                  'difficulty',
                  'total_time']

def load_model():
    """Placeholder for loading a persisted model; currently returns ''."""
    # TODO: once the model is saved in a data structure, load it from there
    placeholder = ''
    return placeholder


def process_text(text):
    """Normalise text: collapse every whitespace run to one space, then lowercase.

    Leading and trailing whitespace is removed as a side effect of the split.
    """
    words = text.split()
    return ' '.join(words).lower()


def index_from_title(df, title):
    """Return the index label of the first row whose ``recipe_title`` equals ``title``.

    Raises IndexError when no row matches.
    """
    title_matches = df['recipe_title'] == title
    matching_rows = df.loc[title_matches]
    return matching_rows.index.values[0]


def title_from_index(df, index):
    """Return the ``recipe_title`` of the first row whose index label equals ``index``.

    Raises IndexError when no row carries that label.
    """
    label_matches = df.index == index
    titles = df.loc[label_matches, 'recipe_title']
    return titles.values[0]


def recommendations(recipe_title, df, cosine_similarity_matrix, number_of_recommendations):
    """Return the rows of ``df`` most similar to the recipe titled ``recipe_title``.

    The value returned by ``index_from_title`` is used both as the row of
    ``cosine_similarity_matrix`` to read and as the position to exclude, and
    the selected matrix positions are looked up with ``df.iloc``. ``df``
    therefore needs a default 0..n-1 index whose rows line up with the matrix
    rows.

    Args:
        recipe_title: title of the seed recipe; must be present in ``df``.
        df: recipes frame with a ``recipe_title`` column.
        cosine_similarity_matrix: square pairwise-similarity matrix.
        number_of_recommendations: maximum number of rows to return.

    Returns:
        pandas.DataFrame: up to ``number_of_recommendations`` rows, most
        similar first, never including the seed recipe itself.

    Raises:
        IndexError: if no row of ``df`` has the given title.
    """
    index = index_from_title(df, recipe_title)
    # Exclude the seed recipe explicitly. Dropping "the first sorted entry"
    # is wrong whenever another recipe ties with (or beats) the seed's own
    # score, e.g. duplicate descriptions or an all-zero similarity row.
    similarity_scores = [
        (position, score)
        for position, score in enumerate(cosine_similarity_matrix[index])
        if position != index
    ]
    similarity_scores.sort(key=lambda pair: pair[1], reverse=True)
    recommendations_indices = [position for position, _ in similarity_scores[:number_of_recommendations]]

    return df.iloc[recommendations_indices]


# TODO: This model receives a recipe title (I think I can change it to recipe index),
#       and returns the top 12 recipes that have the most similar descriptions, using tf-idf.
#       I suppose there are several ways to get the recipe title. Currently, I will do:
#       1. get all of the user's ratings from the ratings table.
#       2. get the top X recipes with the highest rating
#       3. run the model on each recipe
#       4. combine the results and drop duplicates
def generate_tf_idf_recommendations(user_id):
    """Build one "Because You Liked ..." section per top-rated recipe of the user.

    Loads all recipes, computes the description similarity matrix, fetches the
    user's RECIPE_AMOUNT top-rated recipe titles and builds a section for each
    with ``_build_section``. A section's rank is the 1-based position of its
    seed recipe in the user's top-rated list.

    Args:
        user_id: identifier of the user. NOTE(review): it is spliced into the
            SQL text with str.format; switch to a parameterised query if it
            can come from untrusted input.

    Returns:
        list: section dicts; empty when there are no recipes with a
        description. Liked titles that have no described recipe are skipped.
    """
    all_recipes = get_df_from(ALL_RECIPES_QUERY, RECIPE_COLUMNS)
    # _calc_model only keeps rows with a description, so the matrix rows are
    # positions within that filtered set. Use the same filtered, re-indexed
    # frame for title lookups and iloc so positions and matrix rows line up.
    described_recipes = all_recipes[all_recipes['description'].notna()].reset_index(drop=True)
    if described_recipes.empty:
        return []

    # NOTE: the model is recomputed over every recipe on each call.
    cosine_similarity_matrix = _calc_model(described_recipes)
    user_liked_recipes_df = get_df_from(GET_USER_TOP_RATED_RECIPES_QUERY.format(user_id, RECIPE_AMOUNT),
                                        ['recipe_title'])
    known_titles = set(described_recipes['recipe_title'])

    return [
        _build_section(recipe_title, described_recipes, cosine_similarity_matrix, position + 1)
        for position, recipe_title in enumerate(user_liked_recipes_df['recipe_title'])
        # A liked recipe without a description has no matrix row to query.
        if recipe_title in known_titles
    ]


def _build_section(recipe_title, all_recipes, cosine_similarity_matrix, rank):
    df = recommendations(recipe_title, all_recipes, cosine_similarity_matrix, 12)
    recipes_json = json.loads(df.to_json(orient='records'))

    return {'name': 'Because You Liked {}'.format(recipe_title), 'recipes': recipes_json, 'rank': rank}


def _calc_model(all_recipes):
    df = all_recipes[all_recipes['description'].notna()]
    df['description'] = df.apply(lambda x: process_text(x.description), axis=1)

    tf_idf = TfidfVectorizer(stop_words='english')
    tf_idf_matrix = tf_idf.fit_transform(df['description'])
    cosine_similarity_matrix = cosine_similarity(tf_idf_matrix, tf_idf_matrix)

    return cosine_similarity_matrix

In [167]:
import pandas as pd
import psycopg2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Exercise the cold-start path for one user id; this opens database connections.
# NOTE(review): despite its name, `df` holds the list of section dicts returned
# by _get_cold_start_recipes, not a DataFrame.
df = _get_cold_start_recipes("mWZhyn9C7FTM1XpvZSSTvttvnGm2")
# df = _recommend_recipes("mWZhyn9C7FTM1XpvZSSTvttvnGm2")

Connecting...
Connection Successful


In [168]:
# Display the sections built in the previous cell as this cell's output.
df

[{'name': 'Popular On Eatin',
  'recipes': [{'index': 7928,
    'recipe_title': 'Vegetarian Thai Green Curry Recipe',
    'url': 'https://www.archanaskitchen.com/video-recipe-vegetarian-thai-green-curry',
    'record_health': 'good',
    'vote_count': 80628,
    'rating': 4.7121674756,
    'description': 'The Vegetarian Thai Green Curry is close to authentic Thai made from   with the vibrant fresh hot chilli peppers and green coriander leaves, ground with lemon grass, I like to load it with the choicest vegetables making it simply delectable and spicy. I personally like the curry being green, hence I grind some fresh coriander leaves or even Italian parsley, to give it a burst of emerald colour. Serve the Vegetarian Thai Green Curry Recipe with a bowl of   or   t o enjoy your Thai style Meal.  The choice of vegetables for the curry is solely your choice and taste. You can make your choices from the following combinations. The list can be so imaginative, that I purely like to play with 