In [1]:
import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from matplotlib import pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

InteractiveShell.ast_node_interactivity = "all"

## Popularity Based Recommendations

Recommend items based on how popular they are among all users (much like the trending videos on YouTube).

In [2]:

# Load the menu catalogue. header=0 consumes the CSV's own header row and
# `names` replaces it with the column names used throughout this notebook.
food_df = pd.read_csv(
    './db/food_new.csv',
    header=0,
    names=[
        'food_id', 'title', 'canteen_id', 'price', 'num_orders',
        'category', 'avg_rating', 'num_rating', 'tags',
    ],
)

food_df.head()

Unnamed: 0,food_id,title,canteen_id,price,num_orders,category,avg_rating,num_rating,tags
0,1,Lala Maggi,1,30,20059,maggi,3.9,10,"veg, spicy"
1,2,Cheese Maggi,1,25,20128,maggi,3.8,15,veg
2,3,Masala Maggi,1,25,19724,maggi,3.0,10,"veg, spicy"
3,4,Veg Maggi,1,30,19904,maggi,2.5,5,"veg, healthy"
4,5,Paneer Tikka,1,60,20454,Punjabi,4.6,30,"veg, healthy"


In [3]:
# mean_value: average rating over the whole menu.
mean_value = food_df['avg_rating'].mean()
# min_value: 60th percentile of the per-item rating counts, used as the cutoff.
min_value = food_df['num_rating'].quantile(0.6)
# Qualifying items: at least `min_value` ratings, held as an independent copy.
q_items = food_df[food_df['num_rating'] >= min_value].copy()
def weighted_rating(x, min_value=min_value, mean_value=mean_value):
    """Blend an item's own average rating with the menu-wide mean.

    The item's ``avg_rating`` is weighted by ``v / (v + m)`` and ``mean_value``
    by ``m / (v + m)``, where ``v`` is the item's ``num_rating`` and ``m`` is
    ``min_value``.

    Args:
        x: Row-like object exposing ``'num_rating'`` and ``'avg_rating'``.
        min_value: Rating-count weight ``m``; the default is captured from the
            module-level ``min_value`` when this function is defined.
        mean_value: Prior mean; the default is captured from the module-level
            ``mean_value`` when this function is defined.

    Returns:
        The weighted score for the row.
    """
    votes = x['num_rating']
    item_mean = x['avg_rating']
    total = votes + min_value
    own_part = votes / total * item_mean
    prior_part = min_value / total * mean_value
    return own_part + prior_part
# Score every qualifying item row by row, then rank best-first.
q_items['score'] = q_items.apply(weighted_rating, axis='columns')
top_rated_items = q_items.sort_values(by='score', ascending=False)
# Popularity ranking over the full menu: most-ordered items first.
frequent_items = food_df.sort_values(by='num_orders', ascending=False)

#### Top Rated Items

In [4]:
# Five best-scoring items, shown next to the inputs that produced each score.
top_rated_items[['title', 'num_rating', 'avg_rating', 'score']].head(n=5)

Unnamed: 0,title,num_rating,avg_rating,score
47,Chapathi,40,4.8,4.562824
48,Vanilla MilkShake,35,4.7,4.488444
4,Paneer Tikka,30,4.6,4.418483
12,Chicken Paneer Butter Masala,30,4.6,4.418483
6,Paneer Butter Masala,30,4.6,4.418483


#### Frequently Ordered Items

In [5]:
# Five most-ordered items across the whole menu.
frequent_items[['title', 'num_orders']].head(n=5)

Unnamed: 0,title,num_orders
4,Paneer Tikka,20454
19,Chicken Maggi,20189
30,Veg Noodles,20185
43,Kova,20169
26,Veg Chowmein,20153


## Content Based Filtering

A more personalised kind of recommendation: we analyse the user's past orders and suggest items that are similar to them.

Also, since each person has a "home canteen", the user should be notified of any new items the vendor adds to its menu.

In [6]:
def create_soup(x):
    """Build the space-separated keyword string ("soup") for one menu row.

    The soup is the row's tags, followed by the words of its title and of its
    category, all lower-cased, with repeated tokens removed (the first
    occurrence wins, so the original ordering is kept).

    Args:
        x: Row-like object exposing ``'tags'`` (comma-separated string),
            ``'title'`` and ``'category'``. Missing cells (``None``/NaN) are
            treated as empty text instead of raising.

    Returns:
        The de-duplicated tokens joined by single spaces.
    """
    def _text(value):
        # A missing CSV cell arrives as NaN (a float), which has no .lower().
        return "" if pd.isna(value) else str(value).lower()

    # Split on the bare comma and strip, so "veg,spicy" and "veg, spicy"
    # both yield the same two tags.
    tokens = [tag.strip() for tag in _text(x['tags']).split(',')]
    tokens.extend(_text(x['title']).split())
    tokens.extend(_text(x['category']).split())
    # dict.fromkeys drops duplicates while keeping first-occurrence order;
    # empty tokens (from blank cells or trailing commas) are skipped.
    return " ".join(dict.fromkeys(token for token in tokens if token))

# Attach one keyword soup per menu row; the similarity model is built on this column.
food_df['soup'] = food_df.apply(create_soup, axis='columns')
food_df.head(n=3)

Unnamed: 0,food_id,title,canteen_id,price,num_orders,category,avg_rating,num_rating,tags,soup
0,1,Lala Maggi,1,30,20059,maggi,3.9,10,"veg, spicy",veg spicy lala maggi
1,2,Cheese Maggi,1,25,20128,maggi,3.8,15,veg,veg cheese maggi
2,3,Masala Maggi,1,25,19724,maggi,3.0,10,"veg, spicy",veg spicy masala maggi


In [7]:

# Bag-of-words encoding of every item's soup, with English stop words removed.
count = CountVectorizer(stop_words='english')

count_matrix = count.fit_transform(food_df['soup'])

# Item-to-item cosine similarity over the count vectors.
# `cosine_similarity` is imported in the notebook's import cell.
cosine_sim = cosine_similarity(count_matrix, count_matrix)

# Reverse lookups from a title / food_id to the row label in food_df.
indices_from_title = pd.Series(food_df.index, index=food_df['title'])
indices_from_food_id = pd.Series(food_df.index, index=food_df['food_id'])

In [8]:
def get_recommendations(title="", cosine_sim=cosine_sim, idx=-1, top_n=2):
    """Return the row indices of the items most similar to one menu item.

    The query item is identified by ``idx`` or, when ``idx`` is left at -1,
    by looking ``title`` up in ``indices_from_title``.

    Args:
        title: Title of the query item; only used when ``idx`` is -1.
        cosine_sim: Square item-to-item similarity matrix; the default is the
            module-level matrix captured when this function is defined.
        idx: Row of the query item in ``cosine_sim``, or -1 to look up ``title``.
            NOTE(review): the lookups store ``food_df.index`` labels, so this
            assumes labels equal row positions (default RangeIndex) — confirm.
        top_n: Number of neighbours to return (default 2, as before).

    Returns:
        List of up to ``top_n`` row indices, most similar first; the query
        item itself is never included.

    Raises:
        ValueError: If neither ``title`` nor ``idx`` identifies an item.
        KeyError: If ``title`` is not present in ``indices_from_title``.
    """
    if idx == -1:
        if title == "":
            # Previously this fell through and silently used the last row (-1).
            raise ValueError("get_recommendations needs either a title or an idx")
        idx = indices_from_title[title]
        if isinstance(idx, pd.Series):
            # A duplicated title returns several row labels; use the first one.
            idx = idx.iloc[0]

    # Drop the query item explicitly rather than assuming it sorts first:
    # another item with an identical soup ties at the same similarity, and the
    # stable sort would then keep the lower-index item ahead of the query item.
    candidates = (
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    )
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    return [position for position, _ in ranked[:top_n]]

In [9]:
# Spot-check: the nearest neighbours of a known dish.
food_df.loc[get_recommendations(title="Paneer Tikka"), :]

Unnamed: 0,food_id,title,canteen_id,price,num_orders,category,avg_rating,num_rating,tags,soup
8,9,Paneer Masala,1,60,19658,Punjabi,4.6,30,"veg, healthy, spicy",veg healthy spicy paneer masala punjabi
6,7,Paneer Butter Masala,1,60,20136,Punjabi,4.6,30,"veg, healthy, spicy",veg healthy spicy paneer butter masala punjabi


Next we define a few functions: some are small utilities, while the others produce the personalised recommendations for the current user.

In [10]:
def get_latest_user_orders(user_id, orders, num_orders=3, date_column=None):
    """Return the index labels of up to ``num_orders`` orders placed by a user.

    By default the user's orders are taken in the frame's row order, so the
    result is the *latest* orders only if ``orders`` is sorted newest-first.
    Pass ``date_column`` to rank the user's orders by that column instead,
    newest first, regardless of row order.

    Args:
        user_id: Value to match against ``orders['user_id']``.
        orders: DataFrame with a ``'user_id'`` column.
        num_orders: Maximum number of labels to return; zero or a negative
            value yields an empty list.
        date_column: Optional name of a column parseable by
            ``pd.to_datetime``; when given, orders are ranked by it descending.

    Returns:
        List of index labels from ``orders`` (empty if the user has no orders).
    """
    # Clamp so that 0 / negative requests return nothing. The previous counter
    # loop returned every match, or nothing, depending on the first row.
    limit = max(int(num_orders), 0)
    user_orders = orders.loc[orders['user_id'] == user_id]

    if date_column is None:
        labels = user_orders.index
    else:
        placed_at = pd.to_datetime(user_orders[date_column])
        # mergesort is stable, so orders with equal timestamps keep row order.
        labels = placed_at.sort_values(ascending=False, kind='mergesort').index

    return labels[:limit].tolist()

In [11]:
def get_recomms_df(food_indices, food_df, columns, comment):
    """Build the display table for a collection of recommended menu rows.

    Args:
        food_indices: Iterable (list or set) of row labels in ``food_df``.
        food_df: Menu DataFrame providing ``'title'``, ``'canteen_id'`` and
            ``'price'``.
        columns: Column names of the returned frame, in order. Names other
            than the three menu columns and ``'comment'`` are left empty.
        comment: Text written into the ``'comment'`` column of every row,
            when ``columns`` contains ``'comment'``.

    Returns:
        DataFrame with one row per entry of ``food_indices``, in iteration
        order, indexed 0..n-1.

    Raises:
        KeyError: If a label in ``food_indices`` is not in ``food_df``.
    """
    # Materialise first: callers pass sets, which newer pandas rejects as
    # .loc indexers, and a list fixes the row order.
    labels = list(food_indices)
    picked = food_df.loc[labels, ['title', 'canteen_id', 'price']]
    result = picked.reindex(columns=columns).reset_index(drop=True)
    if 'comment' in result.columns:
        # Assign the whole column directly. The previous
        # `df.loc[row].comment = ...` was a chained assignment that can write
        # to a temporary copy and be lost.
        result['comment'] = comment
    return result

In [12]:
def personalised_recomms(orders, food_df, user_id, columns, comment="based on your past orders"):
    """Recommend items similar to the ones a user recently ordered.

    For each order returned by ``get_latest_user_orders`` the ordered food is
    mapped to its menu row via ``indices_from_food_id`` and its neighbours are
    collected with ``get_recommendations``.

    Args:
        orders: Orders DataFrame with ``'user_id'`` and ``'food_id'`` columns.
        food_df: Menu DataFrame passed on to ``get_recomms_df``.
        user_id: User whose orders seed the recommendations.
        columns: Column names for the returned table.
        comment: Text placed in the table's comment column.

    Returns:
        DataFrame from ``get_recomms_df`` with one row per distinct
        recommended item, ordered by first appearance.
    """
    order_indices = get_latest_user_orders(user_id, orders)

    recomm_indices = []
    for food_id in orders.loc[order_indices, 'food_id']:
        recomm_indices.extend(get_recommendations(idx=indices_from_food_id[food_id]))

    # De-duplicate with dict.fromkeys instead of set(): a set iterates in hash
    # order, which threw away the seed-order / similarity ranking of the results.
    unique_indices = list(dict.fromkeys(recomm_indices))
    return get_recomms_df(unique_indices, food_df, columns, comment)

In [13]:
def get_new_and_specials_recomms(new_and_specials, users, food_df, canteen_id, columns, comment="new/today's special item  in your home canteen"):
    """Tabulate the new/special items offered by one canteen.

    Args:
        new_and_specials: DataFrame with ``'canteen_id'`` and ``'food_id'``.
        users: Not used by this function; kept in the signature for callers.
        food_df: Menu DataFrame passed on to ``get_recomms_df``.
        canteen_id: Canteen whose entries are selected.
        columns: Column names for the returned table.
        comment: Text placed in the table's comment column.

    Returns:
        DataFrame from ``get_recomms_df`` with one row per distinct item.
    """
    # Map each matching entry's food_id to its row label in the menu.
    matching_rows = [
        indices_from_food_id[new_and_specials.loc[label].food_id]
        for label, entry in new_and_specials[['canteen_id']].iterrows()
        if entry.canteen_id == canteen_id
    ]
    distinct_rows = set(matching_rows)
    return get_recomms_df(distinct_rows, food_df, columns, comment)

In [14]:
def get_user_home_canteen(users, user_id):
    """Look up a user's home canteen.

    Args:
        users: DataFrame with ``'user_id'`` and ``'home_canteen'`` columns.
        user_id: User to look up.

    Returns:
        The ``home_canteen`` of the first row whose ``user_id`` matches,
        or -1 when no row matches.
    """
    canteens = users.loc[users['user_id'] == user_id, 'home_canteen']
    if canteens.empty:
        return -1
    return canteens.iloc[0]

In [15]:
def get_top_rated_items(top_rated_items, df1, columns, comment="top rated items across canteens"):
    """Tabulate the top-rated items in their given order.

    Args:
        top_rated_items: DataFrame with a ``'food_id'`` column, already ranked.
        df1: Menu DataFrame passed on to ``get_recomms_df``.
        columns: Column names for the returned table.
        comment: Text placed in the table's comment column.

    Returns:
        DataFrame from ``get_recomms_df``, one row per row of ``top_rated_items``.
    """
    # Translate every ranked row's food_id into its row label in the menu.
    menu_rows = [
        indices_from_food_id[top_rated_items.loc[label].food_id]
        for label in top_rated_items.index
    ]
    return get_recomms_df(menu_rows, df1, columns, comment)

In [16]:
def get_popular_items(pop_items, df1, columns, comment="most popular items across canteens"):
    """Tabulate the most-ordered items in their given order.

    Args:
        pop_items: DataFrame with a ``'food_id'`` column, already ranked.
        df1: Menu DataFrame passed on to ``get_recomms_df``.
        columns: Column names for the returned table.
        comment: Text placed in the table's comment column.

    Returns:
        DataFrame from ``get_recomms_df``, one row per row of ``pop_items``.
    """
    # Translate every ranked row's food_id into its row label in the menu.
    menu_rows = [
        indices_from_food_id[pop_items.loc[label].food_id]
        for label in pop_items.index
    ]
    return get_recomms_df(menu_rows, df1, columns, comment)

### Final Results.

In [29]:
# Load the three input tables used by the recommenders.
orders = pd.read_csv('./db/new_orders.csv')
new_and_specials = pd.read_csv('./db/new_and_specials.csv')
users = pd.read_csv('./db/users.csv')

# Layout of every recommendation table, and the user to recommend for.
columns = ['title', 'canteen_id', 'price', 'comment']
current_user = 4
# get_user_home_canteen returns -1 when the user is not in `users`.
current_canteen = get_user_home_canteen(users, current_user)

# `res` is reused by the evaluation cells below. Each bare expression here is
# displayed because the first cell sets ast_node_interactivity = "all".
res = personalised_recomms(orders, food_df, current_user, columns)
res
get_new_and_specials_recomms(new_and_specials, users, food_df, current_canteen, columns).head()
# Top-rated and most-ordered items, independent of the current user.
get_top_rated_items(top_rated_items, food_df, columns).head()
get_popular_items(frequent_items, food_df, columns).head(3)

Unnamed: 0,title,canteen_id,price,comment
0,Paneer Masala,1,60,based on your past orders
1,Chapati,1,20,based on your past orders
2,Veg Biriyani,1,60,based on your past orders
3,Paneer Butter Masala,1,60,based on your past orders


Unnamed: 0,title,canteen_id,price,comment
0,Cheese Maggi,1,25,new/today's special item in your home canteen
1,Paneer Tikka,1,60,new/today's special item in your home canteen
2,Chicken Tikka,1,80,new/today's special item in your home canteen
3,Chicken Butter Masala,1,80,new/today's special item in your home canteen
4,Veg Biriyani,1,60,new/today's special item in your home canteen


Unnamed: 0,title,canteen_id,price,comment
0,Chapathi,1,25,top rated items across canteens
1,Vanilla MilkShake,1,80,top rated items across canteens
2,Paneer Tikka,1,60,top rated items across canteens
3,Chicken Paneer Butter Masala,1,60,top rated items across canteens
4,Paneer Butter Masala,1,60,top rated items across canteens


Unnamed: 0,title,canteen_id,price,comment
0,Paneer Tikka,1,60,most popular items across canteens
1,Chicken Maggi,1,50,most popular items across canteens
2,Veg Noodles,1,145,most popular items across canteens


Testing the Recommendations!!
With a web app we could evaluate the recommendations from order clicks; since there is no web app here, we test against the user's previous order history instead.

In [30]:
# Order history used for the evaluation; this reads the same file that the
# results cell loaded into `orders`.
order_data = pd.read_csv("./db/new_orders.csv")
order_data.head()

Unnamed: 0,order_id,user_id,food_id,canteen_id,date_time,status,amount
0,1,8,5,1,2019-06-28 9:26:03,served,60
1,2,11,5,1,2019-06-29 9:26:03,served,60
2,3,8,6,1,2019-06-30 9:26:03,served,80
3,4,12,5,1,2019-07-01 9:26:03,served,60
4,5,8,5,1,2019-07-02 9:26:03,served,60


In [31]:
# Titles of the personalised recommendations computed above.
res_titles = res['title'].tolist()
res_titles

# Distinct tags of every menu item whose title was recommended,
# in order of first appearance.
res_tags = []
for tag_string in food_df.loc[food_df['title'].isin(res_titles), 'tags']:
    for tag in (piece.strip() for piece in tag_string.split(',')):
        if tag not in res_tags:
            res_tags.append(tag)

print("Recommended Tags ",res_tags)

['Paneer Masala', 'Chapati', 'Veg Biriyani', 'Paneer Butter Masala']

Recommend Tags  ['veg', 'healthy', 'spicy']


In [32]:
# Food ids of every order the current user has placed.
current_user_orders = order_data[order_data['user_id'] == current_user]
res_food_ids = current_user_orders['food_id'].tolist()

# tp counts orders where more tags are in res_tags than not; fn counts the rest.
tp,fn=0,0
for ids in res_food_ids:
    value_tags = food_df[food_df['food_id'] == ids]["tags"].values[0].split(',')
    value_tags = [tag.strip() for tag in value_tags]
    pc = sum(tag in res_tags for tag in value_tags)
    nc = len(value_tags) - pc
    if pc > nc:
        tp += 1
    else:
        fn += 1

# A user with no orders would otherwise raise ZeroDivisionError; report NaN instead.
accuracy = tp / len(res_food_ids) if res_food_ids else float('nan')
print("Accuracy", accuracy)


Accuracy 0.9838709677419355
