# Collaborative Fitting Recommendation Model

### Import necessary libraries

In [7]:
import pandas as pd

### Get data

In [9]:
grocery_sells = 'grocery_sells.csv'
grocery_df = pd.read_csv(grocery_sells)

### View entire dataframe

In [37]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Obtain only necessary columns

In [21]:
grocery_df = grocery_df[['Customer Name', 'Category', 'Sub Category']]

### Print some data

In [23]:
grocery_df.head()

Unnamed: 0,Customer Name,Category,Sub Category
0,Harish,Oil & Masala,Masalas
1,Sudha,Beverages,Health Drinks
2,Hussain,Food Grains,Atta & Flour
3,Jackson,Fruits & Veggies,Fresh Vegetables
4,Ridhesh,Food Grains,Organic Staples


### Check for missing data

In [25]:
grocery_df.isnull().sum()

Customer Name    0
Category         0
Sub Category     0
dtype: int64

### Group data by customer name and sub category, Determine the sub categories the customer has purchased from using a pivot table

In [43]:
grouped_df = grocery_df.groupby(['Customer Name', 'Sub Category']).size().unstack(fill_value=0).reset_index()

In [44]:
grouped_df
    

Sub Category,Customer Name,Atta & Flour,Biscuits,Breads & Buns,Cakes,Chicken,Chocolates,Cookies,Dals & Pulses,Edible Oil & Ghee,Eggs,Fish,Fresh Fruits,Fresh Vegetables,Health Drinks,Masalas,Mutton,Noodles,Organic Fruits,Organic Staples,Organic Vegetables,Rice,Soft Drinks,Spices
0,Adavan,8,10,12,7,5,13,11,9,8,3,7,8,8,14,7,5,13,7,5,10,6,12,17
1,Aditi,10,7,12,14,4,11,10,8,14,4,3,3,7,15,7,7,5,9,9,6,4,12,6
2,Akash,10,14,11,6,11,4,9,10,14,9,4,7,4,13,7,7,10,10,5,2,10,10,9
3,Alan,4,9,10,8,5,4,19,11,12,9,5,5,10,10,7,8,7,4,10,9,11,13,8
4,Amrish,9,17,15,10,4,6,12,13,3,5,6,11,9,19,8,8,17,6,7,6,11,13,12
5,Amy,10,6,10,11,2,12,12,8,6,9,4,8,5,22,10,7,12,8,7,6,2,11,8
6,Anu,6,1,7,7,9,14,9,4,11,8,9,4,0,18,10,11,10,6,5,8,11,13,5
7,Arutra,6,10,16,7,8,8,8,8,9,11,9,8,14,12,12,9,7,7,11,9,2,18,9
8,Arvind,5,11,10,12,5,5,9,9,11,8,9,10,13,13,11,4,10,8,7,8,7,11,7
9,Esther,9,9,11,8,3,7,14,1,9,3,13,7,7,9,7,8,17,3,7,8,8,16,5


### Set index to Customer Name for recommendation generation

In [45]:
pivot_df = grouped_df.set_index('Customer Name', inplace=True)

### Get total number of purchases in each category

In [148]:
subcategory_purchase = grocery_df.groupby(['Category', 'Sub Category']).size().reset_index(name='Counts')

In [149]:
subcategory_purchase

Unnamed: 0,Category,Sub Category,Counts
0,Bakery,Biscuits,459
1,Bakery,Breads & Buns,502
2,Bakery,Cakes,452
3,Beverages,Health Drinks,719
4,Beverages,Soft Drinks,681
5,"Eggs, Meat & Fish",Chicken,348
6,"Eggs, Meat & Fish",Eggs,379
7,"Eggs, Meat & Fish",Fish,369
8,"Eggs, Meat & Fish",Mutton,394
9,Food Grains,Atta & Flour,353


### Get the most popular subcategory in each category

In [151]:
most_popular_subcategory = subcategory_purchase.loc[subcategory_purchase.groupby('Category')['Counts'].idxmax()]

In [152]:
most_popular_subcategory

Unnamed: 0,Category,Sub Category,Counts
1,Bakery,Breads & Buns,502
3,Beverages,Health Drinks,719
8,"Eggs, Meat & Fish",Mutton,394
11,Food Grains,Organic Staples,372
13,Fruits & Veggies,Fresh Fruits,369
18,Oil & Masala,Masalas,463
21,Snacks,Cookies,520


### Create a dictionary holding most popular subcategories

In [153]:
category_to_most_popular_subcategory = dict(zip(most_popular_subcategory['Category'], most_popular_subcategory['Sub Category']))

In [154]:
category_to_most_popular_subcategory

{'Bakery': 'Breads & Buns',
 'Beverages': 'Health Drinks',
 'Eggs, Meat & Fish': 'Mutton',
 'Food Grains': 'Organic Staples',
 'Fruits & Veggies': 'Fresh Fruits',
 'Oil & Masala': 'Masalas',
 'Snacks': 'Cookies'}

### Recommend one subcategory based on another subcategory

In [217]:
def recommend_subcategory(subcategory):
    # find category of subcategory
    category = subcategory_purchase.loc[subcategory_purchase['Sub Category'] == subcategory, 'Category'].iloc[0]
    
    # filter subcategory_purchase for same category
    subcategories_in_category = subcategory_purchase[subcategory_purchase['Category'] == category]
    
    # sort subcategories by count in descending order
    sorted_subcategories = subcategories_in_category.sort_values(by='Counts', ascending=False)['Sub Category'].tolist()
    
    # remove the given subcategory from the list if it exists
    if subcategory in sorted_subcategories:
        sorted_subcategories.remove(subcategory)
    
    # return most popular subcategory
    if len(sorted_subcategories) > 0:
        return sorted_subcategories[0]
    return None

In [218]:
test = recommend_subcategory('Cakes')

In [219]:
print(test)

Breads & Buns


### Build the recommendation system

In [221]:
def get_recommendations(customer_name):
    customer_row = grouped_df.loc[customer_name] # get counts per sub category by customer
    
    similar_customers = grouped_df.corrwith(customer_row, axis=1).sort_values(ascending=False) # gets correlation between customers based on their purchase history
    similar_customer = similar_customers.drop(customer_name).idxmax() # gets the most similar customer
    
    recommendations_similarity = grouped_df.loc[similar_customer] - grouped_df.loc[customer_name] # compare the purchase history between both customers
    recommendations_similarity = recommendations_similarity[recommendations_similarity > 5].index.tolist() # gets items bought by similar customer to recommend to customer
    
    recommendations_purchases = customer_row[customer_row > 11].index.tolist() # get categories where customer purchases at least 12 items
    
    favourite_subcategory = customer_row.idxmax() # get most purchased subcategory
    recommendations_category = recommend_subcategory(favourite_subcategory) # recommend similar item
    
    recommendations_items = list(set(recommendations_similarity) | set(recommendations_purchases)) # combine both recommendations
    
    if recommendations_category not in recommendations_items: # add the similar item if not already there
        recommendations_items.append(recommendations_category)
    
    return recommendations_items

### Use the system

In [222]:
def display(customer_name):
    recommend_items = get_recommendations(customer_name)
    print(f"Recommended items for {customer_name}: {recommend_items}")

In [223]:
display('Alan')
display('Harish')
display('Jonas')
display('Ram')
display('Sharon')
display('Veronica')

Recommended items for Alan: ['Soft Drinks', 'Edible Oil & Ghee', 'Cookies', 'Chocolates']
Recommended items for Harish: ['Chocolates', 'Breads & Buns', 'Soft Drinks', 'Cakes', 'Health Drinks', 'Cookies']
Recommended items for Jonas: ['Soft Drinks', 'Cakes', 'Edible Oil & Ghee', 'Cookies', 'Chocolates']
Recommended items for Ram: ['Chocolates', 'Soft Drinks', 'Health Drinks', 'Chicken', 'Biscuits']
Recommended items for Sharon: ['Soft Drinks', 'Health Drinks', 'Cookies']
Recommended items for Veronica: ['Chocolates', 'Fresh Vegetables', 'Cookies', 'Soft Drinks', 'Fresh Fruits', 'Health Drinks']
