**Importing the Libraries**

In [None]:
import pandas as pd
from google.colab import drive
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from google.colab import drive
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

**Importing the data**

In [None]:
def optimise_data(data):    
    #Optimizing the memory use by down sizing the data types to the requirements
    data['order_id']=data['order_id'].astype('int32')
    data['user_id']=data['user_id'].astype('int32')
    data['order_number']=data['order_number'].astype('int16')
    data['order_dow']=data['order_dow'].astype('int16')
    data['order_hour_of_day']=data['order_hour_of_day'].astype('int16')
    data['days_since_prior_order']=data['days_since_prior_order'].astype('int16')
    data['product_id']=data['product_id'].astype('int32')
    data['add_to_cart_order']=data['add_to_cart_order'].astype('int16')
    data['reordered']=data['reordered'].astype('int16')
    data['aisle_id']=data['aisle_id'].astype('int16')
    data['department_id']=data['department_id'].astype('int16')
    data.drop(['Unnamed: 0','index'],axis=1,inplace=True)    
    return data

In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
data=pd.read_csv('/content/drive/MyDrive/Tittu_data/final_data.csv')
data=optimise_data(data)

In [None]:
#Transformation
data['user_purchase_count']=data.groupby('user_id')['user_id'].transform('count')
data['product_ordered_count']=data.groupby('product_id')['product_id'].transform('count')

# **Model Building**

**Collaborative filtering**

In [None]:
#Now we will be filtering the data out with relavant products
#Only those customer who has made min of 250 orders and products which were order a min of x time will be taken for further analysis
df1=data[(data['user_purchase_count']>250)]
df1=df1[(df1['product_ordered_count']>=300)]
df1=df1[df1['reordered']==1]
df1=df1[['user_id','product_name']]
df1=df1.set_index('user_id')
df1['tri']=df1.groupby(['user_id','product_name'])['product_name'].transform('count')
product_based=pd.pivot_table(df1,columns='user_id',index='product_name',values='tri')
product_based=product_based.fillna(0)
user_based=pd.pivot_table(df1,index='user_id',columns='product_name',values='tri')
user_based=user_based.fillna(0)
#del(data)
del(df1)

**Cosin similarity**

In [None]:
#finding cosin similarity
user_similarity_score=cosine_similarity(user_based)         #Compare each user with other users and find similar users
user_similarity_score.shape
product_similarity_score=cosine_similarity(product_based)         #Compare each product with other product and find similar product
product_similarity_score.shape

(11249, 11249)

In [None]:
#Defining a function which would take a product name and returns suggesion based on the product
def product_recommender(product_name):
  index=np.where(product_based.index==product_name)[0][0]
  similar_items=sorted(list(enumerate(product_similarity_score[index])),key=lambda x:x[1],reverse=True)[1:6]
  name=[]
  for i in similar_items:
    name.append(product_based.index[i[0]])
  return name

In [25]:
product_recommender('0% Fat Blueberry Greek Yogurt')

['Greek 0% Fat Strawberry on the Bottom Yogurt',
 '0% Fat Superfruits Greek Yogurt',
 'YoKids Strawberry Banana Organic Lowfat Yogurt',
 'Raspberry, Blueberry Banana Fruit Puree',
 'Roasted Red Pepper Hummus With Chips']

In [None]:
#takes user id and returns most frequently purchased products
def users_top_purchase(user_ids):
  result=data[data['user_id']==user_ids].sort_values('product_ordered_count',ascending=False)[['product_name','product_ordered_count']]
  result.drop_duplicates(inplace=True)
  return result['product_name'].head(5).values

#Defining a function which would take a user name and returns similar users
def user_recommender(user_name):
  index=np.where(user_based.index==user_name)[0][0]
  similar_items=sorted(list(enumerate(user_similarity_score[index])),key=lambda x:x[1],reverse=True)[1:6]
  name=[]
  for i in similar_items:
    name.append(user_based.index[i[0]])
  return users_top_purchase(name[0])


In [26]:
user_based.index

Int64Index([    17,     27,     31,     50,     54,     63,     71,     75,
                86,     90,
            ...
            206154, 206165, 206174, 206187, 206193, 206199, 206200, 206201,
            206206, 206208],
           dtype='int64', name='user_id', length=36576)

In [27]:
user_recommender(90)

array(['Banana', 'Large Lemon', 'Strawberries', 'Limes',
       'Organic Zucchini'], dtype=object)

**Near neighbours**

In [21]:
#Product based
table_sparse1=csr_matrix(product_based)  #To consider only value and to avoid 0 to reduce computation time
#Building the model
model=NearestNeighbors(algorithm='brute')
model.fit(table_sparse1)
#Input 
def nn_recommend_product(product_name):
  id=np.where(product_based.index==product_name)[0][0]
  distance,suggestion=model.kneighbors(product_based.iloc[id,:].values.reshape(1,-1),n_neighbors=5)
  name=[]
  for i in range(len(suggestion)):
    name.extend(product_based.index[suggestion[i]])
    return name

In [24]:
nn_recommend_product('0% Fat Blueberry Greek Yogurt')

['0% Fat Blueberry Greek Yogurt',
 'Bamboo Skewers',
 'Poppy Seed',
 'Double Superfine Mustard Powder, Original English',
 'Organic Light Corn Syrup']

In [31]:
#user based
table_sparse2=csr_matrix(user_based)  #To consider only value and to avoid 0 to reduce computation time
#Building the model
model=NearestNeighbors(algorithm='brute')
model.fit(table_sparse2)
#Input 
def nn_recommend_user(user_name):
  id=np.where(user_based.index==user_name)[0][0]
  distance,suggestion=model.kneighbors(user_based.iloc[id,:].values.reshape(1,-1),n_neighbors=5)
  name=[]
  for i in range(len(suggestion)):
    name.extend(user_based.index[suggestion[i]])
    return users_top_purchase(name[0])

In [32]:
nn_recommend_user(50)

array(['Bag of Organic Bananas', 'Organic Strawberries',
       'Organic Baby Spinach', 'Organic Hass Avocado',
       'Organic Yellow Onion'], dtype=object)

**Popularity based model**

In [43]:
#Recommends top 10 popular products
data['Product_reorder_count']=data.groupby('product_name')['reordered'].transform('sum')
data['reorder_ratio']=(data['Product_reorder_count']/(len(data['reordered']==1)))
popular_products=data[['product_id','product_name','product_ordered_count','Product_reorder_count','reorder_ratio','department']]
popular_products=popular_products.drop_duplicates().reset_index(drop=bool)
popular_products['weight']=popular_products['product_ordered_count']*popular_products['reorder_ratio']
popular_products.sort_values('weight',ascending=False)['product_name'].head(10).values

array(['Banana', 'Bag of Organic Bananas', 'Organic Strawberries',
       'Organic Baby Spinach', 'Organic Hass Avocado', 'Organic Avocado',
       'Large Lemon', 'Organic Whole Milk', 'Organic Raspberries',
       'Strawberries'], dtype=object)

Amoung all these model Collaborative filtering using cosin similary was giving Relevant recommendation