## Zepto Recommendation System

In [1]:
!pip install mlxtend fuzzywuzzy


Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from fuzzywuzzy import process




In [3]:
# Load the data
df = pd.read_csv('session_data.csv')
df.head()


  and should_run_async(code)


Unnamed: 0,session_id,product_name
0,000ed966131fcb96e0efc4ff2b716a3e,beetroot
1,000ed966131fcb96e0efc4ff2b716a3e,cucumber
2,0013eab657eaf2d82d7f1e13023d95c2,onion
3,0013eab657eaf2d82d7f1e13023d95c2,long shelf life milk
4,0013fabde1e543dd541be925266aadbc,dates


In [4]:
# Create a basket dataset
basket = df.groupby(['session_id', 'product_name'])['product_name'].count().unstack().fillna(0)
basket = basket.applymap(lambda x: 1 if x > 0 else 0)

  and should_run_async(code)
  basket = basket.applymap(lambda x: 1 if x > 0 else 0)


In [5]:
# Find frequent itemsets
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)

  and should_run_async(code)


In [6]:

# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules = rules.sort_values('lift', ascending=False)

  and should_run_async(code)


In [7]:

# Create a CountVectorizer object
vectorizer = CountVectorizer(token_pattern=r'\b\w+\b')

  and should_run_async(code)


In [8]:
# Create a document-term matrix
dtm = vectorizer.fit_transform(df['product_name'].unique())

  and should_run_async(code)


In [9]:
# Compute cosine similarity
cosine_sim = cosine_similarity(dtm)

  and should_run_async(code)


In [10]:

# Create a DataFrame with product names and their indices
product_indices = pd.Series(range(len(df['product_name'].unique())), index=df['product_name'].unique())

  and should_run_async(code)


In [11]:
def get_association_recommendations(product, rules, top_n=5):
    product_rules = rules[rules['antecedents'].apply(lambda x: product in x)]
    if product_rules.empty:
        return []
    recommendations = product_rules.nlargest(top_n, 'lift')
    return recommendations['consequents'].apply(lambda x: list(x)[0]).tolist()

  and should_run_async(code)


In [12]:

def get_content_based_recommendations(product, cosine_sim=cosine_sim, product_indices=product_indices, top_n=5):
    if product not in product_indices.index:
        return []
    idx = product_indices[product]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    product_indices_rec = [i[0] for i in sim_scores]
    return df['product_name'].unique()[product_indices_rec].tolist()

  and should_run_async(code)


In [13]:
def get_popular_products(df, top_n=5):
    return df['product_name'].value_counts().nlargest(top_n).index.tolist()

  and should_run_async(code)


In [14]:
def get_hybrid_recommendations(product, rules, cosine_sim, product_indices, df, top_n=5):
    association_recs = get_association_recommendations(product, rules, top_n)
    content_recs = get_content_based_recommendations(product, cosine_sim, product_indices, top_n)

    # Combine and deduplicate recommendations
    hybrid_recs = list(dict.fromkeys(association_recs + content_recs))

    # If we don't have enough recommendations, add popular products
    if len(hybrid_recs) < top_n:
        popular_products = get_popular_products(df, top_n)
        hybrid_recs.extend([p for p in popular_products if p not in hybrid_recs])

    return hybrid_recs[:top_n]

  and should_run_async(code)


In [15]:

def find_similar_products(query, products, limit=5):
    return process.extract(query, products, limit=limit)

  and should_run_async(code)


In [16]:
def interactive_recommendations(query, df, rules, cosine_sim, product_indices):
    products = df['product_name'].unique()
    similar_products = find_similar_products(query, products)

    if similar_products[0][1] == 100:  # Exact match found
        selected_product = similar_products[0][0]
    else:
        print(f"Did you mean one of these products?")
        for i, (product, score) in enumerate(similar_products, 1):
            print(f"{i}. {product}")

        choice = int(input("Enter the number of your choice (or 0 to exit): "))
        if choice == 0:
            return
        selected_product = similar_products[choice-1][0]

    recommendations = get_hybrid_recommendations(selected_product, rules, cosine_sim, product_indices, df)
    print(f"\nRecommendations for {selected_product}:")
    for i, rec in enumerate(recommendations, 1):
        print(f"{i}. {rec}")

  and should_run_async(code)


### Testing

In [18]:
# Test the interactive recommendation system
while True:
    query = input("Enter a product name (or 'quit' to exit): ")
    if query.lower() == 'quit':
        break
    interactive_recommendations(query, df, rules, cosine_sim, product_indices)
    print()

  and should_run_async(code)


Enter a product name (or 'quit' to exit): tea
Did you mean one of these products?
1. tea premix
2. tea & chai
3. green tea
4. leaf tea
5. flavoured tea
Enter the number of your choice (or 0 to exit): 1

Recommendations for tea premix:
1. coffee premix
2. tea & chai
3. green tea
4. leaf tea
5. flavoured tea

Enter a product name (or 'quit' to exit): curd

Recommendations for curd:
1. fresh cow milk
2. cucumber
3. beetroot
4. onion
5. long shelf life milk

Enter a product name (or 'quit' to exit): quit
