In [39]:
# Import the libraries required by the recommendation system
import ast
import re

import nltk
import numpy as np
import pandas as pd
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [40]:
# Load the product catalogue. The file name lives in a constant so the notebook
# can be pointed at another export by editing a single line.
DATA_PATH = 'walmart_mock_products_1000_with_images.csv'
data = pd.read_csv(DATA_PATH)

In [41]:
# Show the first row to check that the file loaded and to see the available columns.
data.head(1)

Unnamed: 0,name,brand,category,aisle,price,image_url,description,on_offer,discount_percent,stock,is_sponsored,tags
0,General Mills Pasta Item 1,General Mills,Pasta,6,4.44,https://images.unsplash.com/photo-160089196459...,A quality pasta product from General Mills.,False,0.0,41,False,"non-GMO, heart healthy, vegan"


In [42]:
# Keep only the columns the recommender uses. The explicit .copy() makes `data`
# an independent frame, so the later in-place edits (dropna, column
# assignments) do not raise pandas' SettingWithCopyWarning.
data = data[['name', 'brand', 'category', 'price', 'discount_percent', 'tags']].copy()
data.head(1)

Unnamed: 0,name,brand,category,price,discount_percent,tags
0,General Mills Pasta Item 1,General Mills,Pasta,4.44,0.0,"non-GMO, heart healthy, vegan"


In [43]:
# Missing values per column. Only the last expression of a cell is displayed
# automatically, so this count is printed explicitly.
print(data.isnull().sum())

# Drop incomplete rows and renumber the index 0..n-1. The similarity matrix
# built later is positional, so index labels must equal row positions.
data = data.dropna().reset_index(drop=True)

# Number of fully duplicated rows (displayed as the cell output).
data.duplicated().sum()

np.int64(0)

In [44]:
# (rows, columns) of the frame at this point in the notebook.
data.shape

(1000, 6)

In [45]:
# Remove the "Item <n>" counter from product names, e.g.
# "General Mills Pasta Item 1" -> "General Mills Pasta".
# assign() returns a new frame instead of writing into a possibly derived one.
data = data.assign(
    name=data['name'].str.replace(r'\s*Item\s*\d+', '', regex=True)
)
data.head(5)

Unnamed: 0,name,brand,category,price,discount_percent,tags
0,General Mills Pasta,General Mills,Pasta,4.44,0.0,"non-GMO, heart healthy, vegan"
1,General Mills Pet Food,General Mills,Pet Food,6.88,0.0,organic
2,Heinz Cleaning,Heinz,Cleaning,22.98,0.0,"vegan, organic"
3,Quaker Beverages,Quaker,Beverages,11.4,0.0,"organic, low sugar"
4,Barilla Vegetables,Barilla,Vegetables,5.84,0.0,"vegan, organic, gluten-free"


In [46]:
# Normalise the text columns to lowercase. The vectorised .str accessor
# replaces four copy-pasted apply(lambda ...) lines.
TEXT_COLUMNS = ['name', 'brand', 'category', 'tags']
data = data.assign(
    **{column: data[column].str.lower() for column in TEXT_COLUMNS}
)

In [50]:
# Show the first five rows to check the text columns after lowercasing.
data.head(5)

Unnamed: 0,name,brand,category,price,discount_percent,tags
0,general mills pasta,general mills,pasta,4.44,0.0,"non-gmo, heart healthy, vegan"
1,general mills pet food,general mills,pet food,6.88,0.0,organic
2,heinz cleaning,heinz,cleaning,22.98,0.0,"vegan, organic"
3,quaker beverages,quaker,beverages,11.4,0.0,"organic, low sugar"
4,barilla vegetables,barilla,vegetables,5.84,0.0,"vegan, organic, gluten-free"


In [55]:
# Build one space-separated text document per product from its descriptive
# columns: name, brand, category and tags.
data['combined'] = data['name'].str.cat(
    [data['brand'], data['category'], data['tags']],
    sep=' ',
)

# Bag-of-words counts for each document, with English stop words removed and
# the vocabulary capped at 1000 terms.
cv = CountVectorizer(stop_words='english', max_features=1000)
vectors = cv.fit_transform(data['combined']).toarray()

# Pairwise cosine similarity between all product vectors.
similarity = cosine_similarity(vectors)

In [68]:
# Cross-sell lookup: maps a product category to the categories suggested
# alongside it. recommend() looks a key up by its exact category name and
# matches every listed value against the `category` column with str.contains.
# NOTE(review): some keys (e.g. "cleaning supplies", "pet supplies") may not
# correspond to a category present in the dataset - confirm against
# data['category'].unique().
complementary = {
    # Food and drink
    "bread": ["jam", "butter", "peanut butter"],
    "pasta": ["sauce", "cheese"],
    "chips": ["dip", "salsa"],
    "beverages": ["snacks", "cookies"],
    "snacks": ["beverages", "chips"],
    "cookies": ["milk", "coffee"],
    "cereal": ["milk", "fruit"],
    # Household, care and pets
    "cleaning supplies": ["disinfectant", "sponges"],
    "personal care": ["toothpaste", "shampoo"],
    "frozen foods": ["microwave meals", "ice cream"],
    "pet supplies": ["pet food", "toys"],
    "baby products": ["diapers", "wipes"],
    "health supplements": ["vitamins", "protein powder"],
    # General merchandise
    "electronics": ["accessories", "chargers"],
    "clothing": ["shoes", "accessories"],
    "furniture": ["decor", "bedding"],
    "toys": ["games", "puzzles"],
    # Fresh produce
    "vegetables": ["fruits", "salad dressing"],
    "fruits": ["vegetables", "yogurt"],
    # Add more mappings as needed
}

# Columns copied into every recommendation record, in output order.
_RESULT_FIELDS = ['name', 'brand', 'price', 'discount_percent', 'category']


def recommend(search_term, *, top_n=5, comp_n=3, products=None,
              similarity_matrix=None, complementary_map=None):
    """Recommend similar and complementary products for a category search.

    The first category whose name contains ``search_term`` is selected. The
    first product of that category is the anchor; the remaining products of
    the category are ranked by their similarity to the anchor. Products from
    the complementary categories configured for the selected category are
    appended after the similar ones.

    Parameters
    ----------
    search_term : str
        Text to look for in the ``category`` column. It is stripped,
        lowercased and matched as a case-insensitive *literal* substring, so
        characters such as ``(`` or ``+`` are safe.
    top_n : int, default 5
        Maximum number of similar products from the matched category.
    comp_n : int, default 3
        Maximum number of products taken from each complementary category.
    products : pandas.DataFrame, optional
        Catalogue with the columns name, brand, price, discount_percent and
        category. Defaults to the notebook-level ``data``.
    similarity_matrix : array-like, optional
        Square matrix in which row ``i`` holds the similarity of the ``i``-th
        catalogue row (by position) to every other row. Defaults to the
        notebook-level ``similarity``.
    complementary_map : dict, optional
        Maps a category name to a list of complementary category keywords.
        Defaults to the notebook-level ``complementary``.

    Returns
    -------
    list of dict or str
        One record per recommended product with the keys name, brand, price,
        discount_percent and category. When no category matches, the message
        ``"No products found in the category '<term>'."`` is returned.
    """
    frame = data if products is None else products
    sims = similarity if similarity_matrix is None else similarity_matrix
    comp_map = complementary if complementary_map is None else complementary_map

    query = search_term.strip().lower()
    categories = frame['category']

    # An empty query would match every category, so treat it as "not found".
    if query:
        hit_mask = categories.str.contains(query, case=False, na=False, regex=False)
        hits = categories[hit_mask.to_numpy()].unique()
    else:
        hits = []
    if len(hits) == 0:
        return f"No products found in the category '{query}'."
    category = hits[0]

    # Work with row positions throughout: the similarity matrix is positional,
    # so index labels must not be used to address it.
    in_category = (categories == category).to_numpy()
    positions = np.flatnonzero(in_category)
    anchor, candidates = positions[0], positions[1:]

    # Highest similarity first; the stable sort keeps catalogue order on ties.
    scores = np.asarray(sims[anchor])[candidates]
    ranked = candidates[np.argsort(-scores, kind='stable')[:top_n]]
    results = frame.iloc[ranked][_RESULT_FIELDS].to_dict('records')

    # Complementary products: the first comp_n rows of each configured category.
    for comp_cat in comp_map.get(category, []):
        comp_mask = categories.str.contains(
            comp_cat, case=False, na=False, regex=False
        ).to_numpy()
        results.extend(
            frame.loc[comp_mask, _RESULT_FIELDS].head(comp_n).to_dict('records')
        )

    # Nothing to rank and nothing complementary: fall back to the category itself.
    if not results:
        results = frame.loc[in_category, _RESULT_FIELDS].head(top_n).to_dict('records')
    return results

In [69]:
# Try with any category keyword from the dataset: recommend() searches the
# `category` column, not product names. The records are shown as a table.
pd.DataFrame(recommend("beverages"))


Unnamed: 0,name,brand,price,discount_percent,category
0,quaker beverages,quaker,24.73,0.0,beverages
1,quaker beverages,quaker,8.75,0.0,beverages
2,quaker beverages,quaker,7.7,0.0,beverages
3,quaker beverages,quaker,1.83,0.0,beverages
4,quaker beverages,quaker,7.7,0.0,beverages
5,general mills snacks,general mills,16.9,5.2,snacks
6,great value snacks,great value,8.29,0.0,snacks
7,campbell's snacks,campbell's,6.25,0.0,snacks
