In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Toy product catalogue: nine products, three per product group.
data = {
    "Product_Name": [
        "RSPCA Approved Chicken Livers",
        "RSPCA Approved Chicken Giblets",
        "RSPCA Approved Chicken Frames",
        "Creme Gold Washed Potatoes Loose",
        "Baby White Potatoes Prepacked",
        "Potatoes Prepacked",
        "Happy Gut Wholemeal",
        "Low Carb Bread",
        "Happy Gut Multigrain",
    ],
    "Category": ["Meat & seafood"] * 3 + ["Fruit & vegetables"] * 3 + ["Bakery"] * 3,
    "Sub_category": ["Poultry"] * 3 + ["Vegetables"] * 3 + ["Packaged Breads"] * 3,
    "Product_Group": (
        ["Chicken offal"] * 3
        + ["Potatoes"] * 3
        + ["Digestive Health & Speciality Bread"] * 3
    ),
    "Brand": [
        "Coles", "Coles", "Coles",
        "Coles", "Coles", "Spud Lite",
        "Coles", "Herman Brot", "Coles",
    ],
    "Sku": [
        "1718058P", "2565429P", "3199541P",
        "1182162P", "2488485P", "3026441P",
        "4727962P", "4654122P", "4727995P",
    ],
}

df = pd.DataFrame(data)

# Join the descriptive columns into one space-separated text field per product.
# Product_Name and Sku are deliberately not part of the feature text.
df['combined_features'] = df['Category'].str.cat(
    [df['Sub_category'], df['Product_Group'], df['Brand']],
    sep=' ',
)

# TF-IDF turns each feature string into a sparse numeric vector.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'])

# Pairwise product-to-product cosine similarity (square, one row per product).
# Passing a single matrix compares it against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

# function to recommend similar products
def recommend_products(product_name, cosine_sim=cosine_sim, top_n=2):
    """Return the names of the products most similar to ``product_name``.

    Parameters
    ----------
    product_name : str
        Exact value to look up in ``df['Product_Name']``. If several rows
        share the name, the first one is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix whose row/column order matches the row
        order of ``df``. Defaults to the matrix computed when this function
        was defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 2).

    Returns
    -------
    pandas.Series
        ``Product_Name`` values of the ``top_n`` most similar *other*
        products, most similar first. Ties keep the original row order.

    Raises
    ------
    IndexError
        If ``product_name`` is not present in ``df['Product_Name']``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Work with row *positions*, not index labels: cosine_sim is positional,
    # so a label would pick the wrong row if df had a non-default index.
    positions = (df['Product_Name'] == product_name).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"Product not found: {product_name!r}")
    idx = int(positions[0])

    # Drop the query product explicitly. Relying on it sorting first is wrong
    # when other products tie with it at similarity 1.0 (identical features):
    # the stable sort could then place the query itself among the results.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]

    # Highest similarity first; sorted() is stable, so ties keep row order.
    sim_scores = sorted(sim_scores, key=lambda pair: pair[1], reverse=True)

    product_indices = [pos for pos, _ in sim_scores[:top_n]]

    return df['Product_Name'].iloc[product_indices]

In [3]:
# Demo: look up recommendations for a single product and show them.
query_product = "RSPCA Approved Chicken Livers"
recommended_products = recommend_products(query_product)
print(f"Recommended Products for '{query_product}':")
print(recommended_products)

Recommended Products for 'RSPCA Approved Chicken Livers':
1    RSPCA Approved Chicken Giblets
2     RSPCA Approved Chicken Frames
Name: Product_Name, dtype: object
