In [1]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

df2 = pd.read_csv('BigBasket.csv')  
df2 = df2.drop_duplicates(subset=['product']).reset_index(drop = True)
df2 = df2.drop(['index','sale_price', 'market_price', 'rating', 'description'], axis = 1)
df2 = df2.dropna().reset_index(drop = True)

def process_and_combine(row):
    combined = ' '.join(row.drop('product')).lower()
    return f"{combined}"

df2['soup'] = df2.apply(process_and_combine, axis=1)
df2['soup'] = df2['soup'].str.replace(r'[&,]', ' ', regex=True)
df2['soup'] = df2['soup'].str.split().str.join(' ')

In [2]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df2['soup'])
cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
indices = pd.Series(df2['product'])

In [4]:
def recommendations(title, cosine_sim=cos_sim):
    index = indices[indices == title].index[0]
    similarity_scores = pd.Series(cosine_sim[index]).sort_values(ascending=False)
    top_10_products = list(similarity_scores.iloc[1:11].index)
    return [list(df2['product'])[i] for i in top_10_products]

In [5]:
recommendations("Turmeric Powder/Arisina Pudi")

['Powder - Chilli',
 'Combo Pack - Chilli, Turmeric & Coriander (200g Each)',
 'Compounded Asafoetida - Cake',
 'Asafoetida Powder',
 'Punjabi Chole Masala',
 'Paneer Masala',
 'Biriyani masala',
 'Meat/Mutton Masala',
 'Red Chilli Powder 200G +Coriander/Dhania Powder 200G +Turmeric/Haldi Powder 200G',
 'Chicken Tandoori Masala']

In [7]:
recommendations_data = []

for product in df2['product']:
    recommended_products = recommendations(product)
    recommendations_data.append([product, recommended_products])

In [9]:
# Tạo DataFrame từ danh sách recommendations_data
recommendations_df = pd.DataFrame(recommendations_data, columns=['product', '10_rcm_product'])

In [13]:
recommendations_df[recommendations_df['product'] == 'Turmeric Powder/Arisina Pudi'].values[0][1]

['Powder - Chilli',
 'Combo Pack - Chilli, Turmeric & Coriander (200g Each)',
 'Compounded Asafoetida - Cake',
 'Asafoetida Powder',
 'Punjabi Chole Masala',
 'Paneer Masala',
 'Biriyani masala',
 'Meat/Mutton Masala',
 'Red Chilli Powder 200G +Coriander/Dhania Powder 200G +Turmeric/Haldi Powder 200G',
 'Chicken Tandoori Masala']

In [20]:
app = dash.Dash(__name__)
server = app.server

app.layout = html.Div(
    style={
        'width': '600px',
        'height': '400px',
        'padding': '10px',
        'font-family': 'cursive',
        'background-color': '#FFC7C7',
        'border-radius': '16px',
        'display': 'flex',
        'flex-direction': 'column',
        'gap': '10px'
    },
    children=[
        html.H1("Product Recommendation System", style={'text-align': 'center', 'color': '#8785A2'}),
        dcc.Dropdown(
            id='product-dropdown',
            options=[{'label': product, 'value': product} for product in recommendations_df['product']],
            value='Turmeric Powder/Arisina Pudi',
            style={
                'background-color': '#FFE2E2',
                'color': '#8785A2',
                'border': '1px solid #8785A2',
                'font-size': '14px',
                'border-radius': '8px'
            },
        ),
        html.Div(id='recommendations', style={'background-color': 'white', 'border-radius': '8px'})
    ]
)

@app.callback(
    Output('recommendations', 'children'),
    [Input('product-dropdown', 'value')]
)
def update_recommendations(selected_product):
    recommended_products = recommendations_df[recommendations_df['product'] == selected_product].values[0][1]

    return html.Div(
        children=[html.P(product, style={'color': '#8785A2', 'font-size': '14px', 'line-height': '10px'})
                  for product in recommended_products], style={'padding-left': '10px'})

if __name__ == '__main__':
    app.run_server(debug=False)


In [21]:
recommendations("Glass Water Bottle - Aquaria Organic Purple")

['Water Bottle - Fridge, Tulip, Dark Blue',
 'H2O Unbreakable Water Bottle - Pink',
 'Water Bottle - Crystal, Blue',
 'H2O Unbreakable Water Bottle - Blue',
 'Glass Water Bottle - Circo Multicolour Flowers',
 'H2o Unbreakable Water Bottle - Orange',
 'Water Bottle - Polka, Orange',
 'Water Bottle - Fridge, Tulip, Purple',
 'Glass Water Bottle - Bri Black & Grey',
 'Glass Water Bottle - Circo Orange & Lemon']

In [22]:
recommendations_df.to_csv('Product_rcm_data.csv', index=False)

In [14]:
import pandas as pd
recommendations_df = pd.read_csv('Product_rcm_data.csv')

In [18]:
# Chuỗi giá trị sản phẩm
product_str = "['Sukesha Taila - for Healthy Hair', 'Brahmi Bhringaraj Taila - Anti Graying', 'Flaxseed Oil - Omega-3, Omega-6, Omega-9 Vegetarian Capsule', 'Evening Primrose Oil - Vegetarian Capsule (500 mg)', 'Rejuvenating Night Cream - Cucumber Lime & Aloe Vera', 'Exfoliating Face Scrub - Aloe Vera & Grapes', 'Face Scrub - Walnut Orange', 'Anti Acne Gel - for Spotless Skin', 'Protecting Sunscreen - Aloe Vera Rose & Almond', 'Dawn to Dusk Fortifying Cream - Rose Cucumber & Almond']"

# Tách giá trị thành danh sách
product_list = [product.strip(" '[]") for product in product_str.split(',')]
product_list

['Sukesha Taila - for Healthy Hair',
 'Brahmi Bhringaraj Taila - Anti Graying',
 'Flaxseed Oil - Omega-3',
 'Omega-6',
 'Omega-9 Vegetarian Capsule',
 'Evening Primrose Oil - Vegetarian Capsule (500 mg)',
 'Rejuvenating Night Cream - Cucumber Lime & Aloe Vera',
 'Exfoliating Face Scrub - Aloe Vera & Grapes',
 'Face Scrub - Walnut Orange',
 'Anti Acne Gel - for Spotless Skin',
 'Protecting Sunscreen - Aloe Vera Rose & Almond',
 'Dawn to Dusk Fortifying Cream - Rose Cucumber & Almond']

In [21]:
product_list[0]

'Sukesha Taila - for Healthy Hair'

In [19]:
# Xóa dấu ngoặc vuông từ cột '10_rcm_product'
recommendations_df['10_rcm_product'] = recommendations_df['10_rcm_product'].str.replace(r'\[|\]', '', regex=True)
recommendations_df['10_rcm_product']
# Lưu DataFrame vào tệp CSV mới
# recommendations_df.to_csv('Cleaned_Product_rcm_data.csv', index=False)


0        'Sukesha Taila - for Healthy Hair', 'Brahmi Bh...
1        'Double Walled Glass Bottle With Gray Cap - BB...
2        'Brass Angle Deep Stand - Plain, No.3', 'Brass...
3        'Maharaja Plastic Basket With Lid - Small', 'T...
4        'Creme Care Soap - For Hands & Body', 'Soap - ...
                               ...                        
23534    'Wash Sponge - Micro Fiber Chenille', 'Dustpac...
23535    'Organic Seeds - Brown Mustard/Sasive', 'Black...
23536    'Wottagirl - Divine Perfume Spray', 'Wottagirl...
23537    'Barbeque Beetroot Chips', 'Chia Seeds Chips',...
23538    'Black Tea', 'Elaichi Tea', 'Masala Tea', 'Tea...
Name: 10_rcm_product, Length: 23539, dtype: object

In [12]:
df = pd.read_csv('Product_rcm_data.csv')
# recommendations_df['10_rcm_product'] = recommendations_df['10_rcm_product'].str.replace(r"\[|\]|\'", '', regex=True)


In [13]:
df.head()

Unnamed: 0,product,10_rcm_product
0,Garlic Oil - Vegetarian Capsule 500 mg,"['Sukesha Taila - for Healthy Hair', 'Brahmi B..."
1,Water Bottle - Orange,['Double Walled Glass Bottle With Gray Cap - B...
2,"Brass Angle Deep - Plain, No.2","['Brass Angle Deep Stand - Plain, No.3', 'Bras..."
3,Cereal Flip Lid Container/Storage Jar - Assort...,"['Maharaja Plastic Basket With Lid - Small', '..."
4,Creme Soft Soap - For Hands & Body,"['Creme Care Soap - For Hands & Body', 'Soap -..."


In [18]:
ast.literal_eval(df[df['product'] == 'Stainless Steel Pav Bhaji/Idli Oval Shaped Plate']['10_rcm_product'].values[0])[4]

'Steel Dinner Plate/Thali - No. 12, China'

In [38]:
recommendations_df['10_rcm_product'][0].split(', ')

'Brahmi Bhringaraj Taila - Anti Graying'