In [2]:
pip install git+https://github.com/daviddavo/lightfm

Collecting git+https://github.com/daviddavo/lightfm
  Cloning https://github.com/daviddavo/lightfm to /tmp/pip-req-build-ihd8ymwr
  Running command git clone --filter=blob:none --quiet https://github.com/daviddavo/lightfm /tmp/pip-req-build-ihd8ymwr
  Resolved https://github.com/daviddavo/lightfm to commit f0eb500ead54ab65eb8e1b3890337a7223a35114
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: lightfm
  Building wheel for lightfm (setup.py) ... done
  Created wheel for lightfm: filename=lightfm-1.17-cp312-cp312-linux_x86_64.whl size=1099142 sha256=ccd0c7da4a754d9fddddc053fa61751d8d33c62000bc14783acb7aa583f82a2b
  Stored in directory: /tmp/pip-ephem-wheel-cache-au466otg/wheels/fd/89/93/70c1e5f378ee5043de89387ee3ef6852ff39e3b9eb44ecc1a3
Successfully built lightfm
Installing collected packages: lightfm
Successfully installed lightfm-1.17


In [30]:


import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import LabelBinarizer
from scipy.sparse import coo_matrix, hstack, csr_matrix
from lightfm import LightFM
from lightfm.data import Dataset

In [31]:
# Load the two raw inputs (both stored as JSON record lists):
#   transactions - one row per purchase line
#   allergy_info - per-product allergy availability flags
transactions, allergy_info = (
    pd.read_json(json_path, orient="records")
    for json_path in ("generated_sales_dataset.json", "product_allergy.json")
)

In [32]:
# Parse purchase timestamps so later cells can do date arithmetic on them.
transactions['buying_date'] = pd.to_datetime(transactions['buying_date'])

# Keep only fulfilled orders. The status text is trimmed and lower-cased before
# matching, so variants such as 'Delivered ' or ' completed' are all recognised
# without listing each whitespace variant by hand.
is_fulfilled = (
    transactions['order_status']
    .str.strip()
    .str.lower()
    .isin(['delivered', 'completed'])
)
# `.copy()` detaches the filtered rows from the raw frame, so columns added in
# later cells are written to an independent DataFrame rather than to a slice.
transactions = transactions.loc[is_fulfilled].copy()

In [33]:
# Build the implicit-feedback weight of every purchase row:
#     interaction = log1p(quantity) * (1 + 0.5 * rate_norm)
# where rate_norm is the rating scaled by the largest rating in the data.
# Missing ratings count as 0, i.e. they add no bonus on top of the quantity term.
rate_filled = transactions['rate'].fillna(0)
rate_peak = rate_filled.max()
# Fall back to 1 when there is no positive rating, so the division stays safe.
max_rate = rate_peak if rate_peak > 0 else 1

# `assign` returns a new frame instead of writing columns into the filtered
# slice produced by the previous cell, which avoids SettingWithCopyWarning and
# makes this cell safe to re-run.
transactions = transactions.assign(
    rate=rate_filled,
    rate_norm=rate_filled / max_rate,
    interaction=lambda df: np.log1p(df['quantity']) * (1 + 0.5 * df['rate_norm']),
)

In [34]:
# Collapse purchase rows into one row per (customer, product) pair:
#   interaction   - summed interaction weight over all purchases of the pair
#   last_buy_date - most recent purchase date of the pair
ui = (
    transactions
    .groupby(['customer_id', 'product_code'], as_index=False)
    .agg(
        interaction=('interaction', 'sum'),
        last_buy_date=('buying_date', 'max'),
    )
)

In [35]:
# Product catalogue indexed by product_code: the first name/category seen for
# each code in the transactions, left-joined with the allergy table.
# Products without an allergy record are treated as allergic_available=True.
products = (
    transactions[['product_code', 'product_name', 'product_category']]
    .drop_duplicates('product_code')
    .set_index('product_code')
    .join(allergy_info.set_index('product_code'), how='left')
    .assign(allergic_available=lambda df: df['allergic_available'].fillna(True))
)

In [36]:
# Vocabulary of feature labels, stringified to match the ids passed below.
age_group_labels = set(transactions['customer_age_group'].unique().astype(str))
province_labels = set(transactions['province'].unique().astype(str))
category_labels = set(products['product_category'].unique().astype(str))

# Register every user id, item id and feature label with the LightFM Dataset so
# it can assign internal integer indices. All ids are registered as strings.
# NOTE(review): user features are declared here, but no build_user_features call
# is visible in this notebook - confirm whether they are meant to be used.
dataset = Dataset()
dataset.fit(
    map(str, transactions['customer_id'].unique()),
    map(str, products.index),
    user_features=age_group_labels | province_labels,
    item_features=category_labels,
)

In [37]:
# Seed for LightFM's internal random state so training starts from the same
# initialisation on every run (results may still vary slightly with
# num_threads > 1).
RANDOM_STATE = 42

# Per-item feature tags: every product carries its category, and products whose
# `allergic_available` flag is set additionally carry the "allergic_available"
# tag. (The tag was previously attached to the products where the flag was
# False, i.e. the opposite of what its name says.)
item_features_list = []
for code, category, allergy_ok in zip(
    products.index, products['product_category'], products['allergic_available']
):
    feats = [str(category)]
    if allergy_ok:
        feats.append("allergic_available")
    item_features_list.append((str(code), feats))

# Register any feature label not yet known to the dataset *before* building
# matrices, so every matrix is created against the final vocabulary.
dataset.fit_partial(
    users=(str(u) for u in transactions['customer_id'].unique()),
    items=(str(i) for i in products.index),
    item_features=[f for _, feats in item_features_list for f in feats],
)

# Sparse user x item matrices: `interactions_matrix` marks which pairs occurred,
# `weights` carries the summed interaction strength used as sample weight.
(interactions_matrix, weights) = dataset.build_interactions(
    [(str(r.customer_id), str(r.product_code), r.interaction) for r in ui.itertuples()]
)

# Keep the item-feature matrix instead of discarding it.
# NOTE: the model below is still trained on interactions only. Passing
# `item_features=item_features_matrix` to `fit` would also require passing the
# same matrix to every `model.predict` call, so that switch must be made in
# both places together.
item_features_matrix = dataset.build_item_features(item_features_list)

model = LightFM(loss='warp', random_state=RANDOM_STATE)
model.fit(interactions_matrix, sample_weight=weights, epochs=30, num_threads=4)

<lightfm.lightfm.LightFM at 0x793f068a5c40>

In [38]:
def make_user_profile_vector(age_group, province, religion, family_size, allergic_flag):
    """Bundle raw profile answers into the dict used by the recommenders.

    Text fields are coerced with ``str``, ``family_size`` with ``int`` and
    ``allergic_flag`` with ``bool``.

    Returns:
        Dict with keys ``age_group``, ``province``, ``religion``,
        ``family_size`` and ``allergic_flag``.
    """
    profile = dict(
        age_group=str(age_group),
        province=str(province),
        religion=str(religion),
        family_size=int(family_size),
        allergic_flag=bool(allergic_flag),
    )
    return profile

def prefilter_products_for_user(user_profile, products_df):
    """Return a copy of the products this user may be recommended.

    Args:
        user_profile: Dict containing an ``allergic_flag`` entry.
        products_df: Product table with an ``allergic_available`` column.

    Returns:
        A copy of ``products_df``; when ``allergic_flag`` is truthy only the
        rows whose ``allergic_available`` equals True are kept.
    """
    if not user_profile['allergic_flag']:
        return products_df.copy()
    allergy_ok_mask = products_df['allergic_available'] == True
    return products_df[allergy_ok_mask].copy()

In [39]:
def recommend_for_existing_user(user_id, user_profile, topk=6):
    """Recommend up to ``topk`` products for a customer with purchase history.

    The list is assembled in three stages, all restricted to the products
    returned by ``prefilter_products_for_user``:

    1. up to 3 previously bought products, ranked by interaction strength
       decayed by the number of days since the last purchase;
    2. up to 3 products the customer has not bought yet, ranked by the LightFM
       score plus a small rule-based content bonus;
    3. most-purchased products as filler while fewer than ``topk`` were found.

    Args:
        user_id: Customer identifier as stored in ``ui['customer_id']``.
        user_profile: Dict with at least ``allergic_flag``, ``family_size``
            and ``age_group`` entries.
        topk: Maximum number of recommendations to return.

    Returns:
        List of dicts with ``product_name`` and ``product_category`` keys.
        A customer unknown to the trained dataset gets a model score of 0.0
        for every product, so stages 1 and 3 effectively decide the result.
    """
    # --- Stage 1: recency-weighted purchase history -------------------------
    user_hist = ui[ui['customer_id'] == user_id].copy()
    if not user_hist.empty:
        days_since = (pd.Timestamp.now() - user_hist['last_buy_date']).dt.days
        # A future-dated purchase would yield a negative age, which could make
        # the decay denominator zero or negative; treat it as "bought today".
        user_hist['days_since'] = days_since.clip(lower=0)
        user_hist['hist_score'] = user_hist['interaction'] * (1.0 / (1 + user_hist['days_since'] / 30.0))
        top_hist = user_hist.sort_values('hist_score', ascending=False).head(10)
    else:
        top_hist = pd.DataFrame(columns=ui.columns)

    candidates = prefilter_products_for_user(user_profile, products)
    hist_candidates = top_hist[top_hist['product_code'].isin(candidates.index)].head(3)['product_code'].tolist()
    known_items = set(user_hist['product_code'])

    # --- Stage 2: model scores for all candidate products -------------------
    all_item_codes = list(candidates.index)
    # mapping() -> (user ids, user features, item ids, item features); fetch it
    # once instead of rebuilding it for every item.
    user_map, _, item_map, _ = dataset.mapping()
    # Ids were registered as strings, so look them up as strings.
    user_index = user_map.get(str(user_id))

    model_score = {code: 0.0 for code in all_item_codes}
    scorable = [code for code in all_item_codes if str(code) in item_map]
    if user_index is not None and scorable:
        # LightFM.predict needs an int32 *array* of item indices; a scalar item
        # id raises inside predict, which previously was swallowed and turned
        # every score into 0.0.
        item_indices = np.array([item_map[str(code)] for code in scorable], dtype=np.int32)
        predictions = model.predict(user_index, item_indices)
        model_score.update(zip(scorable, (float(p) for p in predictions)))

    scores = sorted(
        ((code, model_score[code]) for code in all_item_codes),
        key=lambda pair: pair[1],
        reverse=True,
    )
    model_candidates = [c for c, _ in scores if c not in hist_candidates and c not in known_items][:10]

    def content_score(item_code):
        """Rule-based bonus: family packs for big households, marinated items for ages 31-60."""
        row = products.loc[item_code]
        # Missing names/categories may be None or NaN; only match on real text.
        name = row['product_name'] if isinstance(row['product_name'], str) else ''
        category = row['product_category'] if isinstance(row['product_category'], str) else ''
        score = 0.0
        if user_profile['family_size'] >= 4 and 'Pack' in name:
            score += 0.4
        if user_profile['age_group'] in ['31-45', '46-60'] and 'Marinated' in category:
            score += 0.2
        return score

    ranked_new = sorted(
        model_candidates,
        key=lambda code: model_score[code] + content_score(code),
        reverse=True,
    )

    # --- Stage 3: assemble, topping up with popular products ----------------
    final = []
    final += hist_candidates[:3]
    final += ranked_new[:3]
    if len(final) < topk:
        pop = (
            transactions[transactions['product_code'].isin(candidates.index)]
            .groupby('product_code')
            .size()
            .sort_values(ascending=False)
        )
        for p in pop.index:
            if p not in final:
                final.append(p)
            if len(final) == topk:
                break

    result = [
        {
            'product_name': products.loc[c, 'product_name'],
            'product_category': products.loc[c, 'product_category'],
        }
        for c in final[:topk]
    ]
    return result

In [40]:
def recommend_for_new_user(user_profile, topk=6):
    """Recommend up to ``topk`` products for a customer without history.

    Each eligible product (see ``prefilter_products_for_user``) is scored as
    ``0.6 * log1p(purchase count) + 0.4 * content bonus``, where the bonus
    rewards 'Ready to Cook' categories and, for households of four or more,
    products with 'Pack' in the name.

    Args:
        user_profile: Dict with at least ``allergic_flag`` and ``family_size``.
        topk: Maximum number of recommendations to return.

    Returns:
        List of dicts with ``product_name`` and ``product_category`` keys,
        highest score first.
    """
    eligible = prefilter_products_for_user(user_profile, products)
    in_scope = transactions['product_code'].isin(eligible.index)
    purchase_counts = transactions[in_scope].groupby('product_code').size()

    def _bonus(code):
        item = products.loc[code]
        total = 0.0
        category = item['product_category']
        if category and 'Ready to Cook' in category:
            total += 0.5
        if user_profile['family_size'] >= 4 and 'Pack' in (item['product_name'] or ''):
            total += 0.4
        return total

    ranked = sorted(
        (
            (code, 0.6 * np.log1p(purchase_counts.get(code, 0)) + 0.4 * _bonus(code))
            for code in eligible.index
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    recommendations = []
    for code, _ in ranked[:topk]:
        recommendations.append({
            'product_name': products.loc[code]['product_name'],
            'product_category': products.loc[code]['product_category'],
        })
    return recommendations

In [41]:
if __name__ == "__main__":
    # Demo 1: history-aware recommendations for a given customer id.
    user_id_example = 10_000_000_000_000_000_000
    user_input_existing = make_user_profile_vector(
        age_group='31-45',
        province='Western',
        religion='Buddhist',
        family_size=3,
        allergic_flag=True,
    )
    recs_existing = recommend_for_existing_user(user_id_example, user_input_existing, topk=6)
    print("Existing user recommendations:", recs_existing)

    # Demo 2: cold-start recommendations driven only by the profile.
    user_input_new = make_user_profile_vector(
        age_group='18-30',
        province='Western',
        religion='None',
        family_size=1,
        allergic_flag=False,
    )
    recs_new = recommend_for_new_user(user_input_new, topk=6)
    print("New user recommendations:", recs_new)

Existing user recommendations: [{'product_name': 'Spiced Fish Fillets', 'product_category': 'Marinated Meat & Ready to Cook'}, {'product_name': 'Tuna', 'product_category': 'Fish'}, {'product_name': 'Mackerel', 'product_category': 'Fish'}, {'product_name': 'Frozen Shellfish', 'product_category': 'Frozen Meat & Seafood'}, {'product_name': 'Frozen Fish Fillets', 'product_category': 'Frozen Meat & Seafood'}, {'product_name': 'Salmon Fillets', 'product_category': 'Fish'}]
New user recommendations: [{'product_name': 'Frozen Shellfish', 'product_category': 'Frozen Meat & Seafood'}, {'product_name': 'Frozen Chicken Cuts', 'product_category': 'Frozen Meat & Seafood'}, {'product_name': 'Meat Skewers', 'product_category': 'Marinated Meat & Ready to Cook'}, {'product_name': 'Frozen Fish Fillets', 'product_category': 'Frozen Meat & Seafood'}, {'product_name': 'Spiced Fish Fillets', 'product_category': 'Marinated Meat & Ready to Cook'}, {'product_name': 'Tuna', 'product_category': 'Fish'}]
