In [9]:
import json
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error


In [None]:
# Product catalog
products = [
    # Chicken
    {'code': 'M1001', 'name': 'Whole Chicken', 'category': 'Chicken',  'unit_price': 1500, 'returnable': False},
    {'code': 'M1002', 'name': 'Chicken Breast', 'category': 'Chicken',  'unit_price': 800, 'returnable': False},
    {'code': 'M1003', 'name': 'Chicken Thighs & Drumsticks', 'category': 'Chicken',  'unit_price': 650, 'returnable': False},
    {'code': 'M1004', 'name': 'Chicken Mince', 'category': 'Chicken',  'unit_price': 700, 'returnable': False},
    {'code': 'M1005', 'name': 'Organic Chicken', 'category': 'Chicken', 'unit_price': 2200, 'returnable': False},
    # Beef
    {'code': 'M1101', 'name': 'Ground Beef', 'category': 'Beef',  'unit_price': 1500, 'returnable': False},
    {'code': 'M1102', 'name': 'Beef Steaks (Ribeye)', 'category': 'Beef',  'unit_price': 3500, 'returnable': False},
    {'code': 'M1103', 'name': 'Beef Roasts', 'category': 'Beef',  'unit_price': 3200, 'returnable': False},
    {'code': 'M1104', 'name': 'Beef Ribs', 'category': 'Beef',  'unit_price': 2800, 'returnable': False},
    {'code': 'M1105', 'name': 'Grass-Fed / Organic Beef', 'category': 'Beef', 'unit_price': 4200, 'returnable': False},
    # Pork
    {'code': 'M1201', 'name': 'Pork Chops', 'category': 'Pork',  'unit_price': 2000, 'returnable': False},
    {'code': 'M1202', 'name': 'Pork Belly', 'category': 'Pork',  'unit_price': 2200, 'returnable': False},
    {'code': 'M1203', 'name': 'Pork Ribs', 'category': 'Pork',  'unit_price': 2100, 'returnable': False},
    {'code': 'M1204', 'name': 'Ground Pork', 'category': 'Pork',  'unit_price': 1900, 'returnable': False},
    {'code': 'M1205', 'name': 'Bacon', 'category': 'Pork',  'unit_price': 2500, 'returnable': False},
    # Lamb
    {'code': 'M1301', 'name': 'Lamb Chops', 'category': 'Lamb',  'unit_price': 2800, 'returnable': False},
    {'code': 'M1302', 'name': 'Lamb Leg', 'category': 'Lamb',  'unit_price': 3500, 'returnable': False},
    {'code': 'M1303', 'name': 'Ground Lamb', 'category': 'Lamb',  'unit_price': 2600, 'returnable': False},
    {'code': 'M1304', 'name': 'Rack of Lamb', 'category': 'Lamb',  'unit_price': 4500, 'returnable': False},
    # Turkey, Duck, Rabbit, Game
    {'code': 'M1401', 'name': 'Whole Turkey', 'category': 'Turkey & Other',  'unit_price': 6000, 'returnable': False},
    {'code': 'M1402', 'name': 'Turkey Breast', 'category': 'Turkey & Other',  'unit_price': 3200, 'returnable': False},
    {'code': 'M1403', 'name': 'Duck', 'category': 'Turkey & Other',  'unit_price': 2800, 'returnable': False},
    {'code': 'M1404', 'name': 'Rabbit', 'category': 'Turkey & Other',  'unit_price': 3500, 'returnable': False},
    {'code': 'M1405', 'name': 'Game Meats (Venison)', 'category': 'Game',  'unit_price': 7000, 'returnable': False},
    # Fish
    {'code': 'S1001', 'name': 'Salmon Fillets', 'category': 'Fish',  'unit_price': 6000, 'returnable': False},
    {'code': 'S1002', 'name': 'Tuna', 'category': 'Fish',  'unit_price': 2000, 'returnable': False},
    {'code': 'S1003', 'name': 'Tilapia', 'category': 'Fish',  'unit_price': 900, 'returnable': False},
    {'code': 'S1004', 'name': 'Cod', 'category': 'Fish',  'unit_price': 2500, 'returnable': False},
    {'code': 'S1005', 'name': 'Mackerel', 'category': 'Fish',  'unit_price': 700, 'returnable': False},
    {'code': 'S1006', 'name': 'Whole Fish (Seer Fish)', 'category': 'Fish',  'unit_price': 3000, 'returnable': False},
    # Shellfish
    {'code': 'S1101', 'name': 'Shrimp & Prawns', 'category': 'Shellfish',  'unit_price': 3500, 'returnable': False},
    {'code': 'S1102', 'name': 'Crab', 'category': 'Shellfish',  'unit_price': 6000, 'returnable': False},
    {'code': 'S1103', 'name': 'Lobster', 'category': 'Shellfish',  'unit_price': 15000, 'returnable': False},
    {'code': 'S1104', 'name': 'Clams', 'category': 'Shellfish',  'unit_price': 2500, 'returnable': False},
    {'code': 'S1105', 'name': 'Mussels', 'category': 'Shellfish',  'unit_price': 2200, 'returnable': False},
    {'code': 'S1106', 'name': 'Scallops', 'category': 'Shellfish',  'unit_price': 9000, 'returnable': False},
    # Specialty
    {'code': 'S1201', 'name': 'Octopus', 'category': 'Specialty',  'unit_price': 7000, 'returnable': False},
    {'code': 'S1202', 'name': 'Squid', 'category': 'Specialty',  'unit_price': 4000, 'returnable': False},
    {'code': 'S1203', 'name': 'Caviar', 'category': 'Specialty',  'unit_price': 25000, 'returnable': False},
    {'code': 'S1204', 'name': 'Smoked Fish', 'category': 'Specialty',  'unit_price': 4500, 'returnable': False},
    {'code': 'S1205', 'name': 'Sushi Grade Tuna', 'category': 'Specialty',  'unit_price': 10000, 'returnable': False},
    {'code': 'S1206', 'name': 'Sashimi Salmon', 'category': 'Specialty',  'unit_price': 12000, 'returnable': False},
    # Marinated & Ready to Cook
    {'code': 'R1001', 'name': 'Marinated Chicken', 'category': 'Marinated Meat & Ready to Cook',  'unit_price': 1800, 'returnable': False},
    {'code': 'R1002', 'name': 'Spiced Fish Fillets', 'category': 'Marinated Meat & Ready to Cook',  'unit_price': 2200, 'returnable': False},
    {'code': 'R1003', 'name': 'Meat Skewers', 'category': 'Marinated Meat & Ready to Cook',  'unit_price': 2000, 'returnable': False},
    # Frozen Meat & Seafood
    {'code': 'F1001', 'name': 'Frozen Chicken Cuts', 'category': 'Frozen Meat & Seafood',  'unit_price': 1300, 'returnable': False},
    {'code': 'F1002', 'name': 'Frozen Fish Fillets', 'category': 'Frozen Meat & Seafood',  'unit_price': 2500, 'returnable': False},
    {'code': 'F1003', 'name': 'Frozen Shellfish', 'category': 'Frozen Meat & Seafood',  'unit_price': 5500, 'returnable': False},
    # Value Packs & Combos
    {'code': 'V1001', 'name': 'Family Pack Chicken', 'category': 'Value Packs & Combos',  'unit_price': 5000, 'returnable': False},
    {'code': 'V1002', 'name': 'BBQ Pack Beef', 'category': 'Value Packs & Combos',  'unit_price': 7000, 'returnable': False},
    {'code': 'V1003', 'name': 'Weekly Meal Kit', 'category': 'Value Packs & Combos',  'unit_price': 3500, 'returnable': False}
]

In [11]:
def get_meat_type(category):
    category = category.lower()
    if 'chicken' in category:
        return 'chicken'
    elif 'beef' in category:
        return 'beef'
    elif 'pork' in category:
        return 'pork'
    elif 'lamb' in category or 'mutton' in category:
        return 'mutton'
    elif 'fish' in category:
        return 'fish'
    elif 'shellfish' in category:
        return 'shellfish'
    elif 'seafood' in category:
        return 'seafood'
    else:
        return 'other'

df_products = pd.DataFrame(products)
df_products['meat_type'] = df_products['category'].apply(get_meat_type)


In [12]:
religion_based_meat_preferences = {
    "buddhism":     {"chicken": 1.0, "beef": 0.2, "pork": 0.2, "mutton": 0.5, "fish":1.0, "shellfish":1.0, "seafood":1.0},
    "hinduism":     {"chicken": 1.0, "beef": 0.05, "pork": 0.05, "mutton": 0.5, "fish":1.0, "shellfish":1.0, "seafood":1.0},
    "islam":        {"chicken": 1.0, "beef": 1.0, "pork": 0.01, "mutton": 0.6, "fish":1.0, "shellfish":1.0, "seafood":1.0},
    "roman_catholic":{"chicken": 1.0, "beef": 0.6, "pork": 1.0, "mutton": 0.6, "fish":1.0, "shellfish":1.0, "seafood":1.0},
    "others":       {"chicken": 0.8, "beef": 0.8, "pork": 0.8, "mutton": 0.6, "fish":1.0, "shellfish":1.0, "seafood":1.0}
}

In [13]:
with open('./datasets/datasets/generated_sales_dataset.json', 'r') as file:
    sales_data = json.load(file)

df_sales = pd.DataFrame(sales_data)


In [14]:
def extract_discount(discount_str):
    try:
        return float(discount_str.strip('%')) / 100
    except:
        return 0.0

df_sales['discount_pct'] = df_sales['Discount'].apply(extract_discount)
df_sales['Buying Date'] = pd.to_datetime(df_sales['Buying Date'])
df_sales['month'] = df_sales['Buying Date'].dt.month
df_sales['day_of_week'] = df_sales['Buying Date'].dt.dayofweek
# Define special season months - example includes Jan, April, July, December
df_sales['special_season'] = df_sales['month'].apply(lambda x: 1 if x in [1,4,7,12] else 0)

In [15]:
df_sales = df_sales.merge(df_products[['code', 'meat_type']], left_on='Product Code', right_on='code', how='left')

In [16]:
train_records = []
for _, row in df_sales.iterrows():
    religion = 'others'  # Fill from actual user data if possible
    meat_type = row['meat_type']
    # Adjust unit price based on religion meat preference
    pref = religion_based_meat_preferences.get(religion, religion_based_meat_preferences['others'])
    adjusted_price = row['Unit Price'] * pref.get(meat_type, 1.0)

    train_records.append({
        'unit_price': adjusted_price,
        'family_size': 1,  # Default or infer if you have data
        'special_season': row['special_season'],
        'province': row['Province'],
        'city': row['City'],
        'religion': religion,
        'age_group': row['Customer Age Group'],
        'meat_type': meat_type,
        'discount_pct': row['discount_pct'],
        'month': row['month'],
        'day_of_week': row['day_of_week'],
        'target': row['Quantity']
    })

df_train = pd.DataFrame(train_records)


In [17]:
cat_features = ['province', 'city', 'religion', 'age_group', 'meat_type', 'month', 'day_of_week']
num_features = ['unit_price', 'family_size', 'special_season', 'discount_pct']

encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
X_cat = encoder.fit_transform(df_train[cat_features])
X_num = df_train[num_features].values
X = np.hstack([X_num, X_cat])
y = df_train['target'].values

# Split data to evaluate model performance
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=300, random_state=42)
model.fit(X_train, y_train)

# Evaluate
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error on Test Set: {mse:.4f}')

# Save model and encoder
with open('product_recommendation_model.pkl', 'wb') as f:
    pickle.dump(model, f)
with open('onehot_encoder.pkl', 'wb') as f:
    pickle.dump(encoder, f)


Mean Squared Error on Test Set: 0.8566


In [18]:
def recommend_products(user_input, df_products, model, encoder, top_n=5):
    rows = []
    religion = user_input.get('religion', 'others')
    fam_size = user_input.get('family_size', 1)
    special_season = int(user_input.get('special_season', 0))
    province = user_input.get('province', 'Unknown')
    city = user_input.get('city', 'Unknown')
    age_group = user_input.get('age_group', '18-30')
    month = user_input.get('month', 1)
    day_of_week = user_input.get('day_of_week', 0)

    for _, product in df_products.iterrows():
        meat_type = product['meat_type']
        pref = religion_based_meat_preferences.get(religion, religion_based_meat_preferences['others'])
        adjusted_price = product['unit_price'] * pref.get(meat_type, 1.0)

        rows.append({
            'code': product['code'],
            'name': product['name'],
            'category': product['category'],
            'unit_price': adjusted_price,
            'family_size': fam_size,
            'special_season': special_season,
            'province': province,
            'city': city,
            'religion': religion,
            'age_group': age_group,
            'meat_type': meat_type,
            'discount_pct': 0,  # unknown at prediction time
            'month': month,
            'day_of_week': day_of_week
        })

    df_user = pd.DataFrame(rows)
    X_user_cat = encoder.transform(df_user[cat_features])
    X_user_num = df_user[num_features].values
    X_user = np.hstack([X_user_num, X_user_cat])

    df_user['score'] = model.predict(X_user)
    top_products = df_user.sort_values(by='score', ascending=False).head(top_n)

    return top_products[['code', 'name', 'category', 'unit_price', 'score']].to_dict(orient='records')

In [None]:
if __name__ == '__main__':
    # Example user input
    user_input = {
        'religion': 'islam',
        'family_size': 5,
        'special_season': True,
        'province': 'Western',
        'city': 'Colombo',
        'age_group': '31-45',
        'month': 9,       
        'day_of_week': 4  
    }

    recommendations = recommend_products(user_input, df_products, model, encoder, top_n=5)
    import json
    print(json.dumps(recommendations, indent=2))

[
  {
    "code": "S1001",
    "name": "Salmon Fillets",
    "category": "Fresh Seafood - Fish",
    "unit_price": 6000.0,
    "score": 2.1766666666666667
  },
  {
    "code": "S1102",
    "name": "Crab",
    "category": "Fresh Seafood - Shellfish",
    "unit_price": 6000.0,
    "score": 2.1766666666666667
  },
  {
    "code": "S1006",
    "name": "Whole Fish (Seer Fish)",
    "category": "Fresh Seafood - Fish",
    "unit_price": 3000.0,
    "score": 2.1466666666666665
  },
  {
    "code": "M1401",
    "name": "Whole Turkey",
    "category": "Fresh Meat - Turkey & Other",
    "unit_price": 6000.0,
    "score": 2.1466666666666665
  },
  {
    "code": "S1101",
    "name": "Shrimp & Prawns",
    "category": "Fresh Seafood - Shellfish",
    "unit_price": 3500.0,
    "score": 2.14
  }
]
