# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

Items and add_ons

In [5]:
import pandas as pd

from itertools import combinations


url2 = "https://raw.githubusercontent.com/Radwa-Ayman239/MenueEngineering/refs/heads/main/Data%20Analysis/Datasets/dim_add_ons_cleaned.csv"
dim_add_ons_cleaned = pd.read_csv(url2)
url3 = "https://raw.githubusercontent.com/Radwa-Ayman239/MenueEngineering/refs/heads/main/Data%20Analysis/Datasets/dim_items_cleaned.csv"


dim_items_cleaned = pd.read_csv(url3, low_memory=False)



# We focus on the titles and their category groupings
addons = dim_add_ons_cleaned[['id', 'title', 'category_id']].copy()

#  Link Menu Items → Add-on Categories
# "We focus on items that have available add-ons"
item_addon_cats = dim_items_cleaned[['id', 'add_on_category_ids']].dropna().copy()

# Split the pipe-separated IDs into a list
item_addon_cats['category_id'] = (
    item_addon_cats['add_on_category_ids']
    .astype(str)
    .str.split('|')
)

# Explode the list so each row is a unique Item-Category pair
item_addon_cats = item_addon_cats.explode('category_id')
item_addon_cats['category_id'] = pd.to_numeric(item_addon_cats['category_id'], errors='coerce')
item_addon_cats = item_addon_cats.dropna()

#  Attach Add-on Titles
# This merges the item links with the actual add-on names
item_addons = pd.merge(
    item_addon_cats,
    addons,
    on='category_id',
    how='inner',
    suffixes=('_item_link', '')
)


#  Generate Transactions

# Each "Transaction" is the set of add-ons available to one specific menu item
transactions = (
    item_addons
    .groupby('id_item_link')['title']
    .apply(lambda x: sorted(set(x)))
    .tolist()
)

total_transactions = len(transactions)


#  Calculate Frequencies

item_counts = {}
pair_counts = {}

for transaction in transactions:
    for item in transaction:
        item_counts[item] = item_counts.get(item, 0) + 1
    for pair in combinations(transaction, 2):
        pair = tuple(sorted(pair))
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

# Build Association Rules

# Filter by Support > 15% and Confidence > 70%
rules = []

for (a, b), ab_count in pair_counts.items():
    support_ab = ab_count / total_transactions
    support_a = item_counts[a] / total_transactions
    support_b = item_counts[b] / total_transactions

    # Direction: A → B
    conf_ab = support_ab / support_a
    if support_ab >= 0.15 and conf_ab >= 0.70:
        rules.append({
            'antecedent': a,
            'consequent': b,
            'support': support_ab,
            'confidence': conf_ab,
            'lift': support_ab / (support_a * support_b)
        })

    # Direction: B → A
    conf_ba = support_ab / support_b
    if support_ab >= 0.15 and conf_ba >= 0.70:
        rules.append({
            'antecedent': b,
            'consequent': a,
            'support': support_ab,
            'confidence': conf_ba,
            'lift': support_ab / (support_a * support_b)
        })

#  Final Output & Export

strong_rules = (
    pd.DataFrame(rules)
    .sort_values(['confidence', 'support'], ascending=False)
    .reset_index(drop=True)
)


print("Strong Add-on Associations Found:")
print(strong_rules.head(10))

# Save to CSV
strong_rules.to_csv('strong_rules.csv', index=False)
strong_rules.to_csv('strong_addon_rules.csv', index=False)

Strong Add-on Associations Found:
  antecedent  consequent   support  confidence      lift
0     Pølser       Bacon  0.194567    1.000000  3.296489
1     Pølser      Skinke  0.194567    1.000000  3.394523
2  Tacosauce  Champignon  0.190092    1.000000  2.895784
3  Tacosauce         Løg  0.190092    1.000000  2.883679
4       Majs      Ananas  0.273352    0.998389  2.859284
5     Pølser      Ananas  0.194252    0.998380  2.859260
6   Kødsauce  Champignon  0.302534    0.995025  2.881377
7  Oksefilet  Champignon  0.199168    0.994336  2.879382
8  Oksefilet       Kebab  0.199168    0.994336  2.971024
9  Spaghetti         Løg  0.286020    0.989318  2.852874


In [14]:
import pandas as pd
import numpy as np
from itertools import combinations

url2 = "https://raw.githubusercontent.com/Radwa-Ayman239/MenueEngineering/refs/heads/main/Data%20Analysis/Datasets/dim_add_ons_cleaned.csv"
dim_add_ons_cleaned = pd.read_csv(url2)
url3 = "https://raw.githubusercontent.com/Radwa-Ayman239/MenueEngineering/refs/heads/main/Data%20Analysis/Datasets/dim_items_cleaned.csv"


dim_items_cleaned = pd.read_csv(url3, low_memory=False)


# 2. Extract Titles and Categories
addons = dim_add_ons_cleaned[['id', 'title', 'category_id']].copy()
item_addon_cats = dim_items_cleaned[['id', 'add_on_category_ids']].dropna().copy()

# 3. Process IDs
item_addon_cats['category_id'] = item_addon_cats['add_on_category_ids'].astype(str).str.split('|')
item_addon_cats = item_addon_cats.explode('category_id')
item_addon_cats['category_id'] = pd.to_numeric(item_addon_cats['category_id'], errors='coerce')
item_addon_cats = item_addon_cats.dropna()

# 4. Merge to Link Items to Add-on Names
item_addons = pd.merge(
    item_addon_cats,
    addons,
    on='category_id',
    how='inner',
    suffixes=('_item_link', '')
)

# 5. Generate Transactions (Add-on groups per menu item)
transactions = (
    item_addons
    .groupby('id_item_link')['title']
    .apply(lambda x: sorted(set(x)))
    .tolist()
)

total_transactions = len(transactions)

# 6. Calculate Frequencies
item_counts = {}
pair_counts = {}

for transaction in transactions:
    for item in transaction:
        item_counts[item] = item_counts.get(item, 0) + 1
    for pair in combinations(transaction, 2):
        pair = tuple(sorted(pair))
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

# 7. Build Association Rules
rules = []
for (a, b), ab_count in pair_counts.items():
    support_ab = ab_count / total_transactions
    support_a = item_counts[a] / total_transactions
    support_b = item_counts[b] / total_transactions

    # A -> B
    conf_ab = support_ab / support_a
    if support_ab >= 0.15 and conf_ab >= 0.70:
        rules.append({
            'antecedent': a,
            'consequent': b,
            'support': support_ab,
            'confidence': conf_ab,
            'lift': support_ab / (support_a * support_b)
        })

    # B -> A
    conf_ba = support_ab / support_b
    if support_ab >= 0.15 and conf_ba >= 0.70:
        rules.append({
            'antecedent': b,
            'consequent': a,
            'support': support_ab,
            'confidence': conf_ba,
            'lift': support_ab / (support_a * support_b)
        })

# 8. Final Export
strong_rules = pd.DataFrame(rules).sort_values(['confidence', 'support'], ascending=False).reset_index(drop=True)

# Save the results to CSV files
strong_rules.to_csv('strong_addon_rules.csv', index=False)

print(f"Success! Analyzed {total_transactions} items.")
print("The file 'strong_addon_rules.csv' has been saved to your current folder.")

Success! Analyzed 15866 items.
The file 'strong_addon_rules.csv' has been saved to your current folder.


In [9]:
import pandas as pd

from itertools import combinations


url3 = "https://raw.githubusercontent.com/Radwa-Ayman239/MenueEngineering/refs/heads/main/Data%20Analysis/Datasets/dim_items_cleaned.csv"


dim_items_cleaned = pd.read_csv(url3, low_memory=False)


df = dim_items_cleaned[['title', 'add_on_category_ids']].dropna()

# Split the pipe-separated IDs and explode them
df['cat_list'] = df['add_on_category_ids'].astype(str).str.split('|')
exploded = df.explode('cat_list')

# 3. Create "Category Baskets"
# Each Category ID acts as a "basket" containing all items that use it
transactions = (
    exploded.groupby('cat_list')['title']
    .apply(lambda x: sorted(set(x)))
    .tolist()
)

# Filter out categories that only apply to one item
transactions = [t for t in transactions if len(t) > 1]
total_trans = len(transactions)

# 4. Manual Association Logic
item_counts = {}
pair_counts = {}

for basket in transactions:
    for item in basket:
        item_counts[item] = item_counts.get(item, 0) + 1
    for pair in combinations(basket, 2):
        pair = tuple(sorted(pair))
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

# 5. Build the Rules
rules = []
for (a, b), count in pair_counts.items():
    support = count / total_trans
    conf_a_to_b = count / item_counts[a]

    # We use a lower support (1%) because menu items are more diverse than ingredients
    if support >= 0.01 and conf_a_to_b >= 0.50:
        rules.append({
            'item_a': a,
            'item_b': b,
            'support': support,
            'confidence': conf_a_to_b,
            'lift': (count / total_trans) / ((item_counts[a]/total_trans) * (item_counts[b]/total_trans))
        })

# 6. Final Output with Safety Check
if len(rules) > 0:
    df_rules = pd.DataFrame(rules).sort_values(['confidence', 'support'], ascending=False)
    print("Strong Item-to-Item Structural Associations Found:")
    print(df_rules.head(10))
    df_rules.to_csv('item_structural_associations.csv', index=False)
else:
    print("No item associations found. Try lowering the support threshold.")

Strong Item-to-Item Structural Associations Found:
                       item_a                         item_b   support  \
16                   Coleslaw                Corn on the cob  0.039749   
13              Chicken Wings                       Coleslaw  0.039226   
14              Chicken Wings                Corn on the cob  0.039226   
36  Google Pixel 5 Reparation     Google Pixel 5A Reparation  0.018828   
37  Google Pixel 5 Reparation  Google Pixel 6 Pro Reparation  0.018828   
38  Google Pixel 5 Reparation      Google Pixel 6 Reparation  0.018828   
39  Google Pixel 5 Reparation     Google Pixel 6A Reparation  0.018828   
40  Google Pixel 5 Reparation  Google Pixel 7 Pro Reparation  0.018828   
41  Google Pixel 5 Reparation      Google Pixel 7 Reparation  0.018828   
42  Google Pixel 5 Reparation     Google Pixel 7A Reparation  0.018828   

    confidence       lift  
16         1.0  25.157895  
13         1.0  25.157895  
14         1.0  25.157895  
36         1.0  53.111

In [13]:
import pandas as pd
from itertools import combinations


url3 = "https://raw.githubusercontent.com/Radwa-Ayman239/MenueEngineering/refs/heads/main/Data%20Analysis/Datasets/dim_items_cleaned.csv"


dim_items_cleaned = pd.read_csv(url3, low_memory=False)

# 1. Group items by their shared Add-on Categories
df = dim_items_cleaned[['title', 'add_on_category_ids']].dropna()
df['cat_list'] = df['add_on_category_ids'].astype(str).str.split('|')
exploded = df.explode('cat_list')

transactions = (
    exploded.groupby('cat_list')['title']
    .apply(lambda x: sorted(set(x)))
    .tolist()
)

# # Filter groups and count occurrences
transactions = [t for t in transactions if len(t) > 1]
total_trans = len(transactions)
item_counts = {}
pair_counts = {}

for basket in transactions:
    for item in basket:
        item_counts[item] = item_counts.get(item, 0) + 1
    for pair in combinations(basket, 2):
        pair = tuple(sorted(pair))
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

# Calculate Confidence and Save
rules = []
for (a, b), count in pair_counts.items():
    rules.append({
        'item_a': a,
        'item_b': b,
        'support': count/total_trans,
        'confidence': count/item_counts[a]
    })

df_item_rules = pd.DataFrame(rules).sort_values('confidence', ascending=False)

# THIS LINE CREATES THE FILE IN YOUR FOLDER
df_item_rules.to_csv('item_item_structural_associations.csv', index=False)
print("File 'item_item_structural_associations.csv' has been saved to your folder.")

File 'item_item_structural_associations.csv' has been saved to your folder.
