In [6]:
import warnings
from datetime import datetime

warnings.simplefilter("ignore", DeprecationWarning)
warnings.simplefilter("ignore", FutureWarning)

!pip -q install mlxtend

Romania

In [9]:
import pandas as pd
import numpy as np
import time

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, apriori, association_rules
from google.colab import drive
# Expose Google Drive to the Colab runtime; the survey CSV is read from there.
drive.mount("/content/drive")

PATH = "/content/drive/MyDrive/consumption_user.csv"

# low_memory=False parses the file in one pass instead of in chunks.
df = pd.read_csv(filepath_or_buffer=PATH, low_memory=False)

print("Shape:", df.shape)
df.head()

Mounted at /content/drive
Shape: (257952, 80)


Unnamed: 0,SUBJECT,ROUND,SURVEY_DAY,RESPONDER,SEASON,CONSUMPTION_DAY,CONSUMPTION_MONTH,CONSUMPTION_YEAR,WEEK_DAY,EXCEPTION_DAY,...,RETOL_mcg,VITD_mcg,VITE_mg,VITK_mcg,PHY_mg,N6_g,EPA_DHA_g,ALA_g,EAT_SEQ,VERSION
0,1,1,1,1,,,,2012,2,1,...,,1.6,0.35,5.12,,3.5,0.0,0.2,1,16-Jun-21
1,1,1,1,1,,,,2012,2,1,...,,0.0,0.57,11.55,,1.32,0.0,0.12,1,
2,1,1,1,1,,,,2012,2,1,...,,0.0,0.93,2.8,,0.87,0.0,0.13,1,
3,1,1,1,1,,,,2012,2,1,...,,0.0,0.0,0.0,,,0.0,,2,
4,1,1,1,1,,,,2012,2,1,...,,0.0,0.03,60.68,,,0.0,,1,


In [10]:
keep_cols = ["SUBJECT", "SURVEY_DAY", "INGREDIENT_ENG", "FOOD_AMOUNT_CONS"]

# Drop rows with a missing ingredient BEFORE casting to str: .astype(str)
# turns NaN into the literal string "nan", which a later notna() check can
# never catch and which would otherwise survive as a bogus item called "nan".
data = df.loc[df["INGREDIENT_ENG"].notna(), keep_cols].copy()

# Normalised item label: the ingredient name without surrounding whitespace.
data["ITEM"] = data["INGREDIENT_ENG"].astype(str).str.strip()

# Remove labels that are empty after stripping. The explicit .copy() makes the
# result an independent frame, so the column assignment below does not write
# into a filtered view (SettingWithCopyWarning).
data = data[data["ITEM"] != ""].copy()

# Non-numeric amounts become NaN instead of raising.
data["FOOD_AMOUNT_CONS"] = pd.to_numeric(data["FOOD_AMOUNT_CONS"], errors="coerce")

print("Rânduri după curățare:", data.shape[0])
data.head()

Rânduri după curățare: 257952


Unnamed: 0,SUBJECT,SURVEY_DAY,INGREDIENT_ENG,FOOD_AMOUNT_CONS,ITEM
0,1,1,SALAMI,160.0,SALAMI
1,1,1,BREAD,150.0,BREAD
2,1,1,BUTTER,40.0,BUTTER
3,1,1,TEA,250.0,TEA
4,1,1,"VEGETABLES, MIXED",3.7,"VEGETABLES, MIXED"


In [11]:
# Transaction id = subject + "_D" + survey day (e.g. "1_D1").
subject_txt = data["SUBJECT"].astype(str)
day_txt = data["SURVEY_DAY"].astype("Int64").astype(str)
data["TID"] = subject_txt + "_D" + day_txt

print("Tranzacții (TID) unice:", data["TID"].nunique())
print("Item-uri unice:", data["ITEM"].nunique())
data.loc[:, ["TID", "ITEM"]].head()

Tranzacții (TID) unice: 9666
Item-uri unice: 1033


Unnamed: 0,TID,ITEM
0,1_D1,SALAMI
1,1_D1,BREAD
2,1_D1,BUTTER
3,1_D1,TEA
4,1_D1,"VEGETABLES, MIXED"


In [12]:
# Build one basket per TID, listing each item at most once.
unique_pairs = data.drop_duplicates(subset=["TID", "ITEM"])
transactions = unique_pairs.groupby("TID")["ITEM"].apply(list).tolist()

print("Număr coșuri:", len(transactions))
print("Exemplu coș:", transactions[0][:15])

# Encode the baskets as a matrix: one row per transaction, one column per item.
te = TransactionEncoder()
te.fit(transactions)
basket = te.transform(transactions)
basket_df = pd.DataFrame(data=basket, columns=te.columns_)

print("Basket DF shape:", basket_df.shape)

Număr coșuri: 9666
Exemplu coș: ['BREAD, BROWN', 'TOMATO', 'HAM', 'COFFEE, UNSWEETENED', 'SALT', 'WATER (DRINKING  WATER)', 'PARSLEY ROOT', 'ZUCCHINI', 'BORSCHT, RAW', 'CARROT', 'PEPPER', 'GREEN BEANS, PODS', 'VEGETABLES, MIXED', 'ONION', 'MILK']
Basket DF shape: (9666, 1033)


In [13]:
MIN_ITEM_SUPPORT = 0.05

# Per-item support, taken as the column means of the basket matrix.
item_support = basket_df.mean()

# Keep only the items whose support reaches the threshold.
common_items = item_support.loc[item_support.ge(MIN_ITEM_SUPPORT)].index
basket_df_small = basket_df.loc[:, common_items]

print("După filtrare item-uri:", basket_df_small.shape)

După filtrare item-uri: (9666, 76)


In [31]:
MIN_SUPPORT = 0.05

# Time the mining step with perf_counter(): it is monotonic and uses the
# highest-resolution clock available, whereas time.time() is a wall clock that
# can be adjusted while the cell is running.
start = time.perf_counter()

# Frequent itemsets of at most 2 items, mined on the support-filtered baskets.
freq_fp = fpgrowth(
    basket_df_small,
    min_support=MIN_SUPPORT,
    use_colnames=True,
    max_len=2
)

t_fp = time.perf_counter() - start
print("FP-Growth time:", t_fp)
print("Nr itemsets:", len(freq_fp))

# Itemset size, used as a secondary sort key for the preview below.
freq_fp["len"] = freq_fp["itemsets"].apply(len)
freq_fp.sort_values(["support", "len"], ascending=False).head(10)

FP-Growth time: 32.129029989242554
Nr itemsets: 922


Unnamed: 0,support,itemsets,len
0,0.942582,(SALT),1
1,0.869646,(OIL),1
76,0.858473,"(OIL, SALT)",2
2,0.83292,(ONION),1
77,0.826609,"(SALT, ONION)",2
3,0.778502,(CARROT),1
79,0.773743,"(SALT, CARROT)",2
4,0.772605,(BREAD),1
78,0.765467,"(OIL, ONION)",2
80,0.757604,"(ONION, CARROT)",2


In [32]:
# Rule generation takes only a few milliseconds, so it is timed with
# perf_counter() (monotonic, high resolution) instead of the wall clock.
start = time.perf_counter()

rules_fp = association_rules(freq_fp, metric="confidence", min_threshold=0.3)

t_rules = time.perf_counter() - start
print("Rules time:", t_rules)
# Count reported BEFORE the lift filter below ("brut" = raw).
print("Nr rules (brut):", len(rules_fp))

# Keep only rules whose lift is strictly above 1.
rules_fp = rules_fp[(rules_fp["lift"] > 1.0)].copy()
rules_fp.sort_values(["lift", "confidence"], ascending=False).head(10)

Rules time: 0.014458179473876953
Nr rules (brut): 1082


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
867,"(PASTA, NOODLES)",(CHICKEN BROTH),0.159321,0.095386,0.095282,0.598052,6.269816,1.0,0.080085,2.250575,0.999794,0.597664,0.555669,0.798484
866,(CHICKEN BROTH),"(PASTA, NOODLES)",0.095386,0.159321,0.095282,0.998915,6.269816,1.0,0.080085,775.105731,0.929132,0.597664,0.99871,0.798484
856,(CHICKEN BROTH),(POULTRY MEAT),0.095386,0.172771,0.095386,1.0,5.788024,1.0,0.078906,inf,0.914456,0.552096,1.0,0.776048
855,(POULTRY MEAT),(CHICKEN BROTH),0.172771,0.095386,0.095386,0.552096,5.788024,1.0,0.078906,2.01966,1.0,0.552096,0.504867,0.776048
692,(PASTA),"(CHEESE, TELEMEA)",0.081316,0.108421,0.050383,0.619593,5.71468,1.0,0.041566,2.343749,0.898037,0.361544,0.573333,0.542144
693,"(CHEESE, TELEMEA)",(PASTA),0.108421,0.081316,0.050383,0.464695,5.71468,1.0,0.041566,1.716187,0.925338,0.361544,0.417313,0.542144
352,(MEAT BROTH),(RICE),0.052555,0.245189,0.052038,0.990157,4.038338,1.0,0.039152,76.688765,0.794108,0.211789,0.98696,0.601197
688,(PASTA),(CONDIMENTS),0.081316,0.241361,0.070246,0.863868,3.579145,1.0,0.05062,5.572803,0.784387,0.278279,0.820557,0.577455
835,"(PASTA, NOODLES)",(POULTRY MEAT),0.159321,0.172771,0.097455,0.611688,3.540467,1.0,0.069929,2.130323,0.853538,0.415344,0.530588,0.58788
834,(POULTRY MEAT),"(PASTA, NOODLES)",0.172771,0.159321,0.097455,0.564072,3.540467,1.0,0.069929,1.92848,0.867415,0.415344,0.481457,0.58788


In [33]:
# Apriori on the same filtered baskets and threshold as the FP-Growth run.
# perf_counter() is used for both timings: it is monotonic and high resolution,
# unlike the adjustable wall clock returned by time.time().
start = time.perf_counter()

freq_ap = apriori(
    basket_df_small,
    min_support=MIN_SUPPORT,
    use_colnames=True,
    max_len=2
)

t_ap = time.perf_counter() - start
print("Apriori time:", t_ap)
print("Nr itemsets:", len(freq_ap))

# Only the rule-generation call is timed; the lift filter is outside the window.
start = time.perf_counter()
rules_ap = association_rules(freq_ap, metric="confidence", min_threshold=0.3)
t_ap_rules = time.perf_counter() - start

# Keep only rules whose lift is strictly above 1.
rules_ap = rules_ap[rules_ap["lift"] > 1.0].copy()

print("Apriori rules time:", t_ap_rules)
print("Nr rules:", len(rules_ap))

rules_ap.sort_values(["lift", "confidence"], ascending=False).head(10)

Apriori time: 0.17417287826538086
Nr itemsets: 922
Apriori rules time: 0.01728963851928711
Nr rules: 839


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
441,"(PASTA, NOODLES)",(CHICKEN BROTH),0.159321,0.095386,0.095282,0.598052,6.269816,1.0,0.080085,2.250575,0.999794,0.597664,0.555669,0.798484
440,(CHICKEN BROTH),"(PASTA, NOODLES)",0.095386,0.159321,0.095282,0.998915,6.269816,1.0,0.080085,775.105731,0.929132,0.597664,0.99871,0.798484
444,(CHICKEN BROTH),(POULTRY MEAT),0.095386,0.172771,0.095386,1.0,5.788024,1.0,0.078906,inf,0.914456,0.552096,1.0,0.776048
443,(POULTRY MEAT),(CHICKEN BROTH),0.172771,0.095386,0.095386,0.552096,5.788024,1.0,0.078906,2.01966,1.0,0.552096,0.504867,0.776048
429,(PASTA),"(CHEESE, TELEMEA)",0.081316,0.108421,0.050383,0.619593,5.71468,1.0,0.041566,2.343749,0.898037,0.361544,0.573333,0.542144
430,"(CHEESE, TELEMEA)",(PASTA),0.108421,0.081316,0.050383,0.464695,5.71468,1.0,0.041566,1.716187,0.925338,0.361544,0.417313,0.542144
764,(MEAT BROTH),(RICE),0.052555,0.245189,0.052038,0.990157,4.038338,1.0,0.039152,76.688765,0.794108,0.211789,0.98696,0.601197
511,(PASTA),(CONDIMENTS),0.081316,0.241361,0.070246,0.863868,3.579145,1.0,0.05062,5.572803,0.784387,0.278279,0.820557,0.577455
922,"(PASTA, NOODLES)",(POULTRY MEAT),0.159321,0.172771,0.097455,0.611688,3.540467,1.0,0.069929,2.130323,0.853538,0.415344,0.530588,0.58788
921,(POULTRY MEAT),"(PASTA, NOODLES)",0.172771,0.159321,0.097455,0.564072,3.540467,1.0,0.069929,1.92848,0.867415,0.415344,0.481457,0.58788


In [34]:
# Number of distinct items in every Romanian basket.
basket_sizes_ro = list(map(len, transactions))
mean_size_ro = round(np.mean(basket_sizes_ro), 2)

print("ROMÂNIA – statistici generale")
print("Nr tranzacții:", len(transactions))
print("Nr item-uri unice:", len(basket_df.columns))
print(
    "Dimensiune coș (min / medie / max):",
    min(basket_sizes_ro),
    mean_size_ro,
    max(basket_sizes_ro),
)

ROMÂNIA – statistici generale
Nr tranzacții: 9666
Nr item-uri unice: 1033
Dimensiune coș (min / medie / max): 1 20.73 50


In [35]:
# Side-by-side comparison of the two miners on the Romanian data
# (row 0 = FP-Growth, row 1 = Apriori).
summary = pd.DataFrame(
    {
        "Algoritm": ["FP-Growth", "Apriori"],
        "time_itemsets": [t_fp, t_ap],
        "nr_itemsets": [len(freq_fp), len(freq_ap)],
        "time_rules": [t_rules, t_ap_rules],
        "nr_rules": [len(rules_fp), len(rules_ap)],
    }
)

summary

Unnamed: 0,Algoritm,time_itemsets,nr_itemsets,time_rules,nr_rules
0,FP-Growth,32.12903,922,0.014458,839
1,Apriori,0.174173,922,0.01729,839


Italia

In [36]:
PATH_IT = "/content/drive/MyDrive/consumption_italy.csv"

# The Italian file is decoded as latin1 and parsed in a single pass.
df_it = pd.read_csv(PATH_IT, encoding="latin1", low_memory=False)

print(df_it.shape)
df_it.head()

(285589, 77)


Unnamed: 0,SUBJECT,ROUND,SURVEY_DAY,RESPONDER,SEASON,CONSUMPTION_DAY,CONSUMPTION_MONTH,CONSUMPTION_YEAR,WEEK_DAY,EXCEPTION_DAY,...,VITA_RAE_mcg,VITA_RE_mcg,BCAROT_mcg,RETOL_mcg,VITD_mcg,VITE_mg,VITK_mcg,PHY_mg,EAT_SEQ,VERSION
0,1,1,1,,,28,6,2006,4,1,...,0,,0,0,0.0,0.21,,,1,16 June 2021
1,1,1,1,,,28,6,2006,4,1,...,52,,25,48,0.02,0.1,,,1,
2,1,1,1,,,28,6,2006,4,1,...,0,,0,0,0.0,0.0,,,1,
3,1,1,1,,,28,6,2006,4,1,...,0,,0,0,0.0,0.0,,,1,
4,1,1,1,,,28,6,2006,4,1,...,76,,454,0,0.0,1.02,,,1,


In [37]:
keep_cols = ["SUBJECT", "SURVEY_DAY", "INGREDIENT_ENG", "FOOD_AMOUNT_CONS"]

# Drop rows with a missing ingredient BEFORE casting to str: .astype(str)
# turns NaN into the literal string "nan", which a later notna() check can
# never catch and which would otherwise survive as a bogus item called "nan".
data_it = df_it.loc[df_it["INGREDIENT_ENG"].notna(), keep_cols].copy()

# Normalised item label: the ingredient name without surrounding whitespace.
data_it["ITEM"] = data_it["INGREDIENT_ENG"].astype(str).str.strip()

# Remove labels that are empty after stripping. The explicit .copy() makes the
# result an independent frame, so the column assignment below does not write
# into a filtered view (SettingWithCopyWarning).
data_it = data_it[data_it["ITEM"] != ""].copy()

# Non-numeric amounts become NaN instead of raising.
# NOTE(review): the preview rows for Italy show NaN amounts — confirm that the
# source column is actually parseable as numbers.
data_it["FOOD_AMOUNT_CONS"] = pd.to_numeric(
    data_it["FOOD_AMOUNT_CONS"], errors="coerce"
)

print("Rânduri după curățare (IT):", data_it.shape[0])
data_it.head()

Rânduri după curățare (IT): 285589


Unnamed: 0,SUBJECT,SURVEY_DAY,INGREDIENT_ENG,FOOD_AMOUNT_CONS,ITEM
0,1,1,"Cereals ready-to-eat, crispy oat flakes, ""Extr...",,"Cereals ready-to-eat, crispy oat flakes, ""Extr..."
1,1,1,"Milk, cow, semi-skimmed",,"Milk, cow, semi-skimmed"
2,1,1,Sugar,,Sugar
3,1,1,"Pasta, unenriched, dry, raw",,"Pasta, unenriched, dry, raw"
4,1,1,"Tomato, peeled tomato, canned",,"Tomato, peeled tomato, canned"


In [38]:
# Transaction id = subject + "_D" + survey day (e.g. "1_D1").
subject_txt_it = data_it["SUBJECT"].astype(str)
day_txt_it = data_it["SURVEY_DAY"].astype("Int64").astype(str)
data_it["TID"] = subject_txt_it + "_D" + day_txt_it

print("Tranzacții (TID) unice IT:", data_it["TID"].nunique())
print("Item-uri unice IT:", data_it["ITEM"].nunique())
data_it.loc[:, ["TID", "ITEM"]].head()

Tranzacții (TID) unice IT: 9969
Item-uri unice IT: 1112


Unnamed: 0,TID,ITEM
0,1_D1,"Cereals ready-to-eat, crispy oat flakes, ""Extr..."
1,1_D1,"Milk, cow, semi-skimmed"
2,1_D1,Sugar
3,1_D1,"Pasta, unenriched, dry, raw"
4,1_D1,"Tomato, peeled tomato, canned"


In [39]:
# Build one basket per TID, listing each item at most once.
unique_pairs_it = data_it.drop_duplicates(subset=["TID", "ITEM"])
transactions_it = unique_pairs_it.groupby("TID")["ITEM"].apply(list).tolist()

print("Număr coșuri IT:", len(transactions_it))
print("Exemplu coș IT:", transactions_it[0][:15])

# Encode the baskets as a matrix: one row per transaction, one column per item.
te = TransactionEncoder()
te.fit(transactions_it)
basket_it = te.transform(transactions_it)
basket_df_it = pd.DataFrame(data=basket_it, columns=te.columns_)

print("Basket DF IT shape:", basket_df_it.shape)

Număr coșuri IT: 9969
Exemplu coș IT: ['Coffee, brewed, espresso, homemade, without sugar', 'Cake, commercially prepared, sponge cake type, "Buondì"', 'Water, bottled water - Leggera', 'Coffee, brewed, espresso, restaurant, bar-prepared, without sugar', 'Pasta, unenriched, dry, raw', 'Tomato, peeled tomato, canned', 'Garlic, raw', 'Parsley, raw', 'Caper, all types', 'Olive, black olive, processed', 'Fish, anchovy, raw', 'Vegetable oil, olive oil', 'Cheese, Gorgonzola, from cow milk', 'Apple, unpeeled, fresh', 'Pizza, white pizza']
Basket DF IT shape: (9969, 1112)


In [41]:
MIN_ITEM_SUPPORT = 0.05

# Per-item support, taken as the column means of the basket matrix.
item_support_it = basket_df_it.mean()

# Keep only the items whose support reaches the threshold.
common_items_it = item_support_it.loc[item_support_it.ge(MIN_ITEM_SUPPORT)].index
basket_df_small_it = basket_df_it.loc[:, common_items_it]

print("După filtrare item-uri IT:", basket_df_small_it.shape)

După filtrare item-uri IT: (9969, 84)


In [42]:
MIN_SUPPORT = 0.05

# Time the mining step with perf_counter(): it is monotonic and uses the
# highest-resolution clock available, whereas time.time() is a wall clock that
# can be adjusted while the cell is running.
start = time.perf_counter()

# Frequent itemsets of at most 2 items, mined on the support-filtered baskets.
freq_fp_it = fpgrowth(
    basket_df_small_it,
    min_support=MIN_SUPPORT,
    use_colnames=True,
    max_len=2
)

t_fp_it = time.perf_counter() - start
print("FP-Growth time IT:", t_fp_it)
print("Nr itemsets IT:", len(freq_fp_it))

# Itemset size, used as a secondary sort key for the preview below.
freq_fp_it["len"] = freq_fp_it["itemsets"].apply(len)
freq_fp_it.sort_values(["support", "len"], ascending=False).head(10)

FP-Growth time IT: 33.7542827129364
Nr itemsets IT: 939


Unnamed: 0,support,itemsets,len
0,0.918347,"(Vegetable oil, olive oil)",1
15,0.726452,(Sugar),1
1,0.676497,"(Coffee, brewed, espresso, homemade, without s...",1
205,0.67409,"(Vegetable oil, olive oil, Sugar)",2
84,0.629451,"(Vegetable oil, olive oil, Coffee, brewed, esp...",2
16,0.612098,"(Onion, raw)",1
206,0.588023,"(Vegetable oil, olive oil, Onion, raw)",2
2,0.582506,"(Pasta, unenriched, dry, raw)",1
86,0.560036,"(Vegetable oil, olive oil, Pasta, unenriched, ...",2
85,0.540676,"(Sugar, Coffee, brewed, espresso, homemade, wi...",2


In [43]:
# Rule generation takes only a few milliseconds, so it is timed with
# perf_counter() (monotonic, high resolution) instead of the wall clock.
start = time.perf_counter()

rules_fp_it = association_rules(
    freq_fp_it,
    metric="confidence",
    min_threshold=0.3
)

t_rules_it = time.perf_counter() - start
print("Rules time IT:", t_rules_it)
# Count reported BEFORE the lift filter below ("brut" = raw).
print("Nr rules (brut) IT:", len(rules_fp_it))

# Keep only rules whose lift is strictly above 1.
rules_fp_it = rules_fp_it[rules_fp_it["lift"] > 1.0].copy()

rules_fp_it.sort_values(
    ["lift", "confidence"],
    ascending=False
).head(10)

Rules time IT: 0.014131307601928711
Nr rules (brut) IT: 980


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
612,"(Cucumber, raw)","(Red chicory, ""Radicchio"", raw)",0.089377,0.110041,0.064801,0.725028,6.588701,1.0,0.054966,3.236544,0.931478,0.481371,0.691028,0.656953
611,"(Red chicory, ""Radicchio"", raw)","(Cucumber, raw)",0.110041,0.089377,0.064801,0.588879,6.588701,1.0,0.054966,2.214974,0.953106,0.481371,0.548527,0.656953
598,"(Red chicory, ""Radicchio"", raw)","(Fennel, raw)",0.110041,0.134718,0.083659,0.760255,5.643324,1.0,0.068835,3.609182,0.924537,0.519303,0.722929,0.690627
597,"(Fennel, raw)","(Red chicory, ""Radicchio"", raw)",0.134718,0.110041,0.083659,0.620998,5.643324,1.0,0.068835,2.348163,0.950903,0.519303,0.574135,0.690627
439,"(Yeast, beer yeast tablets)","(Vegetable oil, mixed seeds)",0.137627,0.156385,0.120975,0.879009,5.620807,1.0,0.099452,6.972531,0.953287,0.69913,0.85658,0.826291
438,"(Vegetable oil, mixed seeds)","(Yeast, beer yeast tablets)",0.156385,0.137627,0.120975,0.773573,5.620807,1.0,0.099452,3.808612,0.974484,0.69913,0.737437,0.826291
614,"(Cucumber, raw)","(Fennel, raw)",0.089377,0.134718,0.064701,0.723906,5.373504,1.0,0.05266,3.134011,0.893786,0.405916,0.68092,0.602087
613,"(Fennel, raw)","(Cucumber, raw)",0.134718,0.089377,0.064701,0.480268,5.373504,1.0,0.05266,1.752101,0.94062,0.405916,0.429257,0.602087
437,"(Cheese, Mozzarella, cow's milk)","(Yeast, beer yeast tablets)",0.241749,0.137627,0.123082,0.509129,3.699346,1.0,0.08981,1.756821,0.962323,0.480235,0.43079,0.701722
436,"(Yeast, beer yeast tablets)","(Cheese, Mozzarella, cow's milk)",0.137627,0.241749,0.123082,0.894315,3.699346,1.0,0.08981,7.174619,0.846132,0.480235,0.86062,0.701722


In [44]:
# Apriori on the same filtered baskets and threshold as the FP-Growth run.
# perf_counter() is used for both timings: it is monotonic and high resolution,
# unlike the adjustable wall clock returned by time.time().
start = time.perf_counter()

freq_ap_it = apriori(
    basket_df_small_it,
    min_support=MIN_SUPPORT,
    use_colnames=True,
    max_len=2
)

t_ap_it = time.perf_counter() - start
print("Apriori time IT:", t_ap_it)
print("Nr itemsets IT:", len(freq_ap_it))


# Only the rule-generation call is timed; the lift filter is outside the window.
start = time.perf_counter()
rules_ap_it = association_rules(
    freq_ap_it,
    metric="confidence",
    min_threshold=0.3
)
t_ap_rules_it = time.perf_counter() - start

# Keep only rules whose lift is strictly above 1.
rules_ap_it = rules_ap_it[rules_ap_it["lift"] > 1.0].copy()

print("Apriori rules time IT:", t_ap_rules_it)
print("Nr rules IT:", len(rules_ap_it))

rules_ap_it.sort_values(
    ["lift", "confidence"],
    ascending=False
).head(10)

Apriori time IT: 0.09719014167785645
Nr itemsets IT: 939
Apriori rules time IT: 0.008046865463256836
Nr rules IT: 698


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
517,"(Cucumber, raw)","(Red chicory, ""Radicchio"", raw)",0.089377,0.110041,0.064801,0.725028,6.588701,1.0,0.054966,3.236544,0.931478,0.481371,0.691028,0.656953
516,"(Red chicory, ""Radicchio"", raw)","(Cucumber, raw)",0.110041,0.089377,0.064801,0.588879,6.588701,1.0,0.054966,2.214974,0.953106,0.481371,0.548527,0.656953
572,"(Red chicory, ""Radicchio"", raw)","(Fennel, raw)",0.110041,0.134718,0.083659,0.760255,5.643324,1.0,0.068835,3.609182,0.924537,0.519303,0.722929,0.690627
571,"(Fennel, raw)","(Red chicory, ""Radicchio"", raw)",0.134718,0.110041,0.083659,0.620998,5.643324,1.0,0.068835,2.348163,0.950903,0.519303,0.574135,0.690627
962,"(Yeast, beer yeast tablets)","(Vegetable oil, mixed seeds)",0.137627,0.156385,0.120975,0.879009,5.620807,1.0,0.099452,6.972531,0.953287,0.69913,0.85658,0.826291
961,"(Vegetable oil, mixed seeds)","(Yeast, beer yeast tablets)",0.156385,0.137627,0.120975,0.773573,5.620807,1.0,0.099452,3.808612,0.974484,0.69913,0.737437,0.826291
514,"(Cucumber, raw)","(Fennel, raw)",0.089377,0.134718,0.064701,0.723906,5.373504,1.0,0.05266,3.134011,0.893786,0.405916,0.68092,0.602087
513,"(Fennel, raw)","(Cucumber, raw)",0.134718,0.089377,0.064701,0.480268,5.373504,1.0,0.05266,1.752101,0.94062,0.405916,0.429257,0.602087
370,"(Cheese, Mozzarella, cow's milk)","(Yeast, beer yeast tablets)",0.241749,0.137627,0.123082,0.509129,3.699346,1.0,0.08981,1.756821,0.962323,0.480235,0.43079,0.701722
369,"(Yeast, beer yeast tablets)","(Cheese, Mozzarella, cow's milk)",0.137627,0.241749,0.123082,0.894315,3.699346,1.0,0.08981,7.174619,0.846132,0.480235,0.86062,0.701722


In [45]:
# Side-by-side comparison of the two miners on the Italian data
# (row 0 = FP-Growth, row 1 = Apriori).
summary_it = pd.DataFrame(
    {
        "Algoritm": ["FP-Growth", "Apriori"],
        "time_itemsets": [t_fp_it, t_ap_it],
        "nr_itemsets": [len(freq_fp_it), len(freq_ap_it)],
        "time_rules": [t_rules_it, t_ap_rules_it],
        "nr_rules": [len(rules_fp_it), len(rules_ap_it)],
    }
)

summary_it

Unnamed: 0,Algoritm,time_itemsets,nr_itemsets,time_rules,nr_rules
0,FP-Growth,33.754283,939,0.014131,698
1,Apriori,0.09719,939,0.008047,698


In [46]:
# Number of distinct items in every Italian basket.
basket_sizes_it = list(map(len, transactions_it))
mean_size_it = round(np.mean(basket_sizes_it), 2)

print("ITALIA – statistici generale")
print("Nr tranzacții:", len(transactions_it))
print("Nr item-uri unice:", len(basket_df_it.columns))
print(
    "Dimensiune coș (min / medie / max):",
    min(basket_sizes_it),
    mean_size_it,
    max(basket_sizes_it),
)

ITALIA – statistici generale
Nr tranzacții: 9969
Nr item-uri unice: 1112
Dimensiune coș (min / medie / max): 1 21.98 51


In [47]:
# Per-country overview: basket count, item vocabulary size and the number of
# FP-Growth rules left after the lift filter.
summary_countries = pd.DataFrame(
    {
        "Country": ["Romania", "Italy"],
        "Transactions": [len(transactions), len(transactions_it)],
        "Items": [basket_df.shape[1], basket_df_it.shape[1]],
        "Rules": [len(rules_fp), len(rules_fp_it)],
    }
)

summary_countries

Unnamed: 0,Country,Transactions,Items,Rules
0,Romania,9666,1033,839
1,Italy,9969,1112,698


In [48]:
# Columns kept for the cross-country comparison table.
rule_cols = ["antecedents", "consequents", "support", "confidence", "lift"]

# Tag each country's FP-Growth rules, then stack them into a single frame.
rules_ro_tbl = rules_fp[rule_cols].assign(Country="Romania")
rules_it_tbl = rules_fp_it[rule_cols].assign(Country="Italy")

rules_compare = pd.concat([rules_ro_tbl, rules_it_tbl], ignore_index=True)

# Show the five strongest rules per country, ranked by lift and then confidence.
for country, heading in (
    ("Romania", "Top 5 reguli – România (FP-Growth)"),
    ("Italy", "Top 5 reguli – Italia (FP-Growth)"),
):
    top_rules = (
        rules_compare[rules_compare["Country"] == country]
        .sort_values(["lift", "confidence"], ascending=False)
        .head(5)
    )
    print(heading)
    display(top_rules)

Top 5 reguli – România (FP-Growth)


Unnamed: 0,antecedents,consequents,support,confidence,lift,Country
694,"(PASTA, NOODLES)",(CHICKEN BROTH),0.095282,0.598052,6.269816,Romania
693,(CHICKEN BROTH),"(PASTA, NOODLES)",0.095282,0.998915,6.269816,Romania
683,(CHICKEN BROTH),(POULTRY MEAT),0.095386,1.0,5.788024,Romania
682,(POULTRY MEAT),(CHICKEN BROTH),0.095386,0.552096,5.788024,Romania
552,(PASTA),"(CHEESE, TELEMEA)",0.050383,0.619593,5.71468,Romania


Top 5 reguli – Italia (FP-Growth)


Unnamed: 0,antecedents,consequents,support,confidence,lift,Country
1277,"(Cucumber, raw)","(Red chicory, ""Radicchio"", raw)",0.064801,0.725028,6.588701,Italy
1276,"(Red chicory, ""Radicchio"", raw)","(Cucumber, raw)",0.064801,0.588879,6.588701,Italy
1267,"(Red chicory, ""Radicchio"", raw)","(Fennel, raw)",0.083659,0.760255,5.643324,Italy
1266,"(Fennel, raw)","(Red chicory, ""Radicchio"", raw)",0.083659,0.620998,5.643324,Italy
1160,"(Yeast, beer yeast tablets)","(Vegetable oil, mixed seeds)",0.120975,0.879009,5.620807,Italy
