In [12]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from google.colab import drive

# Mount Google Drive and load data
drive.mount('/content/drive/')
data = pd.read_csv('/content/drive/My Drive/Datasets/groceries.csv', header=None)

# Each row contains items bought in a transaction
# Convert this into a list of lists
transactions = data.apply(lambda row: row.dropna().tolist(), axis=1).tolist()

# Convert to one-hot encoded dataframe
from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)

# Apply Apriori algorithm
frequent_itemsets = apriori(df, min_support=0.02, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Show top 10 rules
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(10))


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
          antecedents         consequents   support  confidence      lift
0                 (1)       (canned beer)  0.026434    0.120426  1.550408
1       (canned beer)                 (1)  0.026434    0.340314  1.550408
2        (rolls/buns)                 (3)  0.024298    0.132117  1.000389
3                 (3)        (rolls/buns)  0.024298    0.183988  1.000389
4  (other vegetables)                 (4)  0.020943    0.108250  1.059451
5                 (4)  (other vegetables)  0.020943    0.204975  1.059451
6        (whole milk)                 (4)  0.028670    0.112216  1.098270
7                 (4)        (whole milk)  0.028670    0.280597  1.098270
8                 (5)        (whole milk)  0.026230    0.301754  1.181081
9        (whole milk)                 (5)  0.026230    0.102666  1.181081
