In [None]:
!python -V
!pip -q install -U pip setuptools wheel

# downgrade to versions PyCaret 2.3.5 plays well with
!pip -q install "numpy==1.23.5" "pandas==1.5.3" "scipy==1.9.3" \
               "scikit-learn==1.0.2" "joblib<1.3" "numba<0.57" "matplotlib==3.7.2"

# install pycaret 2.3.5 + mlxtend WITHOUT pulling newest deps
!pip -q install "pycaret==2.3.5" "mlxtend==0.22.0" --no-deps

import os, time; print("Restarting…"); time.sleep(1); os._exit(0)


Python 3.11.13
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mPreparing metadata [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.
Restarting…


In [1]:
from pycaret.arules import *
import mlxtend
print("PyCaret AR ok; mlxtend:", mlxtend.__version__)


PyCaret AR ok; mlxtend: 0.22.0


In [3]:
import pandas as pd, requests

url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/groceries.csv"

# read raw text; each line is a transaction with comma-separated items
lines = requests.get(url, timeout=60).text.strip().splitlines()
basket = pd.DataFrame({"items": lines})

# Make (transaction_id, item) rows for PyCaret AR
tx = (
    basket.assign(TID=lambda d: d.index.astype(int))
          .assign(item_list=lambda d: d["items"].str.split(","))
          .explode("item_list")
          .rename(columns={"item_list": "item"})[["TID","item"]]
)
tx["item"] = tx["item"].str.strip()
print("Rows:", len(tx), "| Txns:", tx["TID"].nunique(), "| Unique items:", tx["item"].nunique())
tx.head()


Rows: 43367 | Txns: 9835 | Unique items: 169


Unnamed: 0,TID,item
0,0,citrus fruit
0,0,semi-finished bread
0,0,margarine
0,0,ready soups
1,1,tropical fruit


In [6]:
# Shim pandas Styler to re-add .hide_index() using the newer .hide(axis="index")
import pandas as pd
from pandas.io.formats.style import Styler

if not hasattr(Styler, "hide_index"):
    def _hide_index(self):
        # pandas >= 1.4 uses Styler.hide with axis kwarg; this emulates hide_index()
        return self.hide(axis="index")
    Styler.hide_index = _hide_index

print("Patched Styler.hide_index:", hasattr(Styler, "hide_index"))


Patched Styler.hide_index: True


In [8]:
from pycaret.arules import *

s = setup(
    data=tx,                 # (TID, item) rows from earlier cell
    transaction_id="TID",
    item_id="item",
    session_id=42
)
s


Description,Value
session_id,42.0
# Transactions,9835.0
# Items,169.0
Ignore Items,


(       TID                 item
 0        0         citrus fruit
 0        0  semi-finished bread
 0        0            margarine
 0        0          ready soups
 1        1       tropical fruit
 ...    ...                  ...
 9834  9834              chicken
 9834  9834       tropical fruit
 9834  9834     other vegetables
 9834  9834              vinegar
 9834  9834        shopping bags
 
 [43367 rows x 2 columns],
 'TID',
 'item',
 None,
 42,
 [])

In [9]:
# Mine rules prioritizing lift; keep rule length small for readability
model = create_model(metric="lift", threshold=1.0, max_len=3)

# Sort and show top rules
rules = model.sort_values(["lift","confidence","support"], ascending=False)
rules_display = rules[["antecedents","consequents","support","confidence","lift"]].head(20)
rules_display




Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(yogurt),(whole milk),0.056,0.4016,1.5717
1,(whole milk),(yogurt),0.056,0.2193,1.5717
2,(other vegetables),(whole milk),0.0748,0.3868,1.5136
3,(whole milk),(other vegetables),0.0748,0.2929,1.5136
5,(rolls/buns),(whole milk),0.0566,0.3079,1.205
4,(whole milk),(rolls/buns),0.0566,0.2216,1.205


In [13]:
import pickle, os

os.makedirs("/kaggle/working/media/figures", exist_ok=True)
os.makedirs("/kaggle/working/notebooks", exist_ok=True)

# Save the mined rules
rules.to_csv("/kaggle/working/media/figures/assoc_rules_pycaret.csv", index=False)

# Serialize the model manually
with open("/kaggle/working/notebooks/assoc_rules_pycaret_235.pkl", "wb") as f:
    pickle.dump(model, f)

print("Saved:")
print("- /kaggle/working/media/figures/assoc_rules_pycaret.csv")
print("- /kaggle/working/notebooks/assoc_rules_pycaret_235.pkl")


Saved:
- /kaggle/working/media/figures/assoc_rules_pycaret.csv
- /kaggle/working/notebooks/assoc_rules_pycaret_235.pkl
