In [1]:
!pip install mlxtend




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd

# Read both CSV inputs from the notebook's working directory.
# low_memory=False makes pandas parse the tree inventory in one pass instead of
# in internal chunks, which avoids mixed-dtype inference on its columns.
trees = pd.read_csv("treez.csv.csv", low_memory=False)
orders = pd.read_csv("order.csv")

In [4]:
# `orders` and `trees` were already loaded by the previous cell, so this cell
# only previews them instead of re-reading both CSV files from disk.
print("Orders preview:")
print(orders.head())
print("\nTrees preview:")
print(trees.head())

Orders preview:
   ORDER_ID    COMMON_NAME
0         3      Grevillea
1         3   Deodar cedar
2         3  Texas red oak
3         3  Austrian pine
4         3   London Plane

Trees preview:
  most_recent_observation most_recent_observation_type  common_name  \
0               6/17/2020               inventory_date       Locust   
1              06/09/2020               inventory_date          Oak   
2              06/03/2020               inventory_date          Ash   
3              06/03/2020               inventory_date  Scotch pine   
4               5/29/2020               inventory_date  Honeylocust   

         scientific_name         city       state  longitude_coordinate  \
0                    NaN  Albuquerque  New Mexico           -106.568366   
1                Quercus  Albuquerque  New Mexico           -106.655037   
2               Fraxinus  Albuquerque  New Mexico           -106.581127   
3       Pinus sylvestris  Albuquerque  New Mexico           -106.575056   
4  G

In [5]:
import re

def normalize_name(s):
    """Return a canonical, comparison-friendly form of a tree name.

    The value is lower-cased, every character that is neither a word character
    nor whitespace is removed, and whitespace is then trimmed and collapsed to
    single spaces. Trimming happens *after* punctuation removal so that input
    such as ``"Oak ."`` yields ``"oak"`` rather than ``"oak "``.

    Parameters
    ----------
    s : object
        Raw name; any scalar is accepted and converted with ``str``.

    Returns
    -------
    str
        The normalised name, or ``""`` when ``s`` is missing (None/NaN/NA).
    """
    if pd.isna(s):
        return ""
    without_punct = re.sub(r"[^\w\s]", "", str(s).lower())
    # split()/join trims both ends and collapses internal whitespace runs,
    # so "Texas   Red  Oak" and "texas red oak" normalise to the same key.
    return " ".join(without_punct.split())

# Pick the inventory column that holds common names: the first column whose
# name contains "common" (case-insensitive). Raises IndexError if there is none.
kaggle_common_col = [c for c in trees.columns if "common" in c.lower()][0]

# Normalise both sides with the same function so names compare equal
# regardless of case and punctuation.
trees["common_norm"] = trees[kaggle_common_col].apply(normalize_name)
orders["common_norm"] = orders["COMMON_NAME"].apply(normalize_name)

# normalize_name maps missing values to "", so remove "" from the lookup set;
# otherwise an order row without a name would "match" any inventory row
# without a name and survive the filter.
valid_names = set(trees["common_norm"])
valid_names.discard("")

# .copy() yields an independent frame, so re-running this cell (which assigns a
# column on `orders`) does not write into a slice of the original frame.
orders = orders[orders["common_norm"].isin(valid_names)].copy()

print("✅ Filtered orders:", len(orders))
print(orders.head())

✅ Filtered orders: 20
   ORDER_ID    COMMON_NAME    common_norm
1         3   Deodar cedar   deodar cedar
2         3  Texas red oak  texas red oak
3         3  Austrian pine  austrian pine
4         3   London Plane   london plane
5         4   Siberian elm   siberian elm


In [6]:
# Build one basket per order: the normalised tree names that share an ORDER_ID.
# Baskets come out in ORDER_ID order because groupby sorts its keys by default.
transactions = [
    list(names) for _, names in orders.groupby("ORDER_ID")["common_norm"]
]

print("Sample transaction baskets:")
for basket in transactions[:5]:
    print(basket)

Sample transaction baskets:
['deodar cedar', 'texas red oak', 'austrian pine', 'london plane']
['siberian elm', 'crabapple', 'desert willow']
['crabapple']
['crabapple', 'austrian pine', 'siberian elm', 'cottonwood', 'deodar cedar', 'bur oak', 'texas red oak']
['bur oak', 'deodar cedar', 'desert willow', 'texas red oak', 'crabapple']


In [7]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules


# Mining thresholds, named so they can be tuned in one place.
MIN_SUPPORT = 0.2          # minimum fraction of baskets that must contain an itemset
MIN_RULE_CONFIDENCE = 0.5  # confidence floor used when generating candidate rules
STRONG_CONFIDENCE = 0.7    # confidence floor for a rule to be kept as "strong"

# One-hot encode the baskets: one row per transaction, one boolean column per item.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_enc = pd.DataFrame(te_ary, columns=te.columns_)

# Apriori: frequent itemsets first, then the rules derived from them.
frequent_itemsets = apriori(df_enc, min_support=MIN_SUPPORT, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=MIN_RULE_CONFIDENCE)

# Keep rules that are both high-confidence and positively associated (lift > 1).
strong_rules = rules[(rules["confidence"] >= STRONG_CONFIDENCE) & (rules["lift"] > 1)]
print("✅ Strong rules found:", len(strong_rules))
print(strong_rules[["antecedents","consequents","support","confidence","lift"]].head(10))


✅ Strong rules found: 1291
        antecedents      consequents  support  confidence      lift
3      (cottonwood)  (austrian pine)      0.2         1.0  2.500000
5   (austrian pine)   (deodar cedar)      0.4         1.0  1.666667
8    (london plane)  (austrian pine)      0.2         1.0  2.500000
11  (austrian pine)  (texas red oak)      0.4         1.0  1.666667
14     (cottonwood)        (bur oak)      0.2         1.0  2.500000
15        (bur oak)      (crabapple)      0.4         1.0  1.250000
17        (bur oak)   (deodar cedar)      0.4         1.0  1.666667
23        (bur oak)  (texas red oak)      0.4         1.0  1.666667
25     (cottonwood)      (crabapple)      0.2         1.0  1.250000
26     (cottonwood)   (deodar cedar)      0.2         1.0  1.666667


In [8]:
def recommend(tree, rules_df=None, top_n=5):
    """Print the strongest association rules whose antecedents contain ``tree``.

    Parameters
    ----------
    tree : str
        Item to look up. It is matched by membership in each rule's
        ``antecedents`` collection, so it must be spelled exactly like the
        items stored in ``rules_df``.
    rules_df : pandas.DataFrame, optional
        Rules table with ``antecedents``, ``consequents``, ``confidence`` and
        ``lift`` columns. When omitted, the notebook-level ``strong_rules`` is
        used as it exists at call time.
    top_n : int, default 5
        Maximum number of rules to print.

    Returns
    -------
    None
        Results are printed rather than returned. A "No recommendations"
        message is printed when no rule's antecedents contain ``tree``.
    """
    if rules_df is None:
        # Resolve the default when called, not when defined: a default bound in
        # the signature would keep pointing at an old table after
        # `strong_rules` is recomputed in an earlier cell.
        rules_df = strong_rules

    # astype(bool) keeps the mask a boolean indexer even when rules_df is empty.
    has_tree = rules_df['antecedents'].apply(lambda items: tree in items).astype(bool)
    recs = rules_df[has_tree]
    if recs.empty:
        print(f"No recommendations for {tree}")
        return

    # Highest confidence first; lift breaks ties.
    recs = recs.sort_values(by=["confidence","lift"], ascending=False)
    for _, r in recs.head(top_n).iterrows():
        print(f"💡 If buyer gets {list(r['antecedents'])} → recommend {list(r['consequents'])} "
              f"(conf={r['confidence']:.2f}, lift={r['lift']:.2f})")

# Example: print the top rules whose antecedents include "crabapple"
# (the name must be given in its normalised, lower-case form).
recommend("crabapple")


💡 If buyer gets ['austrian pine', 'crabapple'] → recommend ['cottonwood'] (conf=1.00, lift=5.00)
💡 If buyer gets ['bur oak', 'austrian pine', 'crabapple'] → recommend ['cottonwood'] (conf=1.00, lift=5.00)
💡 If buyer gets ['austrian pine', 'crabapple'] → recommend ['bur oak', 'cottonwood'] (conf=1.00, lift=5.00)
💡 If buyer gets ['cottonwood', 'crabapple'] → recommend ['bur oak', 'austrian pine'] (conf=1.00, lift=5.00)
💡 If buyer gets ['austrian pine', 'crabapple'] → recommend ['bur oak', 'siberian elm'] (conf=1.00, lift=5.00)
