<a href="https://colab.research.google.com/github/AishlyManglani/Advanced-data-mining_CMPE-256/blob/main/Market%20Basket%20Item%20-%20Apriori%20Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install apyori

Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... [?25l[?25hdone
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5954 sha256=4ef22899bc1bdbc7ef9cd7e2726d3fa85be64787b9a6eab808bfe9c3a3c3389d
  Stored in directory: /root/.cache/pip/wheels/c4/1a/79/20f55c470a50bb3702a8cb7c94d8ada15573538c7f4baebe2d
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2


In [2]:
import pandas as pd
from apyori import apriori

In [3]:
# Load training dataset
train_data = pd.read_csv('/content/TRAIN-ARULES.csv')

In [12]:
# Preprocess data: Group items by order_id (Transaction ID)
transactions = train_data.groupby('order_id')['product_name'].apply(list)
print("\nSample Transactions:\n", transactions.head())


Sample Transactions:
 order_id
1483     [Organic Pink Lemonade Bunny Fruit Snacks, Dar...
4595     [Creme De Menthe Thins, Milk Chocolate English...
7099     [Revive Zero Vitamin Water, VitaminWater Zero™...
8382     [Mountain Spring Water, Clementines, Bag, Pine...
14400    [Truffle, Sea Salt, Black Truffle, 1500 Pale Ale]
Name: product_name, dtype: object


In [13]:
# Convert transactions into a list of lists
transaction_list = transactions.tolist()

In [14]:
# Apply the Apriori algorithm
min_support = 0.0045  # Set the minimum support threshold
association_rules = apriori(
    transaction_list,
    min_support=min_support,
    min_confidence=0.2,
    min_lift=3,
    min_length=2
)

In [15]:
# Convert the rules to a list
association_results = list(association_rules)
print("\nNumber of Association Rules Generated:", len(association_results))


Number of Association Rules Generated: 987


In [16]:
# Display the rules
print("\nAssociation Rules:")
for rule in association_results:
    items = [x for x in rule.items]
    print(f"Rule: {items[0]} -> {items[1:]}")
    print(f"Support: {rule.support}")
    for stat in rule.ordered_statistics:
        print(f"Confidence: {stat.confidence}")
        print(f"Lift: {stat.lift}")
    print("-" * 40)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Confidence: 0.7272727272727273
Lift: 93.75206611570248
Confidence: 0.888888888888889
Lift: 57.292929292929294
Confidence: 0.42105263157894735
Lift: 42.64661654135338
Confidence: 0.7999999999999999
Lift: 59.705263157894734
Confidence: 1.0
Lift: 19.16216216216216
Confidence: 0.7272727272727273
Lift: 51.56363636363636
Confidence: 0.888888888888889
Lift: 28.646464646464647
----------------------------------------
Rule: Oatmeal Crème Pies -> ['Honey Buns', 'Creamed Chipped Beef', 'Cran-Apple Juice Drink']
Support: 0.006346967559943582
Confidence: 0.5294117647058824
Lift: 83.41176470588236
Confidence: 0.6
Lift: 85.08
Confidence: 0.8181818181818181
Lift: 89.24475524475524
Confidence: 0.5294117647058824
Lift: 75.07058823529411
Confidence: 0.6428571428571428
Lift: 91.15714285714284
Confidence: 0.8181818181818181
Lift: 82.87012987012986
Confidence: 0.5625
Lift: 79.7625
Confidence: 0.8999999999999999
Lift: 79.76249999999999
Confiden

In [17]:
# Test dataset for prediction
test_data = pd.read_csv('/content/testarules.csv')


In [19]:
import numpy as np

In [20]:
# Predict the next basket item based on association rules
def predict_next_items(test_row, rules):
    basket = set()
    for item in test_row:
        if isinstance(item, (list, np.ndarray)):  # Handle lists or arrays
            basket.update([sub_item for sub_item in item if pd.notna(sub_item)])  # Flatten and exclude NaN
        elif pd.notna(item):  # Handle individual non-NaN items
            basket.add(item)
    predictions = []
    for rule in rules:
        base_items = set(rule.ordered_statistics[0].items_base)  # Base items in the rule
        add_items = set(rule.ordered_statistics[0].items_add)    # Items to add
        if base_items.issubset(basket):  # Check if base items match the basket
            predictions.extend(add_items)
    return list(set(predictions))  # Return unique predictions

# Apply predictions for each row in the test dataset
test_data['Predicted_Items'] = test_data.apply(
    lambda row: predict_next_items(row.values, association_results), axis=1
)
print("\nTest Data with Predictions:\n", test_data)


Test Data with Predictions:
                   Item1                                     Item2  \
0  Dark Chocolate Minis  Organic Pink Lemonade Bunny Fruit Snacks   

                        Item3  Item4  Item5  \
0  Peach-Pear Sparkling Water    NaN    NaN   

                                     Predicted_Items  
0  [Maple Pumpkin Seeds with Sea Salt Chewy with ...  
