In [96]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Step 0 Install Required Libraries

In [97]:
# Install Orange3 library
!pip install orange3 orange3-associate



In [98]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import time
from orangecontrib.associate.fpgrowth import *  # Association rule mining in Orange

# # Load your datasets into pandas dataframes
# dataset1 = pd.read_csv('/content/drive/MyDrive/datasets/3-MarketBasketAnalysis.csv')
# dataset2 = pd.read_csv('/content/drive/MyDrive/datasets/5-OnlineRetailTransactionData.csv')
# dataset3 = pd.read_csv('/content/drive/MyDrive/datasets/6-E-commerceBusinessTransaction.csv')
# dataset4 = pd.read_csv('/content/drive/MyDrive/datasets/7-TransactionData.csv')
# dataset5 = pd.read_csv('/content/drive/MyDrive/datasets/8-GoogleMerchandiseStore.csv')

# # Example: display the first few rows of dataset1
# dataset1.head()


# Step 1 Define Brute-Force Algorithm Function

In [99]:
import itertools
import time

def brute_force_association_rule_mining(data, min_support=0.1, min_confidence=0.7):
    print("Processing dataset start...")
    start_time = time.time()

    frequent_itemsets = []
    rules = []
    n_transactions = len(data)  # Number of transactions

    # Function to calculate support for a given itemset
    def support(itemset, data):
        count = sum(1 for transaction in data if set(itemset).issubset(transaction))
        return count / n_transactions

    # Function to calculate confidence for a given rule
    def confidence(rule, itemset_support, data):
        antecedent, _ = rule
        antecedent_support = support(antecedent, data)
        return itemset_support / antecedent_support if antecedent_support != 0 else 0

    # Get all unique items across transactions
    unique_items = sorted(set(itertools.chain.from_iterable(data)))

    # Generate all possible itemsets
    for r in range(1, len(unique_items) + 1):
        for itemset in itertools.combinations(unique_items, r):
            itemset_support = support(itemset, data)
            if itemset_support >= min_support:
                frequent_itemsets.append((itemset, itemset_support))

                # Generate all possible rules for each frequent itemset
                for consequent_size in range(1, len(itemset)):
                    for consequent in itertools.combinations(itemset, consequent_size):
                        antecedent = tuple(set(itemset) - set(consequent))
                        conf = confidence((antecedent, consequent), itemset_support, data)
                        if conf >= min_confidence:
                            rules.append((antecedent, consequent, conf))

    # End timing
    end_time = time.time()

    # Store results for the dataset
    dataset_result = {
        "frequent_itemsets": frequent_itemsets,
        "rules": rules,
        "execution_time": end_time - start_time
    }
    print(f"Completed: in {end_time - start_time:.4f} seconds")

    return dataset_result


# Step 2: Processing Dataset

## 2.1 Load Dataset from CSV

### Dataset-1

In [48]:
dataset = pd.read_csv('/content/drive/MyDrive/datasets/3-MarketBasketAnalysis.csv',
                       delimiter=';',
                       on_bad_lines='skip')  # Skips problematic lines
# View the first few rows
print(dataset.head())

# Group items by BillNo (transaction)
transactions = dataset.groupby('BillNo')['Itemname'].apply(list).tolist()

# Check the grouped transactions
print(transactions[:5])

  dataset = pd.read_csv('/content/drive/MyDrive/datasets/3-MarketBasketAnalysis.csv',


   BillNo                             Itemname  Quantity              Date  \
0  536365   WHITE HANGING HEART T-LIGHT HOLDER         6  01.12.2010 08:26   
1  536365                  WHITE METAL LANTERN         6  01.12.2010 08:26   
2  536365       CREAM CUPID HEARTS COAT HANGER         8  01.12.2010 08:26   
3  536365  KNITTED UNION FLAG HOT WATER BOTTLE         6  01.12.2010 08:26   
4  536365       RED WOOLLY HOTTIE WHITE HEART.         6  01.12.2010 08:26   

  Price  CustomerID         Country  
0  2,55     17850.0  United Kingdom  
1  3,39     17850.0  United Kingdom  
2  2,75     17850.0  United Kingdom  
3  3,39     17850.0  United Kingdom  
4  3,39     17850.0  United Kingdom  
[['WHITE HANGING HEART T-LIGHT HOLDER', 'WHITE METAL LANTERN', 'CREAM CUPID HEARTS COAT HANGER', 'KNITTED UNION FLAG HOT WATER BOTTLE', 'RED WOOLLY HOTTIE WHITE HEART.', 'SET 7 BABUSHKA NESTING BOXES', 'GLASS STAR FROSTED T-LIGHT HOLDER'], ['HAND WARMER UNION JACK', 'HAND WARMER RED POLKA DOT'], ['ASSO

### Dataset-2

In [69]:
dataset = pd.read_csv('/content/drive/MyDrive/datasets/4-GroceriesDataset.csv')
# View the first few rows
print(dataset.head())

# Group items by Member_number (transaction)
transactions = dataset.groupby('Member_number')['itemDescription'].apply(list).tolist()

# Check the grouped transactions
print(transactions[:5])

   Member_number        Date   itemDescription
0           1808  21-07-2015    tropical fruit
1           2552  05-01-2015        whole milk
2           2300  19-09-2015         pip fruit
3           1187  12-12-2015  other vegetables
4           3037  01-02-2015        whole milk
[['soda', 'canned beer', 'sausage', 'sausage', 'whole milk', 'whole milk', 'pickled vegetables', 'misc. beverages', 'semi-finished bread', 'hygiene articles', 'yogurt', 'pastry', 'salty snack'], ['frankfurter', 'frankfurter', 'beef', 'sausage', 'whole milk', 'soda', 'curd', 'white bread', 'whole milk', 'soda', 'whipped/sour cream', 'rolls/buns'], ['tropical fruit', 'butter milk', 'butter', 'frozen vegetables', 'sugar', 'specialty chocolate', 'whole milk', 'other vegetables'], ['sausage', 'root vegetables', 'rolls/buns', 'detergent', 'frozen meals', 'rolls/buns', 'dental care', 'rolls/buns'], ['other vegetables', 'pip fruit', 'root vegetables', 'canned beer', 'rolls/buns', 'whole milk', 'other vegetables', 'hy

### Dataset-3

In [79]:
dataset = pd.read_csv('/content/drive/MyDrive/datasets/6-E-commerceBusinessTransaction.csv')
# View the first few rows
print(dataset.head())

transactions = dataset.groupby('TransactionNo')['ProductName'].apply(list).tolist()

# Check the grouped transactions
print(transactions[:5])

  TransactionNo       Date ProductNo                          ProductName  \
0        581482  12/9/2019     22485        Set Of 2 Wooden Market Crates   
1        581475  12/9/2019     22596  Christmas Star Wish List Chalkboard   
2        581475  12/9/2019     23235             Storage Tin Vintage Leaf   
3        581475  12/9/2019     23272    Tree T-Light Holder Willie Winkie   
4        581475  12/9/2019     23239    Set Of 4 Knick Knack Tins Poppies   

   Price  Quantity  CustomerNo         Country  
0  21.47        12     17490.0  United Kingdom  
1  10.65        36     13069.0  United Kingdom  
2  11.53        12     13069.0  United Kingdom  
3  10.65        12     13069.0  United Kingdom  
4  11.94         6     13069.0  United Kingdom  
[['Cream Hanging Heart T-Light Holder', 'White Moroccan Metal Lantern', 'Cream Cupid Hearts Coat Hanger', 'Knitted Union Flag Hot Water Bottle', 'Red Woolly Hottie White Heart', 'Set 7 Babushka Nesting Boxes', 'Glass Star Frosted T-Light Holde

### Dataset-4

In [100]:
dataset4 = pd.read_csv('/content/drive/MyDrive/datasets/7-TransactionData.csv')
# View the first few rows
print(dataset4.head())

transactions4 = dataset4.groupby('TransactionId')['ItemDescription'].apply(list).tolist()

# Check the grouped transactions
print(transactions4[:5])

   UserId  TransactionId               TransactionTime  ItemCode  \
0  278166        6355745  Sat Feb 02 12:50:00 IST 2019    465549   
1  337701        6283376  Wed Dec 26 09:06:00 IST 2018    482370   
2  267099        6385599  Fri Feb 15 09:45:00 IST 2019    490728   
3  380478        6044973  Fri Jun 22 07:14:00 IST 2018    459186   
4      -1        6143225  Mon Sep 10 11:58:00 IST 2018   1733592   

                     ItemDescription  NumberOfItemsPurchased  CostPerItem  \
0   FAMILY ALBUM WHITE PICTURE FRAME                       6        11.73   
1              LONDON BUS COFFEE MUG                       3         3.52   
2  SET 12 COLOUR PENCILS DOLLY GIRL                       72         0.90   
3        UNION JACK FLAG LUGGAGE TAG                       3         1.73   
4                WASHROOM METAL SIGN                       3         3.40   

          Country  
0  United Kingdom  
1  United Kingdom  
2          France  
3  United Kingdom  
4  United Kingdom  
[['SET 7

### Dataset-5

In [107]:
dataset4 = pd.read_csv('/content/drive/MyDrive/datasets/8-GoogleMerchandiseStore.csv')
# View the first few rows
print(dataset4.head())

   transaction_id  Google Zip Hoodie Black  \
0           48494                     True   
1           48489                    False   
2           48482                    False   
3           48478                    False   
4           48472                    False   

   Google Women's Short Sleeve Hero Tee White  Google Trike Tee Black  \
0                                       False                   False   
1                                        True                    True   
2                                        True                   False   
3                                       False                   False   
4                                       False                   False   

   Google Women's Short Sleeve Badge Tee Grey  Google Hub Mug White  \
0                                       False                 False   
1                                       False                 False   
2                                        True                 False   


## 2.2 Filter NONE or NaN




In [108]:
cleaned_transactions = []
for transaction in transactions:
    # Convert items to strings and filter out any None or NaN values
    cleaned_transaction = [str(item) for item in transaction if pd.notna(item) and item != '']
    if cleaned_transaction:  # Ensure the transaction is not empty
        cleaned_transactions.append(cleaned_transaction)

## 2.3 Convert into List

Output example:


```
transactions = [
    ['CREAM CUPID HEARTS COAT HANGER', 'KNITTED UNION FLAG HOT WATER BOTTLE'],
    ['HAND WARMER RED POLKA DOT', 'HAND WARMER UNION JACK'],
    ['ASSORTED COLOUR BIRD ORNAMENT', 'BOX OF 6 ASSORTED COLOUR TEASPOONS'],
    ['BLUE COAT RACK PARIS FASHION', 'RED COAT RACK PARIS FASHION'],
    ['BATH BUILDING BLOCK WORD']
]
```

DEFINE THE COUNT OF ITEMS HERE ! ! !

In [102]:
from mlxtend.preprocessing import TransactionEncoder

# Convert transactions to binary matrix
te = TransactionEncoder()
te_ary = te.fit(cleaned_transactions).transform(cleaned_transactions)
binary_matrix = pd.DataFrame(te_ary, columns=te.columns_)

# Check the resulting binary matrix
print(binary_matrix.head())

# Reduce the number of unique items (columns)
binary_matrix_reduced = binary_matrix.iloc[:, :15]  # Keep only the first 50 items
# Remove rows where all items are "FALSE"
# The default is True for pandas when using `astype(bool)`
# Convert the DataFrame to boolean, then check if all values are True
binary_matrix = binary_matrix[~(binary_matrix == "FALSE").all(axis=1)]

mining_dataset = []
for idx, row in binary_matrix_reduced.iterrows():
    transaction = list(binary_matrix_reduced.columns[row == True])  # Add items where the value is True
    mining_dataset.append(transaction)

print(mining_dataset[:15])

# Save the reduced dataset
# binary_matrix1.to_csv('/content/drive/MyDrive/datasets/3-MarketBasketAnalysis_reduced.csv', index=True)

   10 Colour Spaceboy Pen  12 Coloured Party Balloons  \
0                   False                       False   
1                   False                       False   
2                   False                       False   
3                   False                       False   
4                   False                       False   

   12 Daisy Pegs In Wood Box  12 Egg House Painted Wood  \
0                      False                      False   
1                      False                      False   
2                      False                      False   
3                      False                      False   
4                      False                      False   

   12 Hanging Eggs Hand Painted  12 Ivory Rose Peg Place Settings  \
0                         False                             False   
1                         False                             False   
2                         False                             False   
3                         

## 2.4 Mapping itemName(str) to Integer


```
item_mapping[itemName] = item_counter
```



In [109]:
# Step 2: Create a mapping of item names to integers
item_mapping = {}
item_counter = 0
for transaction in mining_dataset:
    for item in transaction:
        if item not in item_mapping:
            item_mapping[item] = item_counter
            item_counter += 1

# Step 3: Convert transactions to lists of integers
encoded_transactions = [[item_mapping[item] for item in transaction] for transaction in mining_dataset]
print(f"Item count: {item_counter}")

Item count: 15


# Step 3: Running AR Mining Programs

In [113]:
min_support = 0.1
min_confidence = 0.5

In [114]:
import time

start_time = time.time()
# Step 1: Mining frequent itemsets
itemsets1_list = list(frequent_itemsets(encoded_transactions, min_support=min_support))

# Convert itemsets1_list to a dictionary format required by association_rules
itemsets = {itemset: support for itemset, support in itemsets1_list}

# Step 2: Generating association rules from the frequent itemsets
rules = list(association_rules(itemsets, min_confidence=min_confidence))

elapsed_time = time.time() - start_time
print(f"Time taken: {elapsed_time} seconds")

# Print the frequent itemsets
print("Frequent Itemsets:")
# Step 5: Print the itemsets and their support
for itemset, support in itemsets.items():  # Iterate over items (key-value pairs)
    decoded_itemset = [list(item_mapping.keys())[list(item_mapping.values()).index(i)] for i in itemset]
    print(f"Itemset: {decoded_itemset}, Support: {support}")

# Print the generated rules
print(f"Number of rules generated: {len(rules)}")
# Reverse the item_mapping to get item numbers back to names
reverse_item_mapping = {v: k for k, v in item_mapping.items()}

print("\nAssociation Rules:")
for rule in rules:
    # Unpacking the tuple (adjust this based on the structure of your rules)
    antecedents, consequents, confidence, support = rule[:]

    # Translate the antecedents and consequents using reverse_item_mapping
    decoded_antecedents = [reverse_item_mapping[item] for item in antecedents]
    decoded_consequents = [reverse_item_mapping[item] for item in consequents]

    # Print the rule with item names
    print(f"Rule: {decoded_antecedents} -> {decoded_consequents}, Confidence: {confidence}, Support: {support}")

Time taken: 0.2870039939880371 seconds
Frequent Itemsets:
Number of rules generated: 0

Association Rules:


# Step 4: Running Brute-Force Mining

In [115]:
brute_force_result = brute_force_association_rule_mining(encoded_transactions, min_support=min_support, min_confidence=min_confidence)

# Iterating over the frequent itemsets
for itemset, support in brute_force_result["frequent_itemsets"]:  # Access the dictionary key with a string
    # Decode the itemset
    decoded_itemset = [list(item_mapping.keys())[list(item_mapping.values()).index(i)] for i in itemset]
    print(f"Itemset: {decoded_itemset}, Support: {support}")

# Iterating over the rules
for rule in brute_force_result["rules"]:
    antecedent, consequent, confidence = rule  # Unpacking the tuple
    decoded_antecedent = [list(item_mapping.keys())[list(item_mapping.values()).index(i)] for i in antecedent]
    decoded_consequent = [list(item_mapping.keys())[list(item_mapping.values()).index(i)] for i in consequent]
    print(f"Rule: {decoded_antecedent} -> {decoded_consequent}, Confidence: {confidence}")

Processing dataset start...
Completed: in 320.3835 seconds
