<a href="https://colab.research.google.com/github/ASMT-College/lab-2-association-mining-SanjivJais/blob/main/Lab_2_Association_mining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

An implementation of the **Apriori algorithm** that finds frequent itemsets in transaction data and then derives association rules from those itemsets.

In [6]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Step 1: Read the raw transactions.
# The file has no header line, so header=None keeps the first transaction
# as data and gives the columns plain integer labels.
DATASET_PATH = 'store_data.csv'
data = pd.read_csv(DATASET_PATH, header=None)
print("Initial Data:\n", data.head())

# Step 2: Convert the dataset into the one-hot layout Apriori expects.
# In `data` each row is a transaction and each column is just a slot holding
# one purchased item (NaN where the transaction has fewer items).
# The result `df_items` has one row per transaction and one boolean column
# per distinct item, True where the transaction contains that item.

# stack() flattens the table into one entry per (transaction, slot). The
# explicit dropna() removes the NaN padding regardless of how the installed
# pandas version's stack() treats missing values, so NaN never becomes an
# "item". strip() normalises stray whitespace around item names.
cells = data.stack().dropna().str.strip()
items = cells.unique()

# Vectorised encoding (replaces a per-cell .loc assignment loop):
#   get_dummies  -> one indicator row per purchased item
#   groupby/any  -> collapse the indicator rows of each transaction
#   reindex      -> restore the original row index (transactions with no
#                   items become all-False rows) and order the columns by
#                   first appearance, as `items` lists them
# The frame is boolean rather than 0/1 integers; boolean input is what
# mlxtend's apriori documents as its preferred format.
df_items = (
    pd.get_dummies(cells, dtype=bool)
    .groupby(level=0)
    .any()
    .reindex(index=data.index, columns=items, fill_value=False)
)

print("\nOne-Hot Encoded Data:\n", df_items.head())

# Step 3: Mine frequent itemsets with Apriori.
# An itemset is kept only if it occurs in at least 5% of all transactions;
# the threshold is deliberately low so that more itemsets survive.
MIN_SUPPORT = 0.05
frequent_itemsets = apriori(
    df_items,
    min_support=MIN_SUPPORT,
    use_colnames=True,  # report item names instead of column positions
)
print("\nFrequent Itemsets:\n", frequent_itemsets)

# Step 4: Derive association rules from the frequent itemsets.
# A rule is kept only if its confidence is at least 20%; again the
# threshold is deliberately low so that more rules are produced.
MIN_CONFIDENCE = 0.2
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=MIN_CONFIDENCE,
)
print("\nAssociation Rules:\n", rules)

# Step 5: Interpret the results.
# Print every rule as "antecedents -> consequents" followed by its support,
# confidence and lift, each rounded to two decimals.
if rules.empty:
    print("\nNo association rules found with the given thresholds.")
else:
    rule_columns = zip(
        rules['antecedents'],
        rules['consequents'],
        rules['support'],
        rules['confidence'],
        rules['lift'],
    )
    for antecedents, consequents, support, confidence, lift in rule_columns:
        # The itemsets are converted to plain sets purely for display.
        print(f"\nRule: {set(antecedents)} -> {set(consequents)}")
        print(f"Support: {support:.2f}")
        print(f"Confidence: {confidence:.2f}")
        print(f"Lift: {lift:.2f}")

Initial Data:
               0          1           2                 3             4   \
0         shrimp    almonds     avocado    vegetables mix  green grapes   
1        burgers  meatballs        eggs               NaN           NaN   
2        chutney        NaN         NaN               NaN           NaN   
3         turkey    avocado         NaN               NaN           NaN   
4  mineral water       milk  energy bar  whole wheat rice     green tea   

                 5     6               7             8             9   \
0  whole weat flour  yams  cottage cheese  energy drink  tomato juice   
1               NaN   NaN             NaN           NaN           NaN   
2               NaN   NaN             NaN           NaN           NaN   
3               NaN   NaN             NaN           NaN           NaN   
4               NaN   NaN             NaN           NaN           NaN   

               10         11     12     13             14      15  \
0  low fat yogurt  green t

