
# **Apriori Data Mining Project**
This notebook demonstrates **Market Basket Analysis** using the **Apriori algorithm**.
The objective is to find frequent itemsets in transactional data and to generate association rules from them.


In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from apyori import apriori
from mlxtend.frequent_patterns import apriori as mlxtend_apriori
from mlxtend.frequent_patterns import association_rules


## **1. Load and Preprocess Dataset**

In [None]:

# Load Dataset
# The file has no header row: each CSV row is one transaction and cells that
# are empty in the file are read as NaN.
df = pd.read_csv("Market_Basket_Optimisation.csv", header=None)

# Convert Data into Transactions Format
# Keep every non-missing cell of each row as a string item.
# - The width is taken from the data itself rather than a fixed column count,
#   so files with more or fewer columns are handled.
# - Missing cells are detected with pd.notna instead of a 0 sentinel, so an
#   entirely empty column (which a 0-fill would turn into the item '0.0') and
#   an item literally named "0" are both treated correctly.
# - The array is materialised once instead of once per cell.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in df.to_numpy()
]

print("Total Transactions:", len(transactions))


## **2. Apply Apriori Algorithm (apyori)**

In [None]:

# Apply Apriori
# Thresholds passed to apyori, gathered in one place so they are easy to tune.
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; `min_length` looks like it is not read by the library - confirm.
apriori_kwargs = dict(min_support=0.003, min_confidence=0.2, min_lift=3, min_length=2)
rules = apriori(transactions, **apriori_kwargs)

# `rules` is consumed here; the records are kept in `results`.
results = [record for record in rules]

# Convert Results to DataFrame
df_results = pd.DataFrame(results)
df_results.head()


## **3. Extract and Display Rules**

In [None]:

# Flatten the apyori results into one row per association rule.
#
# Each row of `df_results` carries the itemset support and a list of
# `ordered_statistics`; every entry of that list is one rule, indexed as
# (antecedent items, consequent items, confidence, lift).
#
# - All entries of `ordered_statistics` are kept, not only the first one, so no
#   qualifying rule is silently dropped.
# - LHS / RHS are built by joining however many items each side has, instead of
#   assuming a fixed number of columns, which avoids both column-count errors
#   and dangling ", " separators.
# - Items are sorted because set iteration order is not stable between runs.
# - An empty `df_results` yields an empty `df_final` with the expected columns.
RULE_COLUMNS = ['LHS', 'RHS', 'Support', 'Confidence', 'Lift']

rule_rows = []
for record in df_results.itertuples(index=False):
    for stat in record.ordered_statistics:
        rule_rows.append({
            'LHS': ', '.join(sorted(str(item) for item in stat[0])),
            'RHS': ', '.join(sorted(str(item) for item in stat[1])),
            'Support': record.support,
            'Confidence': stat[2],
            'Lift': stat[3],
        })

# Create DataFrame for Association Rules
df_final = pd.DataFrame(rule_rows, columns=RULE_COLUMNS)

# Display Top 10 Rules by Lift
df_final.sort_values('Lift', ascending=False).head(10)


## **4. Visualizing Top Products**

In [None]:

# Load Second Dataset
# Built as one non-mutating chain so re-running the cell always starts from the
# file and no in-place operation is applied to a filtered slice:
#   1. strip surrounding whitespace from the item descriptions,
#   2. keep only rows with a positive quantity,
#   3. drop rows whose description is missing.
df1 = (
    pd.read_csv("data-2.csv", encoding="ISO-8859-1")
    .assign(Description=lambda frame: frame['Description'].str.strip())
    .loc[lambda frame: frame['Quantity'] > 0]
    .dropna(subset=['Description'])
)

# Plot Top Sold Items
# Total quantity per description, ten largest first.
top_items = df1.groupby("Description")["Quantity"].sum().sort_values(ascending=False).head(10)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(top_items.index, top_items.values)
ax.set_title("Top 10 Most Sold Items")
ax.set_xlabel("Items")
ax.set_ylabel("Quantity")
# Right-align the rotated labels so each one ends under its own bar, and
# tighten the layout so long labels are not clipped.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()


## **5. Apply Apriori with mlxtend**

In [None]:

# Convert Data into Binary Format
# One row per invoice, one column per item description, cells hold the summed
# quantity (0 where the invoice does not contain the item).
basket = df1.pivot_table(index='InvoiceNo', columns='Description', values='Quantity', aggfunc='sum', fill_value=0)

# Presence/absence matrix: True where the invoice contains the item.
# A vectorised comparison replaces the per-cell `applymap` lambda (deprecated
# in recent pandas) and produces the boolean frame mlxtend works with.
basket_sets = basket > 0

# Apply Apriori
frequent_itemsets = mlxtend_apriori(basket_sets, min_support=0.03, use_colnames=True)
rules_mlxtend = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Filter Strong Rules
# Keep rules with lift of at least 4 and confidence of at least 0.7.
strong_rules = rules_mlxtend[(rules_mlxtend['lift'] >= 4) & (rules_mlxtend['confidence'] >= 0.7)]
strong_rules.head()


## **6. Save Results**

In [None]:

# Save results to CSV
# Each rule table is written to its own file, without the index column.
csv_outputs = (
    ("apriori_rules.csv", df_final),
    ("strong_association_rules.csv", strong_rules),
)
for csv_path, frame in csv_outputs:
    frame.to_csv(csv_path, index=False)

print("Apriori analysis completed and results saved.")
