# Sai Pavan Kumar M
# Data Science - Batch January 2024 (Hyderabad) - Assignment 10

# Association Rules

In [None]:
# Load necessary packages
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Read the raw sheet into a DataFrame. header=None keeps the first row as
# data instead of consuming it as column names.
df = pd.read_excel(
    'Online retail.xlsx',
    sheet_name='Sheet1',
    header=None,
)

## Data Preprocessing

In [None]:
# Inspect the leading rows and the overall dimensions of the raw data
for summary in (df.head(), df.shape):
    print(summary)

In [None]:
# Convert the single column of comma-separated items into a list of transactions.
# - dropna(): blank rows load as NaN, which has no .split() and would raise.
# - astype(str): guards against cells that were parsed as numbers.
# - strip(): "a, b" and "a,b" must yield the same item names; otherwise
#   " b" and "b" are counted as two different products.
# - the `if item.strip()` filter discards empty items left by stray commas.
transactions = (
    df[0]
    .dropna()
    .astype(str)
    .apply(lambda row: [item.strip() for item in row.split(',') if item.strip()])
)

In [None]:
# Sanity-check the split by looking at the leading transactions
first_transactions = transactions.head()
print(first_transactions)

In [None]:
# Report the shape of the transactions Series
transactions_shape = transactions.shape
print(transactions_shape)

## Implementing the Apriori Algorithm

In [None]:
# One-hot encode the transactions: one row per transaction, one boolean
# column per distinct item seen during fit().
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df_trans = pd.DataFrame(data=te_ary, columns=te.columns_)

In [None]:
# Mine frequent itemsets with Apriori. min_support is the minimum support
# threshold; use_colnames=True reports itemsets by the DataFrame's column
# names rather than column indices.
frequent_itemsets = apriori(
    df_trans,
    min_support=0.005,
    use_colnames=True,
)

In [None]:
# Check if we found any frequent itemsets
print(frequent_itemsets.head())

if frequent_itemsets.empty:
    print("No frequent itemsets found. Try lowering the min_support value.")
    # Bind `rules` on this path too, so later cells that test `rules.empty`
    # see an empty frame instead of failing with NameError.
    rules = pd.DataFrame()
else:
    # Generate association rules with a minimum confidence threshold
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.2)

    # Display the resulting rules
    print(rules.head())

## Analysis and Interpretation
- Analyze the generated rules
- Identify interesting patterns and relationships
- Provide insights into customer purchasing behavior

In [None]:
if not rules.empty:
    # Rank the rules by lift, strongest association first. Reassigning is
    # preferred over inplace=True, which pandas discourages.
    rules = rules.sort_values('lift', ascending=False)

    # Take the top 10 once and reuse it for both the table and the summary
    top_rules = rules.head(10)
    print(top_rules)

    # Describe each of the top rules in plain language
    for _, rule in top_rules.iterrows():
        # antecedents/consequents are frozensets, whose iteration order can
        # differ between runs; sorting keeps the printed item order stable.
        antecedents = ', '.join(sorted(rule['antecedents']))
        consequents = ', '.join(sorted(rule['consequents']))
        print(f"Rule: If a customer buys {antecedents}, they are likely to also buy {consequents}")
        print(f" - Support: {rule['support']}")
        print(f" - Confidence: {rule['confidence']}")
        print(f" - Lift: {rule['lift']}")
        print()
else:
    print("No association rules found.")