In [1]:
from mlxtend.frequent_patterns import apriori
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [2]:
# Correct file path (ensure the path is correct)
file_path = r"C:\\Users\\bhimr\\OneDrive\\Documents\\Association Rules\\Online retail.xlsx"

# Step 1: Read the dataset (assuming data is in a suitable format like a list of transactions)
df = pd.read_excel(file_path, header=None)

In [3]:
# Example: Assuming each row represents a transaction and each item is separated by a delimiter (e.g., comma)
transactions = df[0].str.split(',').tolist()

In [4]:
# Step 2: Create a list of all unique items across all transactions
encoder = TransactionEncoder()
df_transformed = encoder.fit_transform(transactions)

In [5]:
# Convert the result into a DataFrame for better readability
df_transformed = pd.DataFrame(df_transformed, columns=encoder.columns_)

In [6]:
# Step 3: Generate frequent itemsets using the apriori algorithm
frequent_itemsets = apriori(df_transformed, min_support=0.05, use_colnames=True)


In [7]:
# Manually calculate confidence and lift
rules = []
for _, row in frequent_itemsets.iterrows():
    # Generate all possible item pairs
    itemset = row['itemsets']
    
    for item in itemset:
        # Calculate support, confidence, and lift manually
        support = row['support']
        confidence = support  # Simplified assumption; you may adjust for specific rules
        lift = confidence / (support * support)  # Simplified calculation

        if lift > 1:  # Consider only interesting rules with lift > 1
            rules.append({
                'itemset': itemset,
                'support': support,
                'confidence': confidence,
                'lift': lift
            })

In [8]:
# Convert the manually generated rules to a DataFrame
rules_df = pd.DataFrame(rules)

# Display the manually generated rules
print(rules_df)


                       itemset   support  confidence       lift
0                    (burgers)  0.087188    0.087188  11.469419
1                       (cake)  0.081056    0.081056  12.337171
2                    (chicken)  0.059992    0.059992  16.668889
3                  (chocolate)  0.163845    0.163845   6.103336
4                    (cookies)  0.080389    0.080389  12.439469
5                (cooking oil)  0.051060    0.051060  19.584856
6                       (eggs)  0.179709    0.179709   5.564540
7                   (escalope)  0.079323    0.079323  12.606723
8               (french fries)  0.170911    0.170911   5.851014
9            (frozen smoothie)  0.063325    0.063325  15.791579
10         (frozen vegetables)  0.095321    0.095321  10.490909
11             (grated cheese)  0.052393    0.052393  19.086514
12                 (green tea)  0.132116    0.132116   7.569122
13               (ground beef)  0.098254    0.098254  10.177748
14            (low fat yogurt)  0.076523

In [1]:
#Lift: A measure of the strength of an association rule compared to random chance. 
#It indicates how much more likely two items are to be purchased together than independently.
#lift = confidence/support

In [2]:
#Support: Indicates how frequently an item or itemset appears in the dataset.
#Support(A) = Transaction containing A / Total transaction
#Confidence: Measures the likelihood that item B is purchased when item A is purchased.


In [None]:
#