In [39]:
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder
from collections import defaultdict

In [40]:
data = pd.read_excel('/content/Online retail.xlsx')

# Preprocessing the dataset
data.dropna(inplace=True)  # Handling missing values
transactions = data.values.tolist()



In [41]:
# Convert transactions to a list of lists of items
transactions = [str(item).split(',') for row in transactions for item in row]
transactions

[['burgers', 'meatballs', 'eggs'],
 ['chutney'],
 ['turkey', 'avocado'],
 ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea'],
 ['low fat yogurt'],
 ['whole wheat pasta', 'french fries'],
 ['soup', 'light cream', 'shallot'],
 ['frozen vegetables', 'spaghetti', 'green tea'],
 ['french fries'],
 ['eggs', 'pet food'],
 ['cookies'],
 ['turkey', 'burgers', 'mineral water', 'eggs', 'cooking oil'],
 ['spaghetti', 'champagne', 'cookies'],
 ['mineral water', 'salmon'],
 ['mineral water'],
 ['shrimp',
  'chocolate',
  'chicken',
  'honey',
  'oil',
  'cooking oil',
  'low fat yogurt'],
 ['turkey', 'eggs'],
 ['turkey',
  'fresh tuna',
  'tomatoes',
  'spaghetti',
  'mineral water',
  'black tea',
  'salmon',
  'eggs',
  'chicken',
  'extra dark chocolate'],
 ['meatballs', 'milk', 'honey', 'french fries', 'protein bar'],
 ['red wine', 'shrimp', 'pasta', 'pepper', 'eggs', 'chocolate', 'shampoo'],
 ['rice', 'sparkling water'],
 ['spaghetti', 'mineral water', 'ham', 'body spray',

In [42]:
# One-hot encode the transactions
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7496,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7497,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7498,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [43]:
# Applying the Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(df, min_support=50/len(df), use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.020267,(almonds)
1,0.008800,(antioxydant juice)
2,0.033200,(avocado)
3,0.008667,(bacon)
4,0.010800,(barbecue sauce)
...,...,...
465,0.011467,"(spaghetti, mineral water, pancakes)"
466,0.006800,"(salmon, spaghetti, mineral water)"
467,0.008533,"(spaghetti, mineral water, shrimp)"
468,0.007467,"(spaghetti, mineral water, soup)"


In [44]:
# Generation of association rules from the frequent itemsets
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)
# Filtering rules based on confidence and length
rules = rules[rules['confidence'] >= 0.5]
rules = rules[rules['antecedents'].apply(len) + rules['consequents'].apply(len) >= 2]
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
485,"(chicken, chocolate)",(mineral water),0.014667,0.238267,0.0076,0.518182,2.174798,1.0,0.004105,1.580956,0.548228,0.030978,0.367471,0.275039
591,"(olive oil, chocolate)",(mineral water),0.0164,0.238267,0.008267,0.504065,2.11555,1.0,0.004359,1.535954,0.536102,0.03355,0.348939,0.26938
665,"(ground beef, eggs)",(mineral water),0.02,0.238267,0.010133,0.506667,2.126469,1.0,0.005368,1.544054,0.540548,0.040838,0.352354,0.274598
720,"(frozen vegetables, ground beef)",(mineral water),0.016933,0.238267,0.0092,0.543307,2.280248,1.0,0.005165,1.667933,0.571122,0.037398,0.400456,0.29096
727,"(frozen vegetables, ground beef)",(spaghetti),0.016933,0.174133,0.008667,0.511811,2.93919,1.0,0.005718,1.691695,0.671135,0.047515,0.408877,0.280791
773,"(ground beef, milk)",(mineral water),0.022,0.238267,0.011067,0.50303,2.111207,1.0,0.005825,1.532756,0.538177,0.044409,0.34758,0.274738
791,"(ground beef, pancakes)",(mineral water),0.014533,0.238267,0.007467,0.513761,2.156246,1.0,0.004004,1.566584,0.544139,0.030435,0.361668,0.272549
803,"(olive oil, milk)",(mineral water),0.017067,0.238267,0.008533,0.5,2.098489,1.0,0.004467,1.523467,0.532556,0.034576,0.343602,0.267907
821,"(soup, milk)",(mineral water),0.0152,0.238267,0.008533,0.561404,2.356198,1.0,0.004912,1.736752,0.584471,0.034839,0.424213,0.298609
850,"(salmon, spaghetti)",(mineral water),0.013467,0.238267,0.0068,0.50495,2.119266,1.0,0.003591,1.538701,0.535348,0.027763,0.350101,0.266745


#Analysis of Generated Rules:
##(chicken, chocolate) -> mineral water

* Confidence: 51.82%

* Lift: 2.17

###Insight: Customers who buy chicken and chocolate together are more than twice as likely to also purchase mineral water compared to the average customer.


##(olive oil, chocolate) -> mineral water

- Confidence: 50.41%

- Lift: 2.12

###Insight: There is a strong association between purchasing olive oil and chocolate together with mineral water, indicating that these items may be part of a planned shopping list or recipe.



##(ground beef, eggs) -> mineral water

- Confidence: 50.67%

- Lift: 2.13

###Insight: Ground beef and eggs are commonly bought together with mineral water, suggesting that customers might be preparing meals that require these ingredients.

##(frozen vegetables, ground beef) -> mineral water

- Confidence: 54.33%

- Lift: 2.28

###Insight: Customers who buy frozen vegetables and ground beef together have a high likelihood of also buying mineral water, indicating a trend towards purchasing ingredients for healthy meals.

##(frozen vegetables, ground beef) -> spaghetti

- Confidence: 51.18%

- Lift: 2.94

###Insight: There is a strong association between purchasing frozen vegetables, ground beef, and spaghetti, suggesting that customers are buying these items together for a specific recipe, like a pasta dish.

##(ground beef, milk) -> mineral water

- Confidence: 50.30%

- Lift: 2.11

###Insight: Ground beef and milk are commonly bought together with mineral water, indicating these items may be part of a balanced grocery list.

##(ground beef, pancakes) -> mineral water

-Confidence: 51.38%

- Lift: 2.16

###Insight: Customers who buy ground beef and pancakes together are more likely to also purchase mineral water, suggesting these items might be part of a broader meal plan.

##(olive oil, milk) -> mineral water

- Confidence: 50.00%

- Lift: 2.10

###Insight: Olive oil and milk being bought together with mineral water indicates a consistent shopping pattern where these items are part of a regular grocery purchase.

##(soup, milk) -> mineral water

- Confidence: 56.14%

- Lift: 2.36

###Insight: The combination of soup and milk being frequently bought with mineral water suggests that these items might be part of a convenient meal preparation plan.

##(salmon, spaghetti) -> mineral water

- Confidence: 50.50%

- Lift: 2.12

###Insight: There is a notable association between purchasing salmon, spaghetti, and mineral water, indicating that these items might be used together in meal preparation.

#Insights into Customer Purchasing Behavior:
- Healthy Eating: Several rules indicate that customers are purchasing ingredients like frozen vegetables, ground beef, and mineral water together, suggesting a trend towards healthy eating and meal planning.

- Meal Planning: Combinations like frozen vegetables, ground beef, and spaghetti, as well as olive oil, chocolate,

##INTERVIEW QUESTIONS

###What is Lift and why is it important in Association Rules?

- Lift measures the strength of a relationship between items. It shows how much more likely two items appear together than by chance.
If lift > 1, the items are positively related; if lift = 1, they are independent; if lift < 1, they are negatively related.
It helps identify meaningful associations beyond just frequent co-occurrence.

###What is Support and Confidence? How do you calculate them?

- Support: The frequency of an item or itemset in the dataset.
Support
=
Transactions containing the itemset
Total transactions
Support=
Total transactions
Transactions containing the itemset
​

- Confidence: The likelihood that one item appears given that another item is present.
Confidence
=
Transactions containing both items
Transactions containing the first item
Confidence=
Transactions containing the first item
Transactions containing both items
​

######Support finds common patterns, while confidence measures the strength of a rule.

##What are some limitations or challenges of Association Rule Mining?

- Too Many Rules: Generates a large number of rules, making it hard to find useful ones.
- Computational Cost: Requires significant time and memory for large datasets.
- Choosing Thresholds: Setting the right support and confidence levels is challenging.
- Interpretation Issues: Not all statistically strong rules are practically useful.
- Handling Continuous Data: Works best with categorical data, requiring extra processing for numerical values.
