## 1. Load data

In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import numpy as np
import matplotlib.pyplot as plt

# Lift the per-column character limit so long cell values are never truncated
pd.options.display.max_colwidth = None

In [2]:
# Location of the preprocessed dataset inside the Kaggle input directory
DATA_PATH = "/kaggle/input/online-purchase-intentions-during-crises/preprocessed_dataset.csv"

df = pd.read_csv(DATA_PATH)
df.head(10)

Unnamed: 0,age,education,used_before,profession,EaseofNavigation1,EaseofNavigation2,LearnCurve1,LearnCurve2,ClarityofInstructions1,ClarityofInstructions2,...,fashion_unknown,platform_grocery_clean,grocery_arpico,grocery_cinnamonhotel,grocery_glomark,grocery_keels,grocery_other,grocery_pickmefoods,grocery_ubereats,grocery_unknown
0,1,3,1,3,4,4,4,4,4,4,...,1,['ubereats'],0,0,0,0,0,0,1,0
1,1,3,1,3,4,3,4,4,3,3,...,0,"['ubereats', 'pickmefoods', 'keels']",0,0,0,1,0,1,1,0
2,1,3,1,3,3,4,4,3,4,4,...,0,"['ubereats', 'pickmefoods']",0,0,0,0,0,1,1,0
3,1,5,1,3,4,3,2,3,3,2,...,0,"['ubereats', 'pickmefoods']",0,0,0,0,0,1,1,0
4,1,3,1,3,3,4,4,4,4,3,...,0,"['ubereats', 'pickmefoods', 'keels']",0,0,0,1,0,1,1,0
5,1,5,1,3,4,4,4,4,3,3,...,0,"['ubereats', 'pickmefoods', 'keels', 'arpico']",1,0,0,1,0,1,1,0
6,1,3,1,3,4,4,4,4,4,4,...,0,"['ubereats', 'pickmefoods', 'arpico']",1,0,0,0,0,1,1,0
7,1,3,1,2,4,3,5,5,4,5,...,0,"['ubereats', 'pickmefoods', 'keels']",0,0,0,1,0,1,1,0
8,1,3,1,3,3,3,1,3,2,3,...,1,['ubereats'],0,0,0,0,0,0,1,0
9,0,3,1,0,4,4,5,5,3,4,...,1,"['keels', 'arpico']",1,0,0,1,0,0,0,0


## 2. Construct mapping

In [3]:
def _numbered(stems, count=2):
    """Expand each stem into the column names ``stem1 .. stem<count>``, keeping stem order."""
    return [f'{stem}{i}' for stem in stems for i in range(1, count + 1)]


# Construct name -> list of questionnaire item columns that belong to it.
# Every facet below has two numbered items, except Intention which has four.
construct_items = {
    'PEOU': _numbered([
        'EaseofNavigation',
        'LearnCurve',
        'ClarityofInstructions',
        'ResponseTime',
        'ErrorHandling',
    ]),
    'PU': _numbered([
        'ProductAvailability',
        'Convenience',
        'Cost-effectiveness',
        'InformationAccessibility',
        'Personalization',
    ]),
    'SA': _numbered([
        'PrivacyProtection',
        'PaymentSecurity',
        'TransparentPolicies',
    ]),
    'SocialInfluence': _numbered([
        'WordofMouthandRecommendations',
        'SocialMediaPosts/Influence',
        'OnlineReviewsandRatings',
        'SocialProof',
        'NormativePressure',
        'InformationSharing',
    ]),
    'Attitude': _numbered(['Attitude']),
    'PerceivedRisk': _numbered(['PerceivedRisk']),
    'Intention': _numbered(['IntentiontoPurchaseOnline'], count=4),
}


## 3. Aggregate items into construct means

In [4]:
# One score per construct: the row-wise mean of that construct's item columns
construct_cols = list(construct_items)

for construct in construct_cols:
    items = construct_items[construct]
    df[construct] = df[items].mean(axis=1)

df[construct_cols].describe()

Unnamed: 0,PEOU,PU,SA,SocialInfluence,Attitude,PerceivedRisk,Intention
count,825.0,825.0,825.0,825.0,825.0,825.0,825.0
mean,3.258788,3.291758,3.191919,3.375758,3.853333,3.875152,3.81697
std,0.559536,0.570007,0.555874,0.580757,0.638332,0.672032,0.519332
min,1.0,1.0,1.0,1.0,1.0,1.0,1.5
25%,3.0,3.0,3.0,3.0,3.5,3.5,3.5
50%,3.1,3.2,3.0,3.25,4.0,4.0,3.75
75%,3.6,3.6,3.5,3.833333,4.0,4.0,4.0
max,5.0,5.0,5.0,5.0,5.0,5.0,5.0



## Discretise construct scores

We convert the 1–5 Likert construct means into three categorical levels (each interval is closed on the left):
* **Low** : score < 3  
* **Neutral** : 3 ≤ score < 4  
* **High** : score ≥ 4

## 4. Discretise 

In [5]:
# Left-closed intervals: [0, 3) -> Low, [3, 4) -> Neutral, [4, 5.1) -> High.
# The last edge sits just above 5 so that a score of exactly 5.0 still lands in High.
labels = ['Low','Neutral','High']
bins   = [0, 3, 4, 5.1]
cut_kwargs = dict(bins=bins, labels=labels, right=False)

for col in construct_cols:
    df[f'{col}_cat'] = pd.cut(df[col], **cut_kwargs)

# Peek
df[[f'{c}_cat' for c in construct_cols]].head()

Unnamed: 0,PEOU_cat,PU_cat,SA_cat,SocialInfluence_cat,Attitude_cat,PerceivedRisk_cat,Intention_cat
0,Neutral,Neutral,Low,Neutral,Neutral,Neutral,Low
1,Neutral,Neutral,Neutral,Neutral,Neutral,Neutral,High
2,Neutral,Neutral,High,Neutral,High,High,High
3,Low,Neutral,High,High,Neutral,Neutral,High
4,Neutral,High,Neutral,Neutral,High,Neutral,Neutral


## 5. One‑hot encode basket

In [6]:
# One indicator column per (construct, level) pair, e.g. PEOU_cat_Low
cat_cols = [f'{c}_cat' for c in construct_cols]
basket = pd.get_dummies(df[cat_cols])

print('Basket shape:', basket.shape)
basket.head()

Basket shape: (825, 21)


Unnamed: 0,PEOU_cat_Low,PEOU_cat_Neutral,PEOU_cat_High,PU_cat_Low,PU_cat_Neutral,PU_cat_High,SA_cat_Low,SA_cat_Neutral,SA_cat_High,SocialInfluence_cat_Low,...,SocialInfluence_cat_High,Attitude_cat_Low,Attitude_cat_Neutral,Attitude_cat_High,PerceivedRisk_cat_Low,PerceivedRisk_cat_Neutral,PerceivedRisk_cat_High,Intention_cat_Low,Intention_cat_Neutral,Intention_cat_High
0,False,True,False,False,True,False,True,False,False,False,...,False,False,True,False,False,True,False,True,False,False
1,False,True,False,False,True,False,False,True,False,False,...,False,False,True,False,False,True,False,False,False,True
2,False,True,False,False,True,False,False,False,True,False,...,False,False,False,True,False,False,True,False,False,True
3,True,False,False,False,True,False,False,False,True,False,...,True,False,True,False,False,True,False,False,False,True
4,False,True,False,False,False,True,False,True,False,False,...,False,False,False,True,False,True,False,False,True,False


## 6. Run Apriori algorithm

### 1. KEEP ONLY _Low and _High columns (strip out _Neutral)

In [7]:
# Keep only indicator columns whose name ends in _Low or _High; Neutral levels are dropped
hl_pattern = r'_(Low|High)$'
basket_hl = basket.filter(regex=hl_pattern).copy()

### 2. APRIORI on the High/Low basket

In [8]:
# `apriori` and `association_rules` are already imported in the notebook's first
# cell, so they are not re-imported here.

# Mining thresholds, named so they can be tuned in one place.
MIN_SUPPORT    = 0.05   # minimum share of rows an itemset must appear in
MIN_CONFIDENCE = 0.65   # minimum confidence for a rule to be generated
MIN_LIFT       = 2.0    # rules with lower lift are discarded

# Frequent itemsets over the High/Low indicator columns
freq_itemsets = apriori(
    basket_hl,
    min_support  = MIN_SUPPORT,
    use_colnames = True
)

# Generate rules at the confidence threshold, keep only those meeting the lift
# threshold, then rank by lift, confidence and support (all descending).
rules = (
    association_rules(
        freq_itemsets,
        metric        = 'confidence',
        min_threshold = MIN_CONFIDENCE
    )
    .loc[lambda r: r['lift'] >= MIN_LIFT]
    .sort_values(['lift','confidence','support'], ascending=False)
    .reset_index(drop=True)
)


### 3. TAKE THE TOP-5 RULES


In [9]:
# First five rows of the rules table, copied so edits to `top5` leave `rules` untouched
top5 = rules.iloc[:5].copy()

### 4. MAKE THEM HUMAN-READABLE & SAVE

In [10]:
def _format_itemset(items):
    """Render an itemset as a sorted, comma-separated string.

    A value that is already a string is returned unchanged, so re-running this
    cell does not split previously formatted text into single characters.
    """
    if isinstance(items, str):
        return items
    return ', '.join(sorted(items))


# Replace the itemset objects in both rule sides with readable text
for side in ('antecedents', 'consequents'):
    top5[side] = top5[side].apply(_format_itemset)

print(top5[['antecedents','consequents','support','confidence','lift']])
top5.to_csv('top5_apriori_rules_HL.csv', index=False)

                                                     antecedents  \
0                               Attitude_cat_High, PEOU_cat_High   
1                                                     PU_cat_Low   
2                             PU_cat_Low, PerceivedRisk_cat_High   
3                          PU_cat_High, SocialInfluence_cat_High   
4  PU_cat_High, PerceivedRisk_cat_High, SocialInfluence_cat_High   

                                                     consequents   support  \
0                                                    PU_cat_High  0.052121   
1                                                   PEOU_cat_Low  0.111515   
2                                                   PEOU_cat_Low  0.061818   
3  Attitude_cat_High, Intention_cat_High, PerceivedRisk_cat_High  0.052121   
4                          Attitude_cat_High, Intention_cat_High  0.052121   

   confidence      lift  
0    0.651515  4.300000  
1    0.707692  3.560038  
2    0.689189  3.466958  
3    0.704918  2.3

## Interpretation & managerial insights 

| #     | Rule (IF → THEN)                                                                     | Support<sup>†</sup> | Confidence | Lift     | Insight                                                                                           | Key takeaway for Wolt (crisis‐period)                                                                                                         |
| ----- | ------------------------------------------------------------------------------------ | ------------------- | ---------- | -------- | ------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
| **1** | **High Attitude ∧ High PEOU** → **High PU**                                          | 5.2 %               | 0.65       | **4.30** | Shoppers who feel good *and* find the site easy are **4×** more likely to see it as useful.       | Pair positive brand messaging with friction-free UX (one-tap checkout, clear navigation) to amplify perceived usefulness.                     |
| **2** | **Low PU** → **Low PEOU**                                                            | 11.2 %              | 0.71       | 3.56     | When value isn’t obvious, users also judge the site hard to use.                                  | Lead with a crisp value proposition (time savings, price deals) to keep “ease-of-use” perceptions from collapsing.                            |
| **3** | **Low PU ∧ High Risk** → **Low PEOU**                                                | 6.2 %               | 0.69       | 3.47     | Utility doubts **combined** with high perceived risk go with low ease-of-use ratings in about 69 % of cases. | Tackle both levers together: highlight benefits *and* surface trust badges/refund guarantees to lift ease-of-use sentiment.                   |
| **4** | **High PU ∧ High Social Influence** → **High Attitude ∧ High Intention ∧ High Risk** | 5.2 %               | 0.70       | 2.32     | Social proof and usefulness push attitude & intention high—even though perceived risk stays high. | Keep influencer/peer reviews front-and-centre, but add explicit risk-reduction cues (secure payment icons) so risk doesn’t stall conversions. |
| **5** | **High PU ∧ High Risk ∧ High Social Influence** → **High Attitude ∧ High Intention** | 5.2 %               | **0.78**   | 2.11     | Even risk-aware consumers intend to buy when usefulness *and* social buzz are strong.             | Bundle “utility” messaging with social proof; risk alone won’t deter purchases if both are convincing.                                        |
