In [2]:
#D212 Task 3
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Read dataset (one basket per row, one item per ItemNN column)
df = pd.read_csv('teleco_market_basket.csv')

print(df.head())

# Drop rows in which every column is missing
df = df.dropna(how='all')

# Build one list of items per basket, keeping only the cells that actually
# hold an item. Casting the whole frame to str first would turn every missing
# slot into the literal string 'nan', which the encoder would then learn as
# if it were a real product.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in df.values.tolist()
]

# Convert all columns to string type (done after the baskets are built so
# `df` still ends this cell in the same all-string form as before)
df = df.astype(str)

# Encoder: learn the set of distinct items across all baskets
encoder = TransactionEncoder().fit(transactions)

# One-hot encode itemsets: one row per basket, one boolean column per item
onehot = encoder.transform(transactions)
onehot_df = pd.DataFrame(onehot, columns=encoder.columns_)

# Save the processed data
onehot_df.to_csv('processed_market_basket_data.csv', index=False)


                                  Item01                            Item02  \
0                                    NaN                               NaN   
1           Logitech M510 Wireless mouse                         HP 63 Ink   
2                                    NaN                               NaN   
3  Apple Lightning to Digital AV Adapter  TP-Link AC1750 Smart WiFi Router   
4                                    NaN                               NaN   

         Item03                      Item04                     Item05  \
0           NaN                         NaN                        NaN   
1     HP 65 ink  nonda USB C to USB Adapter  10ft iPHone Charger Cable   
2           NaN                         NaN                        NaN   
3  Apple Pencil                         NaN                        NaN   
4           NaN                         NaN                        NaN   

         Item06                        Item07  \
0           NaN                      

In [5]:
# Convert transactions to a one-hot frame (one boolean column per item)
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# If empty basket slots were encoded as the literal item 'nan', remove that
# placeholder column before mining. This keeps it out of `frequent_itemsets`
# as well as out of the rules, and avoids mining itemsets that would only be
# thrown away afterwards. errors='ignore' makes this a no-op when the column
# is absent.
df_encoded = df_encoded.drop(columns='nan', errors='ignore')

# Apriori algorithm: keep itemsets whose support reaches min_support
min_support = 0.05
frequent_itemsets = apriori(df_encoded, min_support=min_support, use_colnames=True)

# Association rules, thresholded on lift with a minimum of 1.0
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

# Sort rules based on lift in descending order to get top rules
sorted_rules = rules.sort_values(by='lift', ascending=False)

# Extract top 3 rules
top_rules = sorted_rules.head(3)

# Print top rules
print("Top Rules:")
print(top_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


Top Rules:
                           antecedents                         consequents  \
14    (Dust-Off Compressed Gas 2 pack)  (VIVO Dual LCD Monitor Desk mount)   
15  (VIVO Dual LCD Monitor Desk mount)    (Dust-Off Compressed Gas 2 pack)   
13                         (HP 61 ink)    (Dust-Off Compressed Gas 2 pack)   

     support  confidence      lift  
14  0.059725    0.250559  1.439085  
15  0.059725    0.343032  1.439085  
13  0.052660    0.321400  1.348332  


  and should_run_async(code)


# New Section