## Predictive Analysis Objectives (Association Rule Mining)

Based on the given dataset, the *potential* objectives for predictive analysis using association rule mining are:

1. **Identifying Cross-Shopping Behavior**
2. **Identifying Amenity Preferences Based on Store Choices**
3. **Identifying the Optimal Store Sequences**

In [9]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

# Load the cleaned dataset. read_csv already returns a DataFrame, so no
# extra pd.DataFrame(...) wrapper is needed.
file_path = '../data/dino_mall_cleaned.csv'
df = pd.read_csv(file_path)
# Last expression of the cell: shows (rows, columns) as the cell output.
df.shape

(113, 34)

In [10]:
# Build the set of item names accepted in a transaction: every distinct value
# found in column 7 (the store categories) plus three location labels.
unique_values = {
    *df.iloc[:, 7].unique(),
    "Center of the mall",
    "End of the mall",
    "Near/Beside the entrance",
}
unique_values

{'Apparel and Fashion',
 'Beauty and Personal Care, Health and Wellness',
 'Center of the mall',
 'Department Stores',
 'Electronics and Gadgets',
 'End of the mall',
 'Entertainment',
 'Food and Beverages',
 'Home Furnishings and Decor',
 'Near/Beside the entrance',
 'Services',
 'Specialty Stores'}

In [11]:
def get_transaction(x, categories=None):
    """Convert a column of comma-separated answers into lists of known items.

    Each string in *x* is broken into the known category names it contains;
    anything that is not a known category is dropped. Non-string entries
    (e.g. NaN for a missing answer) become an empty list, so the result always
    has exactly one list per input row.

    A category name may itself contain a comma (for example
    'Beauty and Personal Care, Health and Wellness'). A plain split on ','
    would break such a name into fragments that match nothing, so adjacent
    fragments are re-joined and the longest known name wins.

    Args:
        x: Iterable of cell values, e.g. one DataFrame column.
        categories: Iterable of valid item names. When omitted, the
            module-level ``unique_values`` is used.

    Returns:
        A list with one list of category names per element of *x*, in order
        of appearance within each cell.
    """
    if categories is None:
        categories = unique_values

    # Key every name by its comma-normalised spelling so lookups do not depend
    # on spacing around commas; non-string entries such as NaN are ignored.
    known = {
        ', '.join(part.strip() for part in name.split(',')): name
        for name in categories
        if isinstance(name, str)
    }
    # Largest number of comma-separated fragments any single name spans.
    max_width = max((key.count(',') + 1 for key in known), default=1)

    transaction = []
    for amenity in x:
        if not isinstance(amenity, str):
            transaction.append([])
            continue

        parts = [part.strip() for part in amenity.split(',')]
        items = []
        pos = 0
        while pos < len(parts):
            # Try the widest window first so multi-fragment names are preferred
            # over any shorter name that happens to be a prefix of them.
            for width in range(min(max_width, len(parts) - pos), 0, -1):
                match = known.get(', '.join(parts[pos:pos + width]))
                if match is not None:
                    items.append(match)
                    pos += width
                    break
            else:
                # This fragment does not start any known category; skip it.
                pos += 1
        transaction.append(items)

    return transaction

def get_association_rules(transaction, *, min_support=0.1, min_confidence=0.5):
    """Mine association rules from a list of transactions.

    The transactions are one-hot encoded, frequent itemsets are found with
    the apriori algorithm, and rules are then generated from those itemsets
    using confidence as the filtering metric.

    Args:
        transaction: List of item lists, one per row.
        min_support: Minimum support passed to ``apriori``. Defaults to 0.1.
        min_confidence: Minimum confidence passed to ``association_rules``
            as ``min_threshold``. Defaults to 0.5.

    Returns:
        The DataFrame produced by ``association_rules``.
    """
    # NOTE(review): if no itemset reaches min_support, association_rules may
    # raise on the empty input - confirm against the installed mlxtend version.

    # Use TransactionEncoder to convert to a one-hot encoded format
    te = TransactionEncoder()
    te_ary = te.fit(transaction).transform(transaction)
    amenities_df = pd.DataFrame(te_ary, columns=te.columns_)

    # Apply apriori algorithm to find frequent itemsets
    frequent_itemsets = apriori(
        amenities_df, min_support=min_support, use_colnames=True
    )

    # Generate association rules, keeping those that meet the confidence floor
    rules = association_rules(
        frequent_itemsets, metric="confidence", min_threshold=min_confidence
    )

    return rules

def print_rule(df, col_name):
    """Print a heading for *col_name* and display the five highest-lift rules.

    Args:
        df: Rules DataFrame containing at least the columns 'antecedents',
            'consequents', 'support', 'confidence' and 'lift'.
        col_name: Label shown in the heading line.
    """
    print("\nAssociation Rules for :", col_name)

    shown_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
    ranked = df[shown_columns].sort_values(by='lift', ascending=False)
    top_rules = ranked.head(5)

    # `display` is the rich-output helper available in the notebook session.
    display(top_rules)

In [13]:
# Run the full pipeline once per column at positions 28..33: parse the cells
# into transactions, mine rules, then show the strongest rules per column.
for i in range(28, 34):
    col_name = df.columns[i]
    col_data = df.iloc[:, i]

    # cell text -> item lists -> association rules
    data = get_transaction(col_data)
    rules = get_association_rules(data)

    print_rule(rules, col_name)


Association Rules for : amenities_beside_customer_service_desks


Unnamed: 0,antecedents,consequents,support,confidence,lift
48,(Specialty Stores),"(Home Furnishings and Decor, Services)",0.115044,0.52,3.6725
47,"(Home Furnishings and Decor, Services)",(Specialty Stores),0.115044,0.8125,3.6725
19,"(Apparel and Fashion, Services)",(Specialty Stores),0.106195,0.8,3.616
15,"(Apparel and Fashion, Services)",(Home Furnishings and Decor),0.106195,0.8,3.348148
39,"(Specialty Stores, Electronics and Gadgets)",(Home Furnishings and Decor),0.106195,0.8,3.348148



Association Rules for : amenities_beside_comfort_rooms


Unnamed: 0,antecedents,consequents,support,confidence,lift
13,(Services),"(Food and Beverages, Department Stores)",0.106195,0.75,3.259615
4,(Services),(Department Stores),0.115044,0.8125,2.295313
11,"(Food and Beverages, Services)",(Department Stores),0.106195,0.8,2.26
9,"(Food and Beverages, Department Stores)",(Entertainment),0.150442,0.653846,1.944332
3,(Home Furnishings and Decor),(Department Stores),0.106195,0.666667,1.883333



Association Rules for : amenities_beside_common_area


Unnamed: 0,antecedents,consequents,support,confidence,lift
19,(Electronics and Gadgets),"(Food and Beverages, Apparel and Fashion)",0.106195,0.571429,3.798319
16,"(Food and Beverages, Apparel and Fashion)",(Electronics and Gadgets),0.106195,0.705882,3.798319
17,"(Food and Beverages, Electronics and Gadgets)",(Apparel and Fashion),0.106195,0.8,3.013333
27,(Specialty Stores),"(Food and Beverages, Entertainment)",0.106195,0.6,2.511111
1,(Electronics and Gadgets),(Apparel and Fashion),0.123894,0.666667,2.511111



Association Rules for : amenities_beside_directory


Unnamed: 0,antecedents,consequents,support,confidence,lift
3,"(Near/Beside the entrance, Center of the mall)",(End of the mall),0.176991,0.606061,1.850942
6,(End of the mall),"(Near/Beside the entrance, Center of the mall)",0.176991,0.540541,1.850942
4,"(Near/Beside the entrance, End of the mall)",(Center of the mall),0.176991,0.666667,1.477124
5,"(Center of the mall, End of the mall)",(Near/Beside the entrance),0.176991,1.0,1.284091
0,(End of the mall),(Center of the mall),0.176991,0.540541,1.197668



Association Rules for : amenities_beside_elevator


Unnamed: 0,antecedents,consequents,support,confidence,lift
2,"(Near/Beside the entrance, Center of the mall)",(End of the mall),0.106195,0.666667,3.766667
5,(End of the mall),"(Near/Beside the entrance, Center of the mall)",0.106195,0.6,3.766667
4,"(Center of the mall, End of the mall)",(Near/Beside the entrance),0.106195,0.75,2.230263
1,(End of the mall),(Near/Beside the entrance),0.115044,0.65,1.932895
3,"(Near/Beside the entrance, End of the mall)",(Center of the mall),0.106195,0.923077,1.158974



Association Rules for : amenities_beside_escalator


Unnamed: 0,antecedents,consequents,support,confidence,lift
3,"(Near/Beside the entrance, Center of the mall)",(End of the mall),0.176991,0.769231,2.069597
5,"(Center of the mall, End of the mall)",(Near/Beside the entrance),0.176991,0.869565,1.584853
1,(Near/Beside the entrance),(End of the mall),0.309735,0.564516,1.518817
2,(End of the mall),(Near/Beside the entrance),0.309735,0.833333,1.518817
4,"(Near/Beside the entrance, End of the mall)",(Center of the mall),0.176991,0.571429,0.88454
