# Association Rule Mining

In [1]:
import pandas as pd

In [7]:
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
df = pd.read_csv('df_processed.csv')

In [3]:
df_assoc = df.copy()
df_assoc['PM2.5_Level'] = pd.cut(df_assoc['PM2.5'], bins=3, labels=['Low', 'Medium', 'High'])
df_assoc['NO2_Level'] = pd.cut(df_assoc['NO2'], bins=3, labels=['Low', 'Medium', 'High'])

In [5]:
rules_df = df_assoc[['PM2.5_Level', 'NO2_Level', 'AQI_Bucket', 'Season', 'Is_Weekend']]

rules_df['Is_Weekend'] = rules_df['Is_Weekend'].map({0: 'Weekday', 1: 'Weekend'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rules_df['Is_Weekend'] = rules_df['Is_Weekend'].map({0: 'Weekday', 1: 'Weekend'})


In [10]:
df_basket = pd.get_dummies(rules_df.dropna())
print("\nFirst 5 rows of the one-hot encoded data for rule mining:")
print(df_basket.head())


First 5 rows of the one-hot encoded data for rule mining:
   PM2.5_Level_Low  PM2.5_Level_Medium  PM2.5_Level_High  NO2_Level_Low  \
0             True               False             False           True   
1             True               False             False           True   
2             True               False             False           True   
3             True               False             False           True   
4             True               False             False           True   

   NO2_Level_Medium  NO2_Level_High  AQI_Bucket_Good  AQI_Bucket_Moderate  \
0             False           False            False                 True   
1             False           False            False                 True   
2             False           False            False                 True   
3             False           False            False                 True   
4             False           False            False                 True   

   AQI_Bucket_Poor  AQI_Buc

In [11]:
frequent_itemsets = apriori(df_basket, min_support=0.05, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

In [12]:
rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False])
print("\nTop Association Rules found with new features:")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(10))


Top Association Rules found with new features:
                                          antecedents  \
4                           (AQI_Bucket_Satisfactory)   
0                           (AQI_Bucket_Satisfactory)   
2          (AQI_Bucket_Satisfactory, PM2.5_Level_Low)   
16           (NO2_Level_Low, AQI_Bucket_Satisfactory)   
6            (NO2_Level_Low, AQI_Bucket_Satisfactory)   
14  (NO2_Level_Low, AQI_Bucket_Satisfactory, PM2.5...   
20                          (AQI_Bucket_Satisfactory)   
8                           (AQI_Bucket_Satisfactory)   
18         (AQI_Bucket_Satisfactory, PM2.5_Level_Low)   
24      (AQI_Bucket_Satisfactory, Is_Weekend_Weekday)   

                                         consequents   support  confidence  \
4                  (Season_Monsoon, PM2.5_Level_Low)  0.127764    0.458779   
0                                   (Season_Monsoon)  0.127764    0.458779   
2                                   (Season_Monsoon)  0.127764    0.458779   
16          

In [14]:
# Your existing code to generate and sort the rules
rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False])



# Loop through the top 10 rules and print them in a readable format
for index, rule in rules.head(10).iterrows():
    # Convert the frozensets to clean strings
    antecedents = ', '.join(list(rule['antecedents']))
    consequents = ', '.join(list(rule['consequents']))
    
    # Get the metrics
    confidence = rule['confidence']
    lift = rule['lift']
    
    # Print the rule
    print(f"Rule: IF {{{antecedents}}} -> THEN {{{consequents}}}")
    
    # Print the metrics in a formatted way
    print(f"  - Confidence: {confidence:.2%} | Lift: {lift:.2f}\n")

Rule: IF {AQI_Bucket_Satisfactory} -> THEN {Season_Monsoon, PM2.5_Level_Low}
  - Confidence: 45.88% | Lift: 1.45

Rule: IF {AQI_Bucket_Satisfactory} -> THEN {Season_Monsoon}
  - Confidence: 45.88% | Lift: 1.45

Rule: IF {AQI_Bucket_Satisfactory, PM2.5_Level_Low} -> THEN {Season_Monsoon}
  - Confidence: 45.88% | Lift: 1.45

Rule: IF {NO2_Level_Low, AQI_Bucket_Satisfactory} -> THEN {Season_Monsoon, PM2.5_Level_Low}
  - Confidence: 45.88% | Lift: 1.45

Rule: IF {NO2_Level_Low, AQI_Bucket_Satisfactory} -> THEN {Season_Monsoon}
  - Confidence: 45.88% | Lift: 1.45

Rule: IF {NO2_Level_Low, AQI_Bucket_Satisfactory, PM2.5_Level_Low} -> THEN {Season_Monsoon}
  - Confidence: 45.88% | Lift: 1.45

Rule: IF {AQI_Bucket_Satisfactory} -> THEN {NO2_Level_Low, Season_Monsoon, PM2.5_Level_Low}
  - Confidence: 45.85% | Lift: 1.46

Rule: IF {AQI_Bucket_Satisfactory} -> THEN {NO2_Level_Low, Season_Monsoon}
  - Confidence: 45.85% | Lift: 1.45

Rule: IF {AQI_Bucket_Satisfactory, PM2.5_Level_Low} -> THEN {NO2