# Regole di associazione

In [79]:
import numpy as np
import pandas as pd

In [80]:
df = pd.read_csv("/home/anisa_bakiu/Downloads/DM_progetto/pre-elaborazione/em_processed.csv")

In [81]:
df.columns.to_list()

['Disaster Group',
 'Disaster Subgroup',
 'Disaster Type',
 'Disaster Subtype',
 'Country',
 'Subregion',
 'Total Deaths',
 'No. Injured',
 'No. Affected',
 'Total Affected',
 "Total Damage, Adjusted ('000 US$)",
 'duration_days']

In [82]:
df = df.rename(columns={"Total Damage, Adjusted ('000 US$)": "Total Damage"})

In [83]:
quant_vars = ['Total Deaths', 'No. Injured', 'No. Affected', 
              'Total Affected', "Total Damage", 'duration_days']

for col in quant_vars:
    df[col] = pd.qcut(df[col].rank(method='first'), 
                            q=3, 
                            labels=[f'Low', f'Med', f'High'])

In [84]:
df_encoded = pd.get_dummies(df)

## APriori

In [85]:
from mlxtend.frequent_patterns import apriori, association_rules

In [86]:
# Range di valori per support e confidence
support_levels = [0.03, 0.05, 0.07]  # min_support
confidence_levels = [0.6, 0.7, 0.8]  # min_threshold

# Lista per raccogliere risultati
regole_apriori = []

for sup in support_levels:
    freq_items = apriori(df_encoded, min_support=sup, use_colnames=True)
    
    for conf in confidence_levels:
        rules = association_rules(freq_items, metric="confidence", min_threshold=conf)
        rules['min_support'] = sup
        rules['min_confidence'] = conf
        regole_apriori.append(rules)

# Combina tutte le regole in un unico DataFrame
regole_apriori_df = pd.concat(regole_apriori, ignore_index=True)

# Ordina per lift decrescente
regole_apriori_df.sort_values(by='lift', ascending=False, inplace=True)

In [87]:
for i, row in regole_apriori_df.sort_values('lift', ascending=False).head(400).iterrows():
    print(f"SE {set(row['antecedents'])}  ->  ALLORA {set(row['consequents'])}")
    print(f"   support={row['support']:.2f}, confidence={row['confidence']:.2f}, lift={row['lift']:.2f}\n")

SE {'Disaster Subtype_Fire (Miscellaneous)'}  ->  ALLORA {'Disaster Type_Fire (Miscellaneous)', 'Disaster Group_Technological'}
   support=0.03, confidence=1.00, lift=32.73

SE {'Disaster Type_Fire (Miscellaneous)'}  ->  ALLORA {'Disaster Subtype_Fire (Miscellaneous)', 'Disaster Group_Technological'}
   support=0.03, confidence=1.00, lift=32.73

SE {'Disaster Subtype_Fire (Miscellaneous)', 'Disaster Group_Technological'}  ->  ALLORA {'Disaster Type_Fire (Miscellaneous)', 'Disaster Subgroup_Miscellaneous accident'}
   support=0.03, confidence=1.00, lift=32.73

SE {'Disaster Type_Fire (Miscellaneous)', 'Disaster Group_Technological'}  ->  ALLORA {'Disaster Subtype_Fire (Miscellaneous)', 'Disaster Subgroup_Miscellaneous accident'}
   support=0.03, confidence=1.00, lift=32.73

SE {'Disaster Subtype_Fire (Miscellaneous)', 'Disaster Group_Technological'}  ->  ALLORA {'Disaster Type_Fire (Miscellaneous)', 'Disaster Subgroup_Miscellaneous accident'}
   support=0.03, confidence=1.00, lift=32.73

### Pattern orientati 

In [88]:
cols_for_rules = (
    [c for c in df_encoded.columns if c.startswith('Disaster Type_')]
    + [c for c in df_encoded.columns if c.startswith('Total Deaths_')]
    + [c for c in df_encoded.columns if c.startswith('Total Damage_')]
    + [c for c in df_encoded.columns if c.startswith('Total Affected_')]
)

df_impatto = df_encoded[cols_for_rules]

In [89]:
support_levels = [0.03, 0.05, 0.07]  
confidence_levels = [0.5, 0.6, 0.7, 0.8]  

regole2 = []

for sup in support_levels:
    freq_items = apriori(df_impatto, min_support=sup, use_colnames=True)
    
    for conf in confidence_levels:
        rules = association_rules(freq_items, metric="confidence", min_threshold=conf)
        rules['min_support'] = sup
        rules['min_confidence'] = conf
        regole2.append(rules)

regole2df = pd.concat(regole2, ignore_index=True)

regole2df.sort_values(by='lift', ascending=False, inplace=True)

In [90]:
for i, row in regole2df.sort_values('lift', ascending=False).head(150).iterrows():
    print(f"SE {set(row['antecedents'])}  ->  ALLORA {set(row['consequents'])}")
    print(f"   support={row['support']:.2f}, confidence={row['confidence']:.2f}, lift={row['lift']:.2f}\n")

SE {'Total Damage_Low', 'Disaster Type_Road'}  ->  ALLORA {'Total Deaths_Med', 'Total Affected_Low'}
   support=0.04, confidence=0.64, lift=4.88

SE {'Total Damage_Low', 'Disaster Type_Road'}  ->  ALLORA {'Total Deaths_Med', 'Total Affected_Low'}
   support=0.04, confidence=0.64, lift=4.88

SE {'Total Affected_Low', 'Disaster Type_Road'}  ->  ALLORA {'Total Damage_Low', 'Total Deaths_Med'}
   support=0.04, confidence=0.58, lift=4.74

SE {'Total Damage_Low', 'Total Deaths_Med', 'Total Affected_Low'}  ->  ALLORA {'Disaster Type_Road'}
   support=0.04, confidence=0.51, lift=4.61

SE {'Disaster Type_Road'}  ->  ALLORA {'Total Damage_Low', 'Total Affected_Low'}
   support=0.07, confidence=0.61, lift=3.53

SE {'Disaster Type_Road'}  ->  ALLORA {'Total Damage_Low', 'Total Affected_Low'}
   support=0.07, confidence=0.61, lift=3.53

SE {'Disaster Type_Road'}  ->  ALLORA {'Total Damage_Low', 'Total Affected_Low'}
   support=0.07, confidence=0.61, lift=3.53

SE {'Disaster Type_Road'}  ->  ALLORA 

In [91]:
cols_for_rules = (
    [c for c in df_encoded.columns if c.startswith('Disaster Type_')]
    + [c for c in df_encoded.columns if c.startswith('Subregion_')]
    + [c for c in df_encoded.columns if c.startswith('Total Deaths_')]
    + [c for c in df_encoded.columns if c.startswith('Total Damage_')]
)

df_regioni = df_encoded[cols_for_rules]

support_levels = [0.03, 0.05, 0.07]  
confidence_levels = [0.5, 0.6, 0.7, 0.8]  

regole3 = []

for sup in support_levels:
    freq_items = apriori(df_regioni, min_support=sup, use_colnames=True)
    
    for conf in confidence_levels:
        rules = association_rules(freq_items, metric="confidence", min_threshold=conf)
        rules['min_support'] = sup
        rules['min_confidence'] = conf
        regole3.append(rules)

regole3df = pd.concat(regole3, ignore_index=True)

for i, row in regole3df.sort_values('lift', ascending=False).head(100).iterrows():
    print(f"SE {set(row['antecedents'])}  ->  ALLORA {set(row['consequents'])}")
    print(f"   support={row['support']:.2f}, confidence={row['confidence']:.2f}, lift={row['lift']:.2f}\n")

SE {'Disaster Type_Epidemic'}  ->  ALLORA {'Subregion_Sub-Saharan Africa'}
   support=0.03, confidence=0.57, lift=3.21

SE {'Disaster Type_Air'}  ->  ALLORA {'Total Damage_High'}
   support=0.03, confidence=0.79, lift=2.38

SE {'Disaster Type_Air'}  ->  ALLORA {'Total Damage_High'}
   support=0.03, confidence=0.79, lift=2.38

SE {'Disaster Type_Air'}  ->  ALLORA {'Total Damage_High'}
   support=0.03, confidence=0.79, lift=2.38

SE {'Disaster Type_Epidemic'}  ->  ALLORA {'Total Damage_Med'}
   support=0.04, confidence=0.78, lift=2.33

SE {'Disaster Type_Epidemic'}  ->  ALLORA {'Total Damage_Med'}
   support=0.04, confidence=0.78, lift=2.33

SE {'Disaster Type_Epidemic'}  ->  ALLORA {'Total Damage_Med'}
   support=0.04, confidence=0.78, lift=2.33

SE {'Disaster Type_Water'}  ->  ALLORA {'Total Damage_High'}
   support=0.04, confidence=0.68, lift=2.03

SE {'Disaster Type_Water'}  ->  ALLORA {'Total Damage_High'}
   support=0.04, confidence=0.68, lift=2.03

SE {'Disaster Type_Road'}  ->  A

  regole3df = pd.concat(regole3, ignore_index=True)


## FP-Growth

In [92]:
from mlxtend.frequent_patterns import fpgrowth, association_rules

# Calcolo degli itemset frequenti
frequent_itemsets = fpgrowth(df_encoded, min_support=0.06, use_colnames=True)

# Generazione delle regole di associazione
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

In [93]:
for i, row in rules.sort_values('lift', ascending=False).head(300).iterrows():
    print(f"SE {set(row['antecedents'])}  ->  ALLORA {set(row['consequents'])}")
    print(f"   support={row['support']:.2f}, confidence={row['confidence']:.2f}, lift={row['lift']:.2f}\n")

SE {'Disaster Subtype_Water'}  ->  ALLORA {'Disaster Type_Water', 'Disaster Subgroup_Transport', 'Disaster Group_Technological'}
   support=0.06, confidence=1.00, lift=16.10

SE {'Disaster Type_Water'}  ->  ALLORA {'Disaster Subtype_Water'}
   support=0.06, confidence=1.00, lift=16.10

SE {'Disaster Subtype_Water'}  ->  ALLORA {'Disaster Type_Water'}
   support=0.06, confidence=1.00, lift=16.10

SE {'Disaster Type_Water'}  ->  ALLORA {'Disaster Subtype_Water', 'Disaster Subgroup_Transport', 'Disaster Group_Technological'}
   support=0.06, confidence=1.00, lift=16.10

SE {'Disaster Type_Water'}  ->  ALLORA {'Disaster Subtype_Water', 'Disaster Group_Technological'}
   support=0.06, confidence=1.00, lift=16.10

SE {'Disaster Subtype_Water', 'Disaster Group_Technological'}  ->  ALLORA {'Disaster Type_Water', 'Disaster Subgroup_Transport'}
   support=0.06, confidence=1.00, lift=16.10

SE {'Disaster Type_Water', 'Disaster Subgroup_Transport'}  ->  ALLORA {'Disaster Subtype_Water', 'Disaster 