In [1]:
# Array manipulation
import numpy as np

# Random variables generator
import random as rand

# data processing / .csv manipulation and reading
import pandas as pd


# Data visualization
import seaborn as sns
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import style

# Algorithms / AI
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpmax, fpgrowth, association_rules

In [2]:
# Read the association rules that were previously exported to .csv,
# then summarise their numeric columns.
rules = pd.read_csv(filepath_or_buffer='generated/brasil_rules.csv')
rules.describe()

Unnamed: 0.1,Unnamed: 0,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
count,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0
mean,134.153846,0.251719,0.098357,0.088648,0.362495,3.659323,0.064359,1.448854,0.969194
std,78.822423,0.039033,0.026726,0.021434,0.107507,0.325873,0.016083,0.225215,0.019056
min,18.0,0.205027,0.061759,0.061759,0.191964,3.108259,0.04189,1.161138,0.951495
25%,59.0,0.236984,0.083662,0.078276,0.323202,3.644205,0.056797,1.346504,0.955147
50%,175.0,0.236984,0.092998,0.082226,0.372331,3.687211,0.059209,1.447321,0.958095
75%,184.0,0.24219,0.102693,0.09246,0.390152,3.846272,0.068421,1.473421,0.978629
max,219.0,0.321724,0.150808,0.131777,0.556061,4.105309,0.096038,1.912856,1.0


In [3]:
# Remove the leftover index column written by the earlier .csv export.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: running it a second time no longer raises KeyError because
# the column is already gone.
rules = rules.drop(columns='Unnamed: 0', errors='ignore')

In [4]:
# Strip the "frozenset({ ... })" wrapper and every space from the item-set
# strings, leaving a plain comma-separated list of quoted item names.
# Vectorised .str calls replace the former row-by-row .loc writes, which used
# the enumerate() position as an index label and were therefore only correct
# for a default 0..n-1 index. regex=False makes each pattern a literal match.
for column in ['antecedents', 'consequents']:
  rules[column] = (
    rules[column]
    .str.replace('frozenset({', '', regex=False)
    .str.replace('})', '', regex=False)
    .str.replace(' ', '', regex=False)
  )

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Enchente','Atingido_Enxurrada','Tem_...",0.236984,0.150808,0.131777,0.556061,3.687211,0.096038,1.912856,0.955147
1,"'Atingido_Alagamentos','Tem_Instr_Deslizamento'","'Atingido_Enchente','Tem_Red_Des','Atingido_En...",0.24219,0.149192,0.131777,0.544107,3.647021,0.095644,1.866244,0.957765
2,'Atingido_Enxurrada','Nao_Red_Enxurrada',0.307361,0.067145,0.067145,0.218458,3.253505,0.046508,1.193608,1.0
3,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Des','Atingido_Enxurrada','Tem_Instr...",0.236984,0.102693,0.090485,0.381818,3.718055,0.066148,1.451526,0.958095
4,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Enchente','Atingido_Des','Atingido_E...",0.236984,0.093178,0.083483,0.352273,3.780653,0.061401,1.400006,0.963932
5,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Enchente','Atingido_Des','Atingido_E...",0.236984,0.083662,0.078276,0.330303,3.948043,0.05845,1.368287,0.978629
6,"'Atingido_Alagamentos','Tem_Instr_Deslizamento'","'Atingido_Enchente','Atingido_Des','Tem_Red_De...",0.24219,0.088689,0.078276,0.323202,3.644205,0.056797,1.346504,0.957485
7,"'Atingido_Enchente','Tem_Red_Des'","'Atingido_Alagamentos','Atingido_Des','Atingid...",0.210233,0.091562,0.078276,0.372331,4.066442,0.059027,1.447321,0.95482
8,"'Tem_Red_Des','Atingido_Enxurrada'","'Atingido_Alagamentos','Atingido_Des','Atingid...",0.205027,0.092998,0.078276,0.381786,4.105309,0.059209,1.467133,0.951495
9,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Enchente','Atingido_Des'",0.236984,0.113285,0.098205,0.414394,3.657962,0.071358,1.514183,0.952305


In [5]:
# Disabled demo, kept as a bare string literal so nothing is executed: the cell
# only echoes the text. It illustrates how the antecedents and consequents of
# two rows are joined, split on ',' and sorted so that rules made of the same
# items compare equal.
# NOTE(review): rows 100 and 101 are hard-coded, but describe() above reports
# only 13 rules -- re-enabling this as-is would presumably raise IndexError.
"""print(f'''
This:\t\t{rules.values[100][0]+' and '+rules.values[100][1]}
Becomes this: \t{sorted((rules.values[100][0]+','+rules.values[100][1]).split(','))}

This:\t\t{rules.values[101][0]+' and '+rules.values[101][1]}
Becomes this: \t{sorted((rules.values[101][0]+','+rules.values[101][1]).split(','))}

Is equal: {sorted((rules.values[101][0]+','+rules.values[101][1]).split(',')) == sorted((rules.values[100][0]+','+rules.values[100][1]).split(','))}.''')"""

"print(f'''\nThis:\t\t{rules.values[100][0]+' and '+rules.values[100][1]}\nBecomes this: \t{sorted((rules.values[100][0]+','+rules.values[100][1]).split(','))}\n\nThis:\t\t{rules.values[101][0]+' and '+rules.values[101][1]}\nBecomes this: \t{sorted((rules.values[101][0]+','+rules.values[101][1]).split(','))}\n\nIs equal: {sorted((rules.values[101][0]+','+rules.values[101][1]).split(',')) == sorted((rules.values[100][0]+','+rules.values[100][1]).split(','))}.''')"

In [6]:
# Index label of the rule with the largest Zhang's metric
rules['zhangs_metric'].idxmax()

12

In [7]:
# Several rules are made of exactly the same items, only split differently
# between antecedents and consequents. Keep a single rule per set of items.

def pruneDataFrame( df ):
  """Keep one rule per distinct set of items, preferring the best Zhang's metric.

  Two rows are treated as duplicates when the comma-separated items of their
  first two columns, joined together and sorted, are identical -- i.e. the
  same items merely split differently between the two columns.

  Parameters
  ----------
  df : pandas.DataFrame
    Frame whose first two columns hold comma-separated item strings. A
    'zhangs_metric' column is required as soon as any duplicates exist.

  Returns
  -------
  pandas.DataFrame
    The surviving rows of `df`, in their original order and with their
    original index labels. Within a group of duplicates the row with the
    highest 'zhangs_metric' is kept; ties keep the earliest row.
  """
  # Group row *positions* by their canonical (sorted) item tuple in one pass.
  # Positions rather than index labels are used throughout, so the function
  # also works on frames whose index is not 0..n-1 (e.g. an already filtered
  # frame), where mixing enumerate() positions with label-based .loc fails.
  positions_by_items = {}
  for position, row in enumerate( df.itertuples(index=False, name=None) ):
    items = tuple( sorted((row[0]+','+row[1]).split(',')) )
    positions_by_items.setdefault(items, []).append(position)

  kept_positions = set()

  for positions in positions_by_items.values():
    if len(positions) > 1:
      # Re-number the group's scores 0..k-1 so idxmax() yields an offset into
      # `positions`; idxmax() returns the first maximum, so ties keep the
      # earliest row.
      group_scores = df['zhangs_metric'].iloc[positions].reset_index(drop=True)
      kept_positions.add( positions[int(group_scores.idxmax())] )
    else:
      kept_positions.add( positions[0] )

  # Sorting makes the output order deterministic (original row order).
  return df.iloc[ sorted(kept_positions) ]

# Collapse rules built from the same items down to a single representative each
pruned_rules = pruneDataFrame(df=rules)

In [8]:
# (rows, columns) of the pruned table, followed by summary statistics
print(f'{pruned_rules.shape}')
pruned_rules.describe()

(9, 10)


Unnamed: 0,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
count,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,0.264213,0.094953,0.087313,0.342117,3.563114,0.062844,1.406809,0.975619
std,0.039794,0.026333,0.020354,0.109245,0.321201,0.015683,0.214079,0.019785
min,0.236984,0.061759,0.061759,0.191964,3.108259,0.04189,1.161138,0.952305
25%,0.236984,0.082226,0.078276,0.25558,3.253505,0.055772,1.232872,0.958095
50%,0.236984,0.093178,0.083483,0.352273,3.657962,0.061401,1.400006,0.969846
75%,0.307361,0.102693,0.09246,0.390152,3.780653,0.068421,1.473421,1.0
max,0.321724,0.149192,0.131777,0.544107,3.948043,0.095644,1.866244,1.0


In [11]:
# Save the pruned rules as an Excel workbook (.xlsx) for visualization.
# to_excel() opens, writes and closes the target path itself, so the former
# text-mode open(filename, 'w') handle on the same file -- which was never
# written to -- has been removed. The two trailing statements that referenced
# `datetime` and `workbook` were dropped as well: neither name is imported or
# defined in this notebook, so they raised NameError on a fresh kernel.
filename = 'generated/pruned_rules.xlsx'
pruned_rules.to_excel(filename)


In [10]:
# Display the rules returned by pruneDataFrame
pruned_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1,"'Atingido_Alagamentos','Tem_Instr_Deslizamento'","'Atingido_Enchente','Tem_Red_Des','Atingido_En...",0.24219,0.149192,0.131777,0.544107,3.647021,0.095644,1.866244,0.957765
2,'Atingido_Enxurrada','Nao_Red_Enxurrada',0.307361,0.067145,0.067145,0.218458,3.253505,0.046508,1.193608,1.0
3,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Des','Atingido_Enxurrada','Tem_Instr...",0.236984,0.102693,0.090485,0.381818,3.718055,0.066148,1.451526,0.958095
4,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Enchente','Atingido_Des','Atingido_E...",0.236984,0.093178,0.083483,0.352273,3.780653,0.061401,1.400006,0.963932
5,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Enchente','Atingido_Des','Atingido_E...",0.236984,0.083662,0.078276,0.330303,3.948043,0.05845,1.368287,0.978629
9,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Enchente','Atingido_Des'",0.236984,0.113285,0.098205,0.414394,3.657962,0.071358,1.514183,0.952305
10,"'Atingido_Alagamentos','Tem_Red_Des'","'Atingido_Enchente','Atingido_Des','Tem_Instr_...",0.236984,0.101436,0.09246,0.390152,3.846272,0.068421,1.473421,0.969846
11,'Atingido_Enchente','Nao_Red_Enchente',0.321724,0.082226,0.082226,0.25558,3.108259,0.055772,1.232872,1.0
12,'Atingido_Enchente',"'Atingido_Alagamentos','Nao_Red_Enchente'",0.321724,0.061759,0.061759,0.191964,3.108259,0.04189,1.161138,1.0
