In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
# Relative location of the Ilumina "clean" data directory; the CSV input used
# by this notebook is read from here.
ILUMINA_CLEAN_PATH = Path("../data/Ilumina/clean")

In [3]:
# Load the IluminaPylumCodOH table (first CSV column becomes the index), keep
# only the rows whose "Fertil" value equals 0, and then drop the "Fertil"
# column so that only the remaining columns are carried forward.
IluminaPylumCodOH = (
    pd.read_csv(ILUMINA_CLEAN_PATH / "IluminaPylumCodOH.csv", index_col=0)
    .loc[lambda table: table["Fertil"] == 0]
    .drop(columns=["Fertil"])
)

* Support -> Probabilidad de que un evento ocurra = freq(X,Y) / N
* Confidence -> Medida de probabilidad condicional = freq(X,Y) / freq(X)
* Lift -> Support(X,Y) / (Support(X) x Support(Y))

In [8]:
# Mine the itemsets whose support is at least MIN_SUPPORT. Column names are
# used as item labels (use_colnames=True); low_memory=True requests mlxtend's
# memory-saving search, which the mlxtend documentation describes as slower
# than the default.
MIN_SUPPORT = 0.85
frequent_itemsets = apriori(
    IluminaPylumCodOH,
    min_support=MIN_SUPPORT,
    use_colnames=True,
    low_memory=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.904762,(Tenericutes__0.0)
1,0.976190,(Verrucomicrobia__0.0)
2,0.880952,(Fusobacteria__0.0)
3,0.880952,(Gemmatimonadetes__0.0)
4,0.904762,(Synergistetes__0.0)
...,...,...
62,0.857143,"(Kiritimatiellaeota__0.0, Deinococcus.Thermus_..."
63,0.904762,"(candidate.division.Zixibacteria__0.0, Deinoco..."
64,0.857143,"(candidate.division.Zixibacteria__0.0, Kiritim..."
65,0.857143,"(candidate.division.Zixibacteria__0.0, Kiritim..."


In [15]:
# Derive association rules from the frequent itemsets, keeping the rules whose
# lift reaches the threshold of 1.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
)

In [16]:
# Display the association-rules table.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Deferribacteres__0.0),(Deinococcus.Thermus__0.0),0.857143,0.97619,0.857143,1.0,1.02439,0.020408,inf,0.166667
1,(Deinococcus.Thermus__0.0),(Deferribacteres__0.0),0.97619,0.857143,0.857143,0.878049,1.02439,0.020408,1.171429,1.0
2,(candidate.division.Zixibacteria__0.0),(Acidobacteria__0.0),0.952381,0.952381,0.928571,0.975,1.02375,0.021542,1.904762,0.487179
3,(Acidobacteria__0.0),(candidate.division.Zixibacteria__0.0),0.952381,0.952381,0.928571,0.975,1.02375,0.021542,1.904762,0.487179
4,"(candidate.division.Zixibacteria__0.0, Verruco...",(Acidobacteria__0.0),0.928571,0.952381,0.904762,0.974359,1.023077,0.020408,1.857143,0.315789
5,"(Verrucomicrobia__0.0, Acidobacteria__0.0)",(candidate.division.Zixibacteria__0.0),0.928571,0.952381,0.904762,0.974359,1.023077,0.020408,1.857143,0.315789
6,(candidate.division.Zixibacteria__0.0),"(Verrucomicrobia__0.0, Acidobacteria__0.0)",0.952381,0.928571,0.904762,0.95,1.023077,0.020408,1.428571,0.473684
7,(Acidobacteria__0.0),"(candidate.division.Zixibacteria__0.0, Verruco...",0.952381,0.928571,0.904762,0.95,1.023077,0.020408,1.428571,0.473684
8,"(candidate.division.Zixibacteria__0.0, Candida...",(Acidobacteria__0.0),0.880952,0.952381,0.857143,0.972973,1.021622,0.018141,1.761905,0.177778
9,"(Candidatus.Saccharibacteria__0.0, Acidobacter...",(candidate.division.Zixibacteria__0.0),0.880952,0.952381,0.857143,0.972973,1.021622,0.018141,1.761905,0.177778


In [7]:
def _has_multiple_items(itemset):
    """Return True when the given itemset holds more than one item."""
    return len(itemset) > 1


# Keep only the rules whose consequent is made of two or more items.
# NOTE(review): the output stored under this cell carries execution count
# In [7], earlier than the cell that builds `rules` (In [15]) — re-run the
# notebook top to bottom to refresh it.
multi_item_consequent = rules['consequents'].apply(_has_multiple_items)
filtered_rules = rules[multi_item_consequent]
filtered_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
23,(Kiritimatiellaeota__0.0),"(Nitrospirae__0.0, Fibrobacteres__0.0)",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
24,(Nitrospirae__0.0),"(Kiritimatiellaeota__0.0, Fibrobacteres__0.0)",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
25,(Fibrobacteres__0.0),"(Kiritimatiellaeota__0.0, Nitrospirae__0.0)",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
29,(Kiritimatiellaeota__0.0),"(candidate.division.Zixibacteria__0.0, Fibroba...",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
30,(candidate.division.Zixibacteria__0.0),"(Kiritimatiellaeota__0.0, Fibrobacteres__0.0)",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
...,...,...,...,...,...,...,...,...,...,...
175,(Fibrobacteres__0.0),"(Kiritimatiellaeota__0.0, Thermodesulfobacteri...",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
176,(Kiritimatiellaeota__0.0),"(candidate.division.Zixibacteria__0.0, Thermod...",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
177,(Nitrospirae__0.0),"(Kiritimatiellaeota__0.0, Thermodesulfobacteri...",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
178,(candidate.division.Zixibacteria__0.0),"(Kiritimatiellaeota__0.0, Thermodesulfobacteri...",1.0,1.0,1.0,1.0,1.0,0.0,inf,0.0
