# Travail 3

In [116]:
from scipy.io import arff
import pandas as pd
import wittgenstein as lw

from prism_rules import PrismRules
from mlxtend.frequent_patterns import apriori, association_rules

## Base Champignon

In [117]:
# Load the mushroom training set from its ARFF file.
file = "datasets/mushrooms_weka_training.arff"
arff_file = arff.loadarff(file)

# loadarff returns a tuple whose first element holds the records; any cell
# that comes back as a bytes object is decoded to str, everything else is
# left untouched.
data = pd.DataFrame(arff_file[0]).map(
    lambda cell: cell.decode() if isinstance(cell, bytes) else cell
)
data

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises?,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat,class
0,x,s,n,t,p,f,c,n,k,e,...,w,w,p,w,o,p,k,s,u,p
1,x,s,y,t,a,f,c,b,k,e,...,w,w,p,w,o,p,n,n,g,e
2,b,s,w,t,l,f,c,b,n,e,...,w,w,p,w,o,p,n,n,m,e
3,x,y,w,t,p,f,c,n,n,e,...,w,w,p,w,o,p,k,s,u,p
4,x,s,g,f,n,f,w,b,k,t,...,w,w,p,w,o,e,n,a,g,e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6995,f,y,e,f,s,f,c,n,b,t,...,p,w,p,w,o,e,w,v,d,p
6996,k,y,e,f,s,f,c,n,b,t,...,w,p,p,w,o,e,w,v,d,p
6997,k,f,w,f,n,f,w,b,p,e,...,w,w,p,w,t,p,w,s,g,e
6998,x,s,g,f,n,f,w,b,w,e,...,w,w,p,w,t,p,w,s,g,e


In [118]:
# Separate the target column ('class') from the predictor columns.
y = data['class']
X = data.drop(columns=['class'])

### Méthode Ripper

In [119]:
# Learn a RIPPER rule set describing the positive class "e" of the mushroom data.
# RIPPER's training involves randomness, so the seed is pinned to make the
# learned rules reproducible on a fresh "Restart & Run All".
ripper_clf = lw.RIPPER(random_state=42)
ripper_clf.fit(X, y, pos_class="e")

# Print the learned rule set (a disjunction of conjunctions).
ripper_clf.out_model()

[[odor=n ^ stalk-shape=t] V
[bruises?=t ^ stalk-root=c] V
[odor=n ^ habitat=w] V
[odor=n ^ bruises?=f ^ stalk-surface-above-ring=s] V
[stalk-root=r] V
[gill-spacing=w ^ stalk-shape=t] V
[odor=n ^ gill-spacing=w ^ bruises?=f] V
[ring-number=t ^ cap-shape=x] V
[ring-number=t ^ cap-color=n]]


### Méthode Prism

In [120]:
# Run the PRISM rule learner on the full mushroom table, using the 'class'
# column as target. The call prints, for each target value, the rules found
# together with their support and coverage figures.
prism = PrismRules()

# NOTE(review): the structure of the returned value is not visible from this
# notebook; `rules` is reassigned by the Apriori section further down.
rules = prism.get_prism_rules(data, "class")


........................................................................
Target: p
........................................................................
odor = f
   Support:  the target has value: 'p' for 100.000% of the 1952 rows matching the rule 
   Coverage: the rule matches: 1952 out of 3256 rows for target value: 'p'. This is:
      59.951% of total rows for target value: 'p'
      27.886% of total rows in data
gill-color = b
   Support:  The target has value: 'p' for 100.000% of the 738 remaining rows matching the rule
   Coverage: The rule matches: 738 out of 1304 rows remaining for target value: 'p'. This is:
      56.595% of remaining rows for target value: 'p'
      22.666% of total rows for target value: 'p'
      10.543% of total rows in data
odor = p
   Support:  The target has value: 'p' for 100.000% of the 256 remaining rows matching the rule
   Coverage: The rule matches: 256 out of 566 rows remaining for target value: 'p'. This is:
      45.230% of remaining rows 

### Méthode Apriori

In [121]:
# One-hot encode every column (target included): each attribute/value pair
# becomes its own boolean column named '<attribute>_<value>', which is the
# input format apriori expects.
data_train_dummies = pd.get_dummies(data=data)
data_train_dummies

Unnamed: 0,cap-shape_b,cap-shape_c,cap-shape_f,cap-shape_k,cap-shape_s,cap-shape_x,cap-surface_f,cap-surface_g,cap-surface_s,cap-surface_y,...,population_y,habitat_d,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w,class_e,class_p
0,False,False,False,False,False,True,False,False,True,False,...,False,False,False,False,False,False,True,False,False,True
1,False,False,False,False,False,True,False,False,True,False,...,False,False,True,False,False,False,False,False,True,False
2,True,False,False,False,False,False,False,False,True,False,...,False,False,False,False,True,False,False,False,True,False
3,False,False,False,False,False,True,False,False,False,True,...,False,False,False,False,False,False,True,False,False,True
4,False,False,False,False,False,True,False,False,True,False,...,False,False,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6995,False,False,True,False,False,False,False,False,False,True,...,False,True,False,False,False,False,False,False,False,True
6996,False,False,False,True,False,False,False,False,False,True,...,False,True,False,False,False,False,False,False,False,True
6997,False,False,False,True,False,False,True,False,False,False,...,False,False,True,False,False,False,False,False,True,False
6998,False,False,False,False,False,True,False,False,True,False,...,False,False,True,False,False,False,False,False,True,False


In [122]:
# Mine the itemsets present in at least 40% of the mushrooms.
# `max_len=4` stops the search at 4-item sets, so larger itemsets are never
# generated instead of being enumerated and then filtered out afterwards.
frequent_itemsets = apriori(
    data_train_dummies,
    min_support=0.40,
    use_colnames=True,
    max_len=4,
)

# Keep the itemset size as a column so it can be inspected in the table.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

# Most frequent itemsets first.
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)
frequent_itemsets

Unnamed: 0,support,itemsets,length
16,1.000000,(veil-type_p),1
123,0.997571,"(veil-type_p, veil-color_w)",2
17,0.997571,(veil-color_w),1
6,0.997429,(gill-attachment_f),1
64,0.997429,"(veil-type_p, gill-attachment_f)",2
...,...,...,...
286,0.400571,"(gill-spacing_c, veil-color_w, stalk-shape_e)",3
614,0.400571,"(gill-spacing_c, veil-type_p, veil-color_w, st...",4
492,0.400286,"(gill-spacing_c, veil-color_w, stalk-shape_e, ...",4
491,0.400286,"(gill-spacing_c, veil-type_p, stalk-shape_e, g...",4


In [123]:
# Derive association rules from the frequent itemsets, keeping only the rules
# whose lift is at least 1.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
    num_itemsets=len(data),
)
rules

  cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(veil-type_p),(veil-color_w),1.000000,0.997571,0.997571,0.997571,1.000000,1.0,0.000000,1.000000,0.000000,0.997571,0.000000,0.998786
1,(veil-color_w),(veil-type_p),0.997571,1.000000,0.997571,1.000000,1.000000,1.0,0.000000,inf,0.000000,0.997571,0.000000,0.998786
2,(veil-type_p),(gill-attachment_f),1.000000,0.997429,0.997429,0.997429,1.000000,1.0,0.000000,1.000000,0.000000,0.997429,0.000000,0.998714
3,(gill-attachment_f),(veil-type_p),0.997429,1.000000,0.997429,1.000000,1.000000,1.0,0.000000,inf,0.000000,0.997429,0.000000,0.998714
4,(veil-color_w),(gill-attachment_f),0.997571,0.997429,0.997286,0.999714,1.002291,1.0,0.002279,8.978143,0.941160,0.999570,0.888618,0.999785
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4695,(stalk-shape_e),"(gill-spacing_c, veil-type_p, gill-attachment_f)",0.430571,0.846286,0.400286,0.929662,1.098520,1.0,0.035899,2.185353,0.157498,0.456649,0.542408,0.701326
4696,"(gill-spacing_c, gill-attachment_f)",(stalk-shape_e),0.846286,0.430571,0.400286,0.472991,1.098520,1.0,0.035899,1.080492,0.583447,0.456649,0.074495,0.701326
4697,"(stalk-shape_e, gill-attachment_f)",(gill-spacing_c),0.428000,0.848857,0.400286,0.935247,1.101772,1.0,0.036975,2.334144,0.161488,0.456649,0.571577,0.703403
4698,(gill-spacing_c),"(stalk-shape_e, gill-attachment_f)",0.848857,0.428000,0.400286,0.471558,1.101772,1.0,0.036975,1.082428,0.611151,0.456649,0.076151,0.703403


In [124]:
# Keep only the rules whose consequent is exactly one of the two class items,
# then show the ten with the highest lift.
filter1 = frozenset({'class_e'})
filter2 = frozenset({'class_p'})

predicts_class = rules['consequents'].apply(
    lambda consequent: consequent == filter1 or consequent == filter2
)
filtered_rules = rules[predicts_class]
filtered_rules.sort_values(by='lift', ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
3514,"(odor_n, veil-type_p, veil-color_w)",(class_e),0.434286,0.534857,0.418286,0.963158,1.800776,1.0,0.186005,12.625306,0.786057,0.759336,0.920794,0.872605
3503,"(odor_n, veil-color_w, gill-attachment_f)",(class_e),0.434286,0.534857,0.418286,0.963158,1.800776,1.0,0.186005,12.625306,0.786057,0.759336,0.920794,0.872605
3524,"(odor_n, veil-color_w)",(class_e),0.434286,0.534857,0.418286,0.963158,1.800776,1.0,0.186005,12.625306,0.786057,0.759336,0.920794,0.872605
3350,(odor_n),(class_e),0.436714,0.534857,0.420571,0.963036,1.800547,1.0,0.186992,12.583555,0.789321,0.763288,0.920531,0.87468
3352,"(veil-type_p, odor_n)",(class_e),0.436714,0.534857,0.420571,0.963036,1.800547,1.0,0.186992,12.583555,0.789321,0.763288,0.920531,0.87468
3488,"(veil-type_p, odor_n, gill-attachment_f)",(class_e),0.434429,0.534857,0.418286,0.962841,1.800184,1.0,0.185928,12.517694,0.785933,0.759139,0.920113,0.872446
3498,"(odor_n, gill-attachment_f)",(class_e),0.434429,0.534857,0.418286,0.962841,1.800184,1.0,0.185928,12.517694,0.785933,0.759139,0.920113,0.872446
4466,"(stalk-surface-above-ring_s, ring-number_o, gi...",(class_e),0.425143,0.534857,0.404571,0.951613,1.779191,1.0,0.177181,9.612952,0.761836,0.728395,0.895974,0.854012
2686,"(gill-size_b, veil-type_p, stalk-surface-above...",(class_e),0.466571,0.534857,0.435714,0.933864,1.746007,1.0,0.186165,7.033132,0.800978,0.770202,0.857816,0.87425
2680,"(stalk-surface-above-ring_s, gill-size_b)",(class_e),0.466571,0.534857,0.435714,0.933864,1.746007,1.0,0.186165,7.033132,0.800978,0.770202,0.857816,0.87425


On remarque que, quelle que soit la méthode, l'attribut odor=n est très présent pour distinguer la classe "e". En dehors de cela, les règles trouvées diffèrent selon les méthodes.

## Base "Zoo"

In [125]:
# Load the zoo data set from its ARFF file.
file = "datasets/zoo.arff"
arff_file = arff.loadarff(file)

# The first element of the loadarff result holds the records. Cells that are
# bytes objects are decoded to str; all other cells pass through unchanged.
data = pd.DataFrame(arff_file[0]).map(
    lambda cell: cell if not isinstance(cell, bytes) else cell.decode()
)
data

Unnamed: 0,animal,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,true,false,false,true,false,false,true,true,true,true,false,false,4.0,false,false,true,mammal
1,antelope,true,false,false,true,false,false,false,true,true,true,false,false,4.0,true,false,true,mammal
2,bass,false,false,true,false,false,true,true,true,true,false,false,true,0.0,true,false,false,fish
3,bear,true,false,false,true,false,false,true,true,true,true,false,false,4.0,false,false,true,mammal
4,boar,true,false,false,true,false,false,true,true,true,true,false,false,4.0,true,false,true,mammal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,wallaby,true,false,false,true,false,false,false,true,true,true,false,false,2.0,true,false,true,mammal
97,wasp,true,false,true,false,true,false,false,false,false,true,true,false,6.0,false,false,false,insect
98,wolf,true,false,false,true,false,false,true,true,true,true,false,false,4.0,true,false,true,mammal
99,worm,false,false,true,false,false,false,false,false,false,true,false,false,0.0,false,false,false,invertebrate


In [126]:
# The labels in the 'animal' column are unique (apart from two 'frog' rows),
# so the column carries no reusable pattern and is dropped.
# Rebinding instead of `inplace=True` leaves the frame displayed by the
# previous cell untouched.
data = data.drop(columns='animal')

# Set aside the two non-boolean columns; they are one-hot encoded below.
legs = data['legs']
zoo_type = data['type']
data = data.drop(columns=['type', 'legs'])

# Turn the 'true'/'false' strings into real booleans; any other value is kept.
# DataFrame.map replaces the deprecated DataFrame.applymap and matches the
# call already used when loading the data.
data = data.map(lambda x: True if x == 'true' else False if x == 'false' else x)

# One boolean column per distinct leg count and per animal type.
legs = pd.get_dummies(legs)
zoo_type = pd.get_dummies(zoo_type)

# Reassemble a fully boolean table suitable for apriori.
data = pd.concat([data, legs, zoo_type], axis=1)
data

  data = data.applymap(lambda x: True if x == 'true' else False if x == 'false' else x)


Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,...,5.0,6.0,8.0,amphibian,bird,fish,insect,invertebrate,mammal,reptile
0,True,False,False,True,False,False,True,True,True,True,...,False,False,False,False,False,False,False,False,True,False
1,True,False,False,True,False,False,False,True,True,True,...,False,False,False,False,False,False,False,False,True,False
2,False,False,True,False,False,True,True,True,True,False,...,False,False,False,False,False,True,False,False,False,False
3,True,False,False,True,False,False,True,True,True,True,...,False,False,False,False,False,False,False,False,True,False
4,True,False,False,True,False,False,True,True,True,True,...,False,False,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,True,False,False,True,False,False,False,True,True,True,...,False,False,False,False,False,False,False,False,True,False
97,True,False,True,False,True,False,False,False,False,True,...,False,True,False,False,False,False,True,False,False,False
98,True,False,False,True,False,False,True,True,True,True,...,False,False,False,False,False,False,False,False,True,False
99,False,False,True,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,True,False,False


In [127]:
# Mine the itemsets present in at least half of the animals and record the
# size of each itemset.
frequent_itemsets = apriori(data, use_colnames=True, min_support=0.5)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

# Display only: the sorted copy is shown but not stored.
frequent_itemsets.sort_values('support', ascending=False)

Unnamed: 0,support,itemsets,length
3,0.821782,(backbone),1
4,0.792079,(breathes),1
5,0.742574,(tail),1
9,0.732673,"(backbone, tail)",2
8,0.683168,"(backbone, breathes)",2
10,0.60396,"(tail, breathes)",2
6,0.60396,"(backbone, toothed)",2
2,0.60396,(toothed),1
12,0.594059,"(backbone, tail, breathes)",3
0,0.584158,(eggs),1


In [128]:
# Derive association rules with a lift of at least 1 from the zoo itemsets.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
    num_itemsets=len(data),
)

# Display only: strongest lift first.
rules.sort_values('lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(backbone),(toothed),0.821782,0.60396,0.60396,0.73494,1.216867,1.0,0.107637,1.494149,1.0,0.73494,0.330723,0.86747
1,(toothed),(backbone),0.60396,0.821782,0.60396,1.0,1.216867,1.0,0.107637,inf,0.45,0.73494,1.0,0.86747
13,(backbone),"(tail, toothed)",0.821782,0.514851,0.514851,0.626506,1.216867,1.0,0.091756,1.298946,1.0,0.626506,0.230145,0.813253
12,"(tail, toothed)",(backbone),0.514851,0.821782,0.514851,1.0,1.216867,1.0,0.091756,inf,0.367347,0.626506,1.0,0.813253
7,(tail),(backbone),0.742574,0.821782,0.732673,0.986667,1.200643,1.0,0.122439,13.366337,0.649168,0.880952,0.925185,0.939116
6,(backbone),(tail),0.821782,0.742574,0.732673,0.891566,1.200643,1.0,0.122439,2.374037,0.937688,0.880952,0.578777,0.939116
18,"(tail, breathes)",(backbone),0.60396,0.821782,0.594059,0.983607,1.196919,1.0,0.097736,10.871287,0.415417,0.714286,0.908015,0.853249
19,(backbone),"(tail, breathes)",0.821782,0.60396,0.594059,0.722892,1.196919,1.0,0.097736,1.429186,0.923148,0.714286,0.300301,0.853249
20,(tail),"(backbone, breathes)",0.742574,0.683168,0.594059,0.8,1.171014,1.0,0.086756,1.584158,0.567308,0.714286,0.36875,0.834783
17,"(backbone, breathes)",(tail),0.683168,0.742574,0.594059,0.869565,1.171014,1.0,0.086756,1.973597,0.460937,0.714286,0.493311,0.834783


## Base Credit-g

In [129]:
# Load the credit-g data set from its ARFF file.
file = "datasets/credit-g.arff"
arff_file = arff.loadarff(file)

# Build the frame from the records (first element of the loadarff result) and
# decode every bytes cell to str, leaving the other cells as they are.
data = pd.DataFrame(arff_file[0]).map(
    lambda cell: cell.decode() if isinstance(cell, bytes) else cell
)
data

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,...,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes,good
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,...,real estate,22.0,none,own,1.0,skilled,1.0,none,yes,bad
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,...,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes,good
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,...,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes,good
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,...,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes,bad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,no checking,12.0,existing paid,furniture/equipment,1736.0,<100,4<=X<7,3.0,female div/dep/mar,none,...,real estate,31.0,none,own,1.0,unskilled resident,1.0,none,yes,good
996,<0,30.0,existing paid,used car,3857.0,<100,1<=X<4,4.0,male div/sep,none,...,life insurance,40.0,none,own,1.0,high qualif/self emp/mgmt,1.0,yes,yes,good
997,no checking,12.0,existing paid,radio/tv,804.0,<100,>=7,4.0,male single,none,...,car,38.0,none,own,1.0,skilled,1.0,none,yes,good
998,<0,45.0,existing paid,radio/tv,1845.0,<100,1<=X<4,4.0,male single,none,...,no known property,23.0,none,for free,1.0,skilled,1.0,yes,yes,bad


In [130]:
# Turn every numerical attribute into a categorical one by cutting its value
# range into three equal-width bins. The table maps each column to the labels
# of its bins, from lowest to highest.
three_bin_labels = {
    'duration': ["short", "medium", "long"],
    'credit_amount': ["low", "medium", "high"],
    'installment_commitment': ["low", "medium", "high"],
    'residence_since': ["low", "medium", "high"],
    'age': ["young", "adult", "old"],
    'existing_credits': ["low", "medium", "high"],
    'num_dependents': ["low", "medium", "high"],
}

for column, bin_labels in three_bin_labels.items():
    data[column] = pd.cut(data[column], bins=3, labels=bin_labels)

data

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,short,critical/other existing credit,radio/tv,low,no known savings,>=7,high,male single,none,...,real estate,old,none,own,low,skilled,low,yes,yes,good
1,0<=X<200,medium,existing paid,radio/tv,low,<100,1<=X<4,low,female div/dep/mar,none,...,real estate,young,none,own,low,skilled,low,none,yes,bad
2,no checking,short,critical/other existing credit,education,low,<100,4<=X<7,low,male single,none,...,real estate,adult,none,own,low,unskilled resident,high,none,yes,good
3,<0,medium,existing paid,furniture/equipment,medium,<100,4<=X<7,low,male single,guarantor,...,life insurance,adult,none,for free,low,skilled,high,none,yes,good
4,<0,short,delayed previously,new car,low,<100,1<=X<4,medium,male single,none,...,no known property,adult,none,for free,low,skilled,high,none,yes,bad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,no checking,short,existing paid,furniture/equipment,low,<100,4<=X<7,medium,female div/dep/mar,none,...,real estate,young,none,own,low,unskilled resident,low,none,yes,good
996,<0,medium,existing paid,used car,low,<100,1<=X<4,high,male div/sep,none,...,life insurance,adult,none,own,low,high qualif/self emp/mgmt,low,yes,yes,good
997,no checking,short,existing paid,radio/tv,low,<100,>=7,high,male single,none,...,car,adult,none,own,low,skilled,low,none,yes,good
998,<0,medium,existing paid,radio/tv,low,<100,1<=X<4,high,male single,none,...,no known property,young,none,for free,low,skilled,low,yes,yes,bad


In [131]:
# One-hot encode every (now categorical) column, target included, to obtain
# the boolean table that apriori expects.
data_dummies = pd.get_dummies(data=data)
data_dummies

Unnamed: 0,checking_status_0<=X<200,checking_status_<0,checking_status_>=200,checking_status_no checking,duration_short,duration_medium,duration_long,credit_history_all paid,credit_history_critical/other existing credit,credit_history_delayed previously,...,job_unskilled resident,num_dependents_low,num_dependents_medium,num_dependents_high,own_telephone_none,own_telephone_yes,foreign_worker_no,foreign_worker_yes,class_bad,class_good
0,False,True,False,False,True,False,False,False,True,False,...,False,True,False,False,False,True,False,True,False,True
1,True,False,False,False,False,True,False,False,False,False,...,False,True,False,False,True,False,False,True,True,False
2,False,False,False,True,True,False,False,False,True,False,...,True,False,False,True,True,False,False,True,False,True
3,False,True,False,False,False,True,False,False,False,False,...,False,False,False,True,True,False,False,True,False,True
4,False,True,False,False,True,False,False,False,False,True,...,False,False,False,True,True,False,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,False,False,False,True,True,False,False,False,False,False,...,True,True,False,False,True,False,False,True,False,True
996,False,True,False,False,False,True,False,False,False,False,...,False,True,False,False,False,True,False,True,False,True
997,False,False,False,True,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,True
998,False,True,False,False,False,True,False,False,False,False,...,False,True,False,False,False,True,False,True,True,False


In [133]:
# Mine the itemsets present in at least 70% of the credit records and record
# the size of each itemset.
frequent_itemsets = apriori(data_dummies, use_colnames=True, min_support=0.7)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

# Display only: the ten most frequent itemsets.
frequent_itemsets.sort_values('support', ascending=False).head(10)

Unnamed: 0,support,itemsets,length
5,0.966,(existing_credits_low),1
7,0.963,(foreign_worker_yes),1
24,0.93,"(existing_credits_low, foreign_worker_yes)",2
2,0.907,(other_parties_none),1
20,0.88,"(foreign_worker_yes, other_parties_none)",2
18,0.874,"(existing_credits_low, other_parties_none)",2
1,0.865,(credit_amount_low),1
36,0.848,"(existing_credits_low, foreign_worker_yes, oth...",3
6,0.845,(num_dependents_low),1
14,0.835,"(existing_credits_low, credit_amount_low)",2
