In [1]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [2]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules


from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [3]:
# Download the congressional voting records dataset (UCI repository id 105).
congressional_voting_records = fetch_ucirepo(id=105)

# Split the fetched data into the vote columns (X) and the label column (y).
dataset = congressional_voting_records.data
X, y = dataset.features, dataset.targets

# Place votes and label side by side; the label ends up as the last column.
df = pd.concat([X, y], axis="columns")

In [4]:
# Fill missing votes, then split the frame by party.

# Row masks for each party, computed once and reused for every column.
is_republican = df['Class'] == 'republican'
is_democrat = df['Class'] == 'democrat'

# Every column except the last one (the Class label) is a vote column.
for vote in df.columns[:-1]:
    # Number of missing votes per party in this column.
    republican_missing = df.loc[is_republican, vote].isna().sum()
    democrat_missing = df.loc[is_democrat, vote].isna().sum()

    # The party with strictly more missing votes is filled with 'n' and the
    # other with 'y'; on a tie the democrats get 'n'.
    if republican_missing > democrat_missing:
        republican_fill, democrat_fill = 'n', 'y'
    else:
        republican_fill, democrat_fill = 'y', 'n'

    df.loc[is_republican, vote] = df.loc[is_republican, vote].fillna(republican_fill)
    df.loc[is_democrat, vote] = df.loc[is_democrat, vote].fillna(democrat_fill)


# One frame per party, selected on the Class column.
df_republican = df.loc[is_republican]
df_democrat = df.loc[is_democrat]


# Both bare expressions are rendered because ast_node_interactivity is 'all'.
df_republican
df_democrat

Unnamed: 0,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,Class
0,n,y,n,y,y,y,n,n,n,y,y,y,y,y,n,y,republican
1,n,y,n,y,y,y,n,n,n,n,n,y,y,y,n,y,republican
7,n,y,n,y,y,y,n,n,n,n,n,n,y,y,y,y,republican
8,n,y,n,y,y,y,n,n,n,n,n,y,y,y,n,y,republican
10,n,y,n,y,y,n,n,n,n,n,y,y,y,y,n,n,republican
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,n,n,n,y,y,y,y,y,n,y,n,y,y,y,n,y,republican
430,n,n,y,y,y,y,n,n,y,y,n,y,y,y,n,y,republican
432,n,y,n,y,y,y,n,n,n,n,y,y,y,y,n,y,republican
433,n,n,n,y,y,y,y,n,y,y,n,y,y,y,n,y,republican


Unnamed: 0,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-corporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,Class
2,n,y,y,n,y,y,n,n,n,n,y,n,y,y,n,n,democrat
3,n,y,y,n,n,y,n,n,n,n,y,n,y,n,n,y,democrat
4,y,y,y,n,y,y,n,n,n,n,y,n,y,y,y,y,democrat
5,n,y,y,n,y,y,n,n,n,n,n,n,y,y,y,y,democrat
6,n,y,n,y,y,y,n,n,n,n,n,n,n,y,y,y,democrat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425,n,n,y,n,n,n,y,y,n,y,y,n,n,n,y,n,democrat
426,y,n,y,n,n,n,y,y,y,y,n,n,n,n,y,y,democrat
428,n,n,n,n,n,n,y,y,y,y,n,n,y,n,y,y,democrat
429,y,n,y,n,n,n,y,y,y,y,n,y,n,n,y,y,democrat


In [5]:
# One-hot encode each party's frame; the Class column is encoded as well, so
# the resulting tables contain a Class_* indicator column.
dummmy_republican, dummmy_democrat = map(pd.get_dummies, (df_republican, df_democrat))

In [12]:
# Shared apriori settings: keep itemsets with support of at least 0.6 and
# report them by column name.
apriori_kwargs = {"min_support": 0.6, "use_colnames": True}

# Frequent itemsets among republican rows.
frequent_itemsets_republican = apriori(dummmy_republican, **apriori_kwargs)
frequent_itemsets_republican

# Frequent itemsets among democrat rows.
frequent_itemsets_democrat = apriori(dummmy_democrat, **apriori_kwargs)
frequent_itemsets_democrat

Unnamed: 0,support,itemsets
0,0.797619,(handicapped-infants_n)
1,0.845238,(adoption-of-the-budget-resolution_n)
2,0.988095,(physician-fee-freeze_y)
3,0.952381,(el-salvador-aid_y)
4,0.898810,(religious-groups-in-schools_y)
...,...,...
5536,0.607143,"(education-spending_y, superfund-right-to-sue_..."
5537,0.607143,"(education-spending_y, superfund-right-to-sue_..."
5538,0.607143,"(education-spending_y, physician-fee-freeze_y,..."
5539,0.613095,"(education-spending_y, superfund-right-to-sue_..."


Unnamed: 0,support,itemsets
0,0.865169,(adoption-of-the-budget-resolution_y)
1,0.947566,(physician-fee-freeze_n)
2,0.794007,(el-salvador-aid_n)
3,0.749064,(anti-satellite-test-ban_y)
4,0.831461,(aid-to-nicaraguan-contras_y)
...,...,...
286,0.602996,"(el-salvador-aid_n, adoption-of-the-budget-res..."
287,0.640449,"(el-salvador-aid_n, physician-fee-freeze_n, ai..."
288,0.610487,"(mx-missile_y, physician-fee-freeze_n, aid-to-..."
289,0.602996,"(superfund-right-to-sue_n, physician-fee-freez..."


In [13]:
# Shared rule-mining settings: keep rules whose confidence is at least 0.9.
rule_kwargs = {"metric": "confidence", "min_threshold": 0.9}

# Rules derived from the republican frequent itemsets.
rules_republican = association_rules(frequent_itemsets_republican, **rule_kwargs)
rules_republican

# Rules derived from the democrat frequent itemsets.
rules_democrat = association_rules(frequent_itemsets_democrat, **rule_kwargs)
rules_democrat

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(handicapped-infants_n),(physician-fee-freeze_y),0.797619,0.988095,0.791667,0.992537,1.004496,0.003543,1.595238,0.022114
1,(handicapped-infants_n),(el-salvador-aid_y),0.797619,0.952381,0.779762,0.977612,1.026493,0.020125,2.126984,0.127526
2,(handicapped-infants_n),(religious-groups-in-schools_y),0.797619,0.898810,0.738095,0.925373,1.029554,0.021188,1.355952,0.141841
3,(handicapped-infants_n),(aid-to-nicaraguan-contras_n),0.797619,0.857143,0.732143,0.917910,1.070896,0.048469,1.740260,0.327116
4,(handicapped-infants_n),(education-spending_y),0.797619,0.880952,0.744048,0.932836,1.058895,0.041383,1.772487,0.274824
...,...,...,...,...,...,...,...,...,...,...
147798,"(religious-groups-in-schools_y, duty-free-expo...","(superfund-right-to-sue_y, physician-fee-freez...",0.666667,0.761905,0.613095,0.919643,1.207031,0.105159,2.962963,0.514563
147799,"(duty-free-exports_n, aid-to-nicaraguan-contra...","(superfund-right-to-sue_y, physician-fee-freez...",0.678571,0.791667,0.613095,0.903509,1.141274,0.075893,2.159091,0.385113
147800,"(religious-groups-in-schools_y, duty-free-expo...","(education-spending_y, physician-fee-freeze_y,...",0.678571,0.779762,0.613095,0.903509,1.158698,0.083971,2.282468,0.426106
147801,"(religious-groups-in-schools_y, duty-free-expo...","(education-spending_y, physician-fee-freeze_y,...",0.672619,0.767857,0.613095,0.911504,1.187076,0.096620,2.623214,0.481377


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(adoption-of-the-budget-resolution_y),(physician-fee-freeze_n),0.865169,0.947566,0.838951,0.969697,1.023356,0.019147,1.730337,0.169271
1,(el-salvador-aid_n),(adoption-of-the-budget-resolution_y),0.794007,0.865169,0.722846,0.910377,1.052254,0.035896,1.504435,0.241074
2,(anti-satellite-test-ban_y),(adoption-of-the-budget-resolution_y),0.749064,0.865169,0.689139,0.920000,1.063377,0.041072,1.685393,0.237508
3,(aid-to-nicaraguan-contras_y),(adoption-of-the-budget-resolution_y),0.831461,0.865169,0.771536,0.927928,1.072540,0.052182,1.870787,0.401294
4,(mx-missile_y),(adoption-of-the-budget-resolution_y),0.704120,0.865169,0.640449,0.909574,1.051326,0.031267,1.491077,0.165001
...,...,...,...,...,...,...,...,...,...,...
1737,"(anti-satellite-test-ban_y, el-salvador-aid_n,...","(aid-to-nicaraguan-contras_y, Class_democrat, ...",0.651685,0.756554,0.602996,0.925287,1.223028,0.109961,3.258427,0.523542
1738,"(anti-satellite-test-ban_y, education-spending...","(aid-to-nicaraguan-contras_y, Class_democrat, ...",0.655431,0.767790,0.602996,0.920000,1.198244,0.099763,2.902622,0.480151
1739,"(aid-to-nicaraguan-contras_y, anti-satellite-t...","(education-spending_n, Class_democrat, physici...",0.662921,0.719101,0.602996,0.909605,1.264919,0.126289,3.107444,0.621325
1740,"(education-spending_n, anti-satellite-test-ban...","(aid-to-nicaraguan-contras_y, Class_democrat, ...",0.640449,0.767790,0.602996,0.941520,1.226273,0.111265,3.970787,0.513199


In [20]:
# Keep only the rules whose consequent is exactly the party label item.
# NOTE(review): each rule table was mined from a single-party frame, so the
# Class_* item holds for every row; the displayed results accordingly show
# consequent support 1.0, confidence 1.0 and lift 1.0 for the kept rules.
republican_consequent = frozenset({'Class_republican'})
democrat_consequent = frozenset({'Class_democrat'})

rules_republican = rules_republican.loc[rules_republican['consequents'] == republican_consequent]
rules_democrat = rules_democrat.loc[rules_democrat['consequents'] == democrat_consequent]

In [22]:
# Number of items an antecedent must contain for its rule to be kept.
ANTECEDENT_SIZE = 3


def has_required_size(antecedent):
    """Return True when the antecedent holds exactly ANTECEDENT_SIZE items."""
    return len(antecedent) == ANTECEDENT_SIZE


# Restrict each party's rules to those with three-item antecedents.
rules_republican_filtered = rules_republican[rules_republican['antecedents'].apply(has_required_size)]
rules_democrat_filtered = rules_democrat[rules_democrat['antecedents'].apply(has_required_size)]



In [23]:
# Display the democrat rules that passed the three-item antecedent filter.
rules_democrat_filtered

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
316,"(physician-fee-freeze_n, el-salvador-aid_n, ad...",(Class_democrat),0.722846,1.0,0.722846,1.0,1.0,0.0,inf,0.0
333,"(physician-fee-freeze_n, anti-satellite-test-b...",(Class_democrat),0.689139,1.0,0.689139,1.0,1.0,0.0,inf,0.0
365,"(aid-to-nicaraguan-contras_y, physician-fee-fr...",(Class_democrat),0.76779,1.0,0.76779,1.0,1.0,0.0,inf,0.0
373,"(mx-missile_y, physician-fee-freeze_n, adoptio...",(Class_democrat),0.632959,1.0,0.632959,1.0,1.0,0.0,inf,0.0
384,"(education-spending_n, physician-fee-freeze_n,...",(Class_democrat),0.771536,1.0,0.771536,1.0,1.0,0.0,inf,0.0
391,"(superfund-right-to-sue_n, physician-fee-freez...",(Class_democrat),0.655431,1.0,0.655431,1.0,1.0,0.0,inf,0.0
398,"(physician-fee-freeze_n, crime_n, adoption-of-...",(Class_democrat),0.625468,1.0,0.625468,1.0,1.0,0.0,inf,0.0
416,"(el-salvador-aid_n, anti-satellite-test-ban_y,...",(Class_democrat),0.651685,1.0,0.651685,1.0,1.0,0.0,inf,0.0
440,"(aid-to-nicaraguan-contras_y, el-salvador-aid_...",(Class_democrat),0.715356,1.0,0.715356,1.0,1.0,0.0,inf,0.0
449,"(mx-missile_y, el-salvador-aid_n, adoption-of-...",(Class_democrat),0.606742,1.0,0.606742,1.0,1.0,0.0,inf,0.0


In [24]:
# Display the republican rules that passed the three-item antecedent filter.
rules_republican_filtered

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1044,"(physician-fee-freeze_y, adoption-of-the-budge...",(Class_republican),0.708333,1.0,0.708333,1.0,1.0,0.0,inf,0.0
1078,"(adoption-of-the-budget-resolution_n, handicap...",(Class_republican),0.702381,1.0,0.702381,1.0,1.0,0.0,inf,0.0
1098,"(religious-groups-in-schools_y, adoption-of-th...",(Class_republican),0.660714,1.0,0.660714,1.0,1.0,0.0,inf,0.0
1104,"(adoption-of-the-budget-resolution_n, handicap...",(Class_republican),0.601190,1.0,0.601190,1.0,1.0,0.0,inf,0.0
1128,"(adoption-of-the-budget-resolution_n, aid-to-n...",(Class_republican),0.672619,1.0,0.672619,1.0,1.0,0.0,inf,0.0
...,...,...,...,...,...,...,...,...,...,...
5573,"(duty-free-exports_n, synfuels-corporation-cut...",(Class_republican),0.696429,1.0,0.696429,1.0,1.0,0.0,inf,0.0
5582,"(superfund-right-to-sue_y, education-spending_...",(Class_republican),0.797619,1.0,0.797619,1.0,1.0,0.0,inf,0.0
5593,"(duty-free-exports_n, education-spending_y, su...",(Class_republican),0.720238,1.0,0.720238,1.0,1.0,0.0,inf,0.0
5598,"(duty-free-exports_n, education-spending_y, cr...",(Class_republican),0.750000,1.0,0.750000,1.0,1.0,0.0,inf,0.0


In [25]:
# Count how often each individual item occurs across the antecedents of the
# filtered republican rules (one row per item after explode).
antecedents_counts = (
    rules_republican_filtered['antecedents']
    .explode()
    .value_counts()
)

# value_counts sorts by descending count, so the first ten rows are the
# ten most frequent items.
top_10_elements = antecedents_counts.iloc[:10]

print(top_10_elements)


antecedents
physician-fee-freeze_y                 69
crime_y                                69
el-salvador-aid_y                      68
religious-groups-in-schools_y          68
adoption-of-the-budget-resolution_n    65
aid-to-nicaraguan-contras_n            65
education-spending_y                   65
mx-missile_n                           64
superfund-right-to-sue_y               63
duty-free-exports_n                    63
Name: count, dtype: int64


In [26]:
# Count how often each individual item occurs across the antecedents of the
# filtered democrat rules (one row per item after explode).
antecedents_counts = (
    rules_democrat_filtered['antecedents']
    .explode()
    .value_counts()
)

# value_counts sorts by descending count, so the first ten rows are the
# most frequent items (fewer rows if fewer distinct items exist).
top_10_elements = antecedents_counts.iloc[:10]

print(top_10_elements)


antecedents
physician-fee-freeze_n                 24
aid-to-nicaraguan-contras_y            24
el-salvador-aid_n                      22
adoption-of-the-budget-resolution_y    20
education-spending_n                   20
anti-satellite-test-ban_y              16
mx-missile_y                           13
superfund-right-to-sue_n               13
crime_n                                 7
Name: count, dtype: int64
