In [15]:
import pandas as pd

# Load the cleaned medical dataset
medical_data = pd.read_csv('cleaned_medical_data.csv')

# Columns that take part in the Market Basket Analysis. 'Age' is only kept
# long enough to derive 'Age_Group' from it below.
selected_columns = [
    'Readmit_30D', 'High_blood_pressure', 'Stroke', 'Overweight', 'Arthritis',
    'Diabetes', 'Hyperlipidemia', 'Back_pain', 'Anxiety', 'Rhinitis_allergy',
    'Reflux_esophagitis', 'Asthma', 'Complication_risk', 'Initial_admin', 'Gender', 'Age'
]

# Bin Age into labelled groups and drop the raw column. This is one chained,
# non-mutating expression, so re-running the cell always rebuilds basket_data
# from medical_data. include_lowest=True closes the first bin on the left so
# that Age == 0 is labelled '0-30'; with the default it would fall outside
# every bin and turn into the bogus item 'Age_Group=nan'.
basket_data = (
    medical_data[selected_columns]
    .assign(
        Age_Group=lambda df: pd.cut(
            df['Age'],
            bins=[0, 30, 45, 60, 75, 90, 120],
            labels=['0-30', '31-45', '46-60', '61-75', '76-90', '90+'],
            include_lowest=True,
        )
    )
    .drop(columns='Age')
)

# Convert every row into a transaction of "column=value" items.
# itertuples(name=None) yields plain tuples, keeps each column's own dtype and
# avoids the per-row Series that iterrows() constructs.
item_columns = list(basket_data.columns)
transactions = [
    [f"{column}={value}" for column, value in zip(item_columns, row)]
    for row in basket_data.itertuples(index=False, name=None)
]

# Convert transactions into a DataFrame (one item per cell)
transaction_df = pd.DataFrame(transactions)

# Save transactional data to CSV without index or header, so the file
# contains nothing but the items themselves
transaction_df.to_csv('medical_transactions.csv', index=False, header=False)

print("Transactional dataset for Market Basket Analysis has been saved as medical_transactions.csv.")


Transactional dataset for Market Basket Analysis has been saved as medical_transactions.csv.


In [17]:
# Import required libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Load the transactional data. The file has no header row, so every line is
# read as one transaction.
transactions = pd.read_csv('medical_transactions.csv', header=None)

# Convert transactions into a list of lists (one inner list per transaction)
transactions_list = transactions.to_numpy().tolist()

# One-hot encode the transactions: one column per distinct item
te = TransactionEncoder()
te_ary = te.fit(transactions_list).transform(transactions_list)
transaction_df = pd.DataFrame(te_ary, columns=te.columns_)

# Generate frequent itemsets using Apriori (itemsets present in >= 5% of rows)
frequent_itemsets = apriori(transaction_df, min_support=0.05, use_colnames=True)

# Generate association rules, keeping those with lift >= 1
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Rank rules by confidence, breaking ties by lift, so the saved order is
# well-defined and matches the ranking applied when the rules are reloaded.
# Reassigning instead of sorting in place keeps the cell free of hidden mutation.
rules = rules.sort_values(by=['confidence', 'lift'], ascending=False)

# Save rules to CSV
rules.to_csv('association_rules.csv', index=False)

# Display top rules. A bare last expression uses the notebook's rich table
# rendering instead of the wrapped, truncated text that print() produces.
rules.head(10)


                                                                                                antecedents  \
56725                                              (Arthritis=Yes, Rhinitis_allergy=No, Hyperlipidemia=Yes)   
466803  (Reflux_esophagitis=No, Rhinitis_allergy=Yes, Asthma=No, Hyperlipidemia=No, High_blood_pressure=No)   
175379             (Anxiety=No, Rhinitis_allergy=No, Initial_admin=Emergency Admission, Hyperlipidemia=Yes)   
1144                                                                       (Age_Group=31-45, Back_pain=Yes)   
179245                                  (Readmit_30D=No, Back_pain=No, Anxiety=Yes, High_blood_pressure=No)   
254153              (Initial_admin=Emergency Admission, Rhinitis_allergy=No, Asthma=No, Hyperlipidemia=Yes)   
175609                        (Anxiety=No, Hyperlipidemia=Yes, Rhinitis_allergy=No, Reflux_esophagitis=Yes)   
14092                                             (Age_Group=31-45, High_blood_pressure=No, Readmit_30D=No)   
2

In [19]:
import pandas as pd

# Lift pandas' column-width cap so the antecedent/consequent values are
# rendered in full rather than truncated.
pd.set_option('display.max_colwidth', None)

# Re-read the rules that were saved to disk
rules = pd.read_csv('association_rules.csv')

# Order by confidence, then lift (both descending), and keep the first three rows
top_rules = rules.sort_values(by=['confidence', 'lift'], ascending=False).iloc[:3]

# Show only the columns needed to interpret each rule
display(top_rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,"frozenset({'Arthritis=Yes', 'Rhinitis_allergy=No', 'Hyperlipidemia=Yes'})",frozenset({'Stroke=No'}),0.061,0.850767,1.062529
1,"frozenset({'Reflux_esophagitis=No', 'Rhinitis_allergy=Yes', 'Asthma=No', 'Hyperlipidemia=No', 'High_blood_pressure=No'})",frozenset({'Stroke=No'}),0.0537,0.849684,1.061176
2,"frozenset({'Anxiety=No', 'Rhinitis_allergy=No', 'Initial_admin=Emergency Admission', 'Hyperlipidemia=Yes'})",frozenset({'Stroke=No'}),0.0645,0.846457,1.057146
