In [25]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules

# Read the health export into a DataFrame (the file is decoded as latin1)
file_path = 'health.csv'
data = pd.read_csv(file_path, encoding='latin1')

# Print the first row as a quick look at the available columns
print("Initial Dataset Head:")
print(data.head(1))

# Split the records on their 'type' code:
# 'S' = symptoms, 'C' = conditions, 'T' = treatments
symptoms = data.loc[data['type'].eq('S')]
conditions = data.loc[data['type'].eq('C')]
treatments = data.loc[data['type'].eq('T')]

# Function to perform one-hot encoding and find frequent itemsets and rules using FP-Growth
def find_association_rules_fp_growth(df, min_support=0.01, min_confidence=0.01):
    """Mine frequent itemsets and association rules with FP-Growth.

    The input is expected in long format with ``reportid``, ``name`` and
    ``value`` columns. Every distinct ``reportid`` becomes one transaction
    (row of the basket) and every distinct ``name`` becomes one item
    (column of the basket).

    Args:
        df: Long-format records containing ``reportid``, ``name`` and
            ``value`` columns.
        min_support: Minimum support passed to ``fpgrowth``.
        min_confidence: Minimum confidence passed to ``association_rules``
            as ``min_threshold`` (the rule metric is ``"confidence"``).

    Returns:
        A ``(frequent_itemsets, rules)`` tuple holding the outputs of
        ``fpgrowth`` and ``association_rules`` respectively.
    """
    # Count non-null 'value' entries per (reportid, name) pair and pivot the
    # item names into columns; pairs that never occur are filled with 0.
    counts = (
        df.groupby(['reportid', 'name'])['value']
        .count()
        .unstack(fill_value=0)
    )

    # One-hot encode as booleans with a single vectorised comparison. This
    # replaces the per-cell ``applymap`` lambda and hands ``fpgrowth`` a
    # boolean frame instead of 0/1 integers.
    basket_sets = counts > 0

    # Apply the FP-Growth algorithm, keeping item names rather than column
    # positions in the resulting itemsets
    frequent_itemsets = fpgrowth(basket_sets, min_support=min_support, use_colnames=True)

    # Generate association rules filtered on confidence
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

    return frequent_itemsets, rules

# Function to compute additional metrics for rules
def compute_additional_metrics(rules, df):
    """Add count-based and balance metrics to an association-rules table.

    The columns ``support_count``, ``confidence_count``, ``kulczynski`` and
    ``imbalance_ratio`` are added to ``rules`` in place, and the same object
    is returned.

    Args:
        rules: Rules table with ``support``, ``confidence``,
            ``antecedent support`` and ``consequent support`` columns.
        df: The records the rules were mined from. When it has a
            ``reportid`` column, the number of transactions is the number of
            distinct report ids (ignoring rows whose ``name`` is missing, if
            that column exists); otherwise ``len(df)`` is used.

    Returns:
        ``rules`` with the four extra columns.
    """
    # Support is a fraction of transactions (distinct reports), not of
    # long-format rows, so scale by the transaction count. Using len(df)
    # would inflate the count by the average number of items per report.
    columns = getattr(df, 'columns', ())
    if 'reportid' in columns:
        usable = df.dropna(subset=['name']) if 'name' in columns else df
        n_transactions = usable['reportid'].nunique()
    else:
        n_transactions = len(df)

    rules['support_count'] = rules['support'] * n_transactions
    rules['confidence_count'] = rules['confidence'] * rules['support_count']

    # Kulczynski: mean of the two conditional probabilities, where
    # confidence = support / antecedent support.
    rules['kulczynski'] = (
        rules['confidence'] + (rules['support'] / rules['consequent support'])
    ) / 2

    # Imbalance ratio: |sup(A) - sup(B)| / (sup(A) + sup(B) - sup(A and B)).
    rules['imbalance_ratio'] = abs(
        rules['antecedent support'] - rules['consequent support']
    ) / (
        rules['antecedent support'] + rules['consequent support'] - rules['support']
    )
    return rules

# Symptoms: mine itemsets and rules, attach the extra metrics, then report
symptom_itemsets, symptom_rules = find_association_rules_fp_growth(
    symptoms,
    min_support=0.01,
    min_confidence=0.01,
)
symptom_rules = compute_additional_metrics(symptom_rules, symptoms)
print("Frequent Symptoms Itemsets:\n", symptom_itemsets)
print("Symptom Association Rules:\n", symptom_rules)

# Conditions: same pipeline on the condition records
condition_itemsets, condition_rules = find_association_rules_fp_growth(
    conditions,
    min_support=0.01,
    min_confidence=0.01,
)
condition_rules = compute_additional_metrics(condition_rules, conditions)
print("Frequent Conditions Itemsets:\n", condition_itemsets)
print("Condition Association Rules:\n", condition_rules)

# Treatments: same pipeline on the treatment records
treatment_itemsets, treatment_rules = find_association_rules_fp_growth(
    treatments,
    min_support=0.01,
    min_confidence=0.01,
)
treatment_rules = compute_additional_metrics(treatment_rules, treatments)
print("Frequent Treatments Itemsets:\n", treatment_itemsets)
print("Treatment Association Rules:\n", treatment_rules)


  data = pd.read_csv(file_path, encoding='latin1')


Initial Dataset Head:
       ID                              user       date  reportid type  \
0  277224  QEVuQwEA///txlSWtDIyQTLnQ241zw==  10/4/2016     16906    C   

        name value Unnamed: 7 Unnamed: 8 Unnamed: 9  
0  stiffness     4        NaN        NaN        NaN  




Frequent Symptoms Itemsets:
        support                                    itemsets
0     0.026671                           (joint stiffness)
1     0.303513                                    (nausea)
2     0.232193                                   (anxiety)
3     0.426895                                   (fatigue)
4     0.204721                                  (diarrhea)
...        ...                                         ...
6895  0.010019                    (nausea, bowel problems)
6896  0.010042  (back pain, bowel problems, shoulder pain)
6897  0.010019       (back pain, eye pain, bowel problems)
6898  0.010019   (eye pain, bowel problems, shoulder pain)
6899  0.010019         (back pain, nausea, bowel problems)

[6900 rows x 2 columns]
Symptom Association Rules:
                         antecedents                  consequents  \
0                 (joint stiffness)                 (joint pain)   
1                      (joint pain)            (joint stiffness)   
2     



Frequent Conditions Itemsets:
       support                                           itemsets
0    0.042080                               (ulcerative colitis)
1    0.031708                                   (crohns disease)
2    0.016752                                    (gastroparesis)
3    0.024880                     (generalized anxiety disorder)
4    0.024171                                  (bipolar type ii)
..        ...                                                ...
185  0.013066                  (paresthesia, ulcerative colitis)
186  0.013042         (acid reflux, hypothyroidism, paresthesia)
187  0.013042  (hypothyroidism, paresthesia, ulcerative colitis)
188  0.013042     (acid reflux, paresthesia, ulcerative colitis)
189  0.013019  (acid reflux, hypothyroidism, paresthesia, ulc...

[190 rows x 2 columns]
Condition Association Rules:
                            antecedents  \
0                       (fibromyalgia)   
1       (generalized anxiety disorder)   
2        



Frequent Treatments Itemsets:
         support                                           itemsets
0      0.027684                                           (zofran)
1      0.100605                                        (ibuprofen)
2      0.013747                                        (midodrine)
3      0.015270                                           (prozac)
4      0.010967                                           (coffee)
...         ...                                                ...
25270  0.010548  (mirtazapine, kirkland fish oil, probiotic, ma...
25271  0.010548  (mirtazapine, gui pi pills, kirkland fish oil,...
25272  0.017745                                 (coq10, b12 spray)
25273  0.017745                            (b12 spray, naltrexone)
25274  0.017745                     (coq10, b12 spray, naltrexone)

[25275 rows x 2 columns]
Treatment Association Rules:
                      antecedents              consequents  antecedent support  \
0                    (vitami

In [26]:
import pandas as pd


# Define function to filter top rules
def filter_top_rules(rules, top_n=10, metric='lift'):
    """Return the ``top_n`` rows of ``rules`` with the largest ``metric``.

    Args:
        rules: Rules table containing a column named ``metric``.
        top_n: Maximum number of rows to return.
        metric: Name of the column to rank by.

    Returns:
        The selected rows, ordered from largest to smallest ``metric``.
    """
    strongest = rules.nlargest(n=top_n, columns=metric)
    return strongest

# Ten strongest symptom rules under each ranking metric
top_symptom_rules = filter_top_rules(
    symptom_rules, top_n=10, metric='lift'
)
top_symptom_confidence = filter_top_rules(
    symptom_rules, top_n=10, metric='confidence'
)
top_symptom_support = filter_top_rules(
    symptom_rules, top_n=10, metric='support'
)

# Ten strongest condition rules under each ranking metric
top_condition_rules = filter_top_rules(
    condition_rules, top_n=10, metric='lift'
)
top_condition_confidence = filter_top_rules(
    condition_rules, top_n=10, metric='confidence'
)
top_condition_support = filter_top_rules(
    condition_rules, top_n=10, metric='support'
)

# Ten strongest treatment rules under each ranking metric
top_treatment_rules = filter_top_rules(
    treatment_rules, top_n=10, metric='lift'
)
top_treatment_confidence = filter_top_rules(
    treatment_rules, top_n=10, metric='confidence'
)
top_treatment_support = filter_top_rules(
    treatment_rules, top_n=10, metric='support'
)

# Report the selections: symptoms first, then conditions, then treatments
print("Top Symptom Association Rules by Lift:\n", top_symptom_rules)
print("Top Symptom Association Rules by Confidence:\n", top_symptom_confidence)
print("Top Symptom Association Rules by Support:\n", top_symptom_support)

print("\nTop Condition Association Rules by Lift:\n", top_condition_rules)
print("\nTop Condition Association Rules by Confidence:\n", top_condition_confidence)
print("\nTop Condition Association Rules by Support:\n", top_condition_support)

print("\nTop Treatment Association Rules by Lift:\n", top_treatment_rules)
print("\nTop Treatment Association Rules by Confidence:\n", top_treatment_confidence)
print("\nTop Treatment Association Rules by Support:\n", top_treatment_support)


Top Symptom Association Rules by Lift:
                                               antecedents  \
128782  (stomach pain, pelvic pain, headache, anxiety,...   
128915  (dizziness, diarrhea, gas, tingling in hands a...   
126373  (stomach pain, tingling in hands and feet, dia...   
126520                      (dizziness, gas, pelvic pain)   
128755  (stomach pain, tingling in hands and feet, dia...   
128942            (dizziness, headache, gas, pelvic pain)   
128670  (stomach pain, tingling in hands and feet, hea...   
129027                      (dizziness, gas, pelvic pain)   
128824  (tingling in hands and feet, headache, diarrhe...   
128873        (stomach pain, dizziness, gas, pelvic pain)   

                                              consequents  antecedent support  \
128782  (dizziness, diarrhea, gas, tingling in hands a...            0.010591   
128915  (stomach pain, pelvic pain, headache, anxiety,...            0.010293   
126373                      (dizziness, gas, 

           antecedents      consequents  antecedent support  \
136       (vitamin d)      (magnesium)            0.111687   
137       (magnesium)      (vitamin d)            0.067058   
134       (vitamin d)      (vitamin c)            0.111687   
135       (vitamin c)      (vitamin d)            0.052664   
138   (amitriptyline)       (tramadol)            0.051750   
139        (tramadol)  (amitriptyline)            0.057385   
4         (vitamin d)      (synthroid)            0.111687   
5         (synthroid)      (vitamin d)            0.035756   
1512      (probiotic)      (vitamin d)            0.045581   
1513      (vitamin d)      (probiotic)            0.111687   

      consequent support   support  confidence      lift  leverage  \
136             0.067058  0.033357    0.298670  4.453936  0.025868   
137             0.111687  0.033357    0.497445  4.453936  0.025868   
134             0.052664  0.031301    0.280259  5.321681  0.025419   
135             0.111687  0.031301  