In [None]:
# --- 🌟 Step 1: Import libraries
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import warnings
import matplotlib.pyplot as plt
import networkx as nx

# Ignore RuntimeWarnings for clean output
warnings.filterwarnings("ignore", category=RuntimeWarning)

# --- 🌟 Step 2: Load and Clean the Dataset

# Read manually to fix formatting issues: every double quote is dropped and
# the remaining text is split on commas.
with open("Dataset.csv", "r", encoding="utf-8") as file:
    lines = file.readlines()

# Clean each line once and skip lines that end up empty. A blank line would
# otherwise become a row of missing values that still counts as an (empty)
# transaction and lowers the support of every itemset.
records = [
    cleaned.split(',')
    for cleaned in (line.replace('"', '').strip() for line in lines)
    if cleaned
]

# Split headers and data (the first non-blank line is the header)
header = records[0]
data = records[1:]
df = pd.DataFrame(data, columns=header)

# Convert numeric columns
numeric_columns = [
    "lbxsal", "lbdsalsi", "lbxsatsi", "lbxsassi", "lbxsapsi",
    "lbxsbu", "lbdsbusi", "lbxsca", "lbdscasi", "lbxsck", "lbxsch",
    "lbdschsi", "lbxsc3si", "lbxscr", "lbdscrsi", "lbxsgtsi", "lbxsgl",
    "lbdsglsi", "lbxsir", "lbdsirsi", "lbxsldsi", "lbxsph", "lbdsphsi",
    "lbxstb", "lbdstbsi", "lbxstp", "lbdstpsi", "lbxsua", "lbdsuasi",
    "lbxsnasi", "lbxsksi", "lbxsclsi", "lbxsossi", "lbxsgb", "lbdsgbsi",
    "lbxstr", "lbdstrsi"
]

# Every field was read as text; values that cannot be parsed become NaN
# (errors="coerce") instead of raising.
for col in numeric_columns:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Drop 'seqn' (ID)
df = df.drop(columns=['seqn'])

# --- 🌟 Step 3: Normalize the values into -1, 0, 1

# Reference interval (low, high) per measured column. A value below `low` is
# flagged as decreased, above `high` as increased; the bounds are inclusive.
#
# NOTE(review): lbxsal / lbdsalsi are serum albumin in the NHANES biochemistry
# profile (g/dL and g/L respectively). They previously carried (135, 145),
# the same interval as sodium (lbxsnasi), which flags practically every sample
# as "low". The interval below is the commonly quoted 3.5-5.0 g/dL
# (35-50 g/L) - confirm against the reference interval of the lab in use.
normal_ranges = {
    'lbxsal': (3.5, 5.0), 'lbdsalsi': (35, 50), 'lbxsassi': (10, 40), 'lbxsapsi': (44, 147),
    'lbxsbu': (7, 20), 'lbdsbusi': (2.5, 7.1), 'lbxsca': (8.5, 10.2), 'lbdscasi': (2.12, 2.55),
    'lbxsck': (22, 198), 'lbxsch': (0, 200), 'lbdschsi': (0, 5.17), 'lbxsc3si': (22, 29),
    'lbxscr': (0.74, 1.35), 'lbdscrsi': (65.4, 119.3), 'lbxsgtsi': (9, 48), 'lbxsgl': (70, 99),
    'lbdsglsi': (3.9, 5.5), 'lbxsir': (60, 170), 'lbdsirsi': (10.7, 30.4), 'lbxsldsi': (140, 280),
    'lbxsph': (2.5, 4.5), 'lbdsphsi': (0.81, 1.45), 'lbxstb': (0.1, 1.2), 'lbdstbsi': (1.71, 20.5),
    'lbxstp': (6.0, 8.3), 'lbdstpsi': (60, 83), 'lbxsua': (3.5, 7.2), 'lbdsuasi': (208, 428),
    'lbxsnasi': (135, 145), 'lbxsksi': (3.5, 5.1), 'lbxsclsi': (98, 107), 'lbxsossi': (275, 295),
    'lbxsgb': (2.0, 3.5), 'lbdsgbsi': (20, 35), 'lbxstr': (0, 150), 'lbdstrsi': (0, 1.7),
    'lbxsatsi': (20, 55)
}

def normalize(val, low, high):
    """Classify ``val`` against the inclusive interval ``[low, high]``.

    Returns -1 when ``val`` is below ``low``, 1 when it is above ``high``,
    and 0 otherwise. A value for which both comparisons are false (the
    bounds themselves, or NaN) therefore yields 0.
    """
    if val < low:
        return -1
    return 1 if val > high else 0

# Build the flag table in one step: every column listed in normal_ranges
# becomes a column of -1 / 0 / 1 flags, classified against its own interval.
normalized_df = pd.DataFrame({
    col: df[col].apply(normalize, args=(low, high))
    for col, (low, high) in normal_ranges.items()
})

# --- 🌟 Step 4: Create Transactions

def get_label(col, val):
    """Return the transaction item for a normalized flag, or None.

    Args:
        col: Column name used as the item's prefix.
        val: Normalized flag; -1 means below range, 1 means above range.

    Returns:
        ``col`` followed by a down arrow for -1, by an up arrow for 1, and
        ``None`` for any other value (nothing to report).
    """
    # The arrows are written as escapes (U+2193 / U+2191) so that re-saving
    # the source in another encoding cannot turn them into mojibake again.
    if val == -1:
        return f"{col}\u2193"
    elif val == 1:
        return f"{col}\u2191"
    else:
        return None

# One transaction per row: the labels of all out-of-range columns.
transactions = []
column_names = list(normalized_df.columns)
for _, record in normalized_df.iterrows():
    labels = (get_label(name, record[name]) for name in column_names)
    # In-range values produce None and are left out of the transaction
    transactions.append([label for label in labels if label])

# --- 🌟 Step 5: Apply Apriori Algorithm

# Encode the transactions as a table with one column per distinct item
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df_encoded = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine the itemsets whose support is at least 0.01
frequent_itemsets = apriori(
    df_encoded,
    use_colnames=True,
    min_support=0.01,
)

# Derive the rules whose confidence reaches 0.6
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.6,
    metric="confidence",
)

# --- 🌟 Step 6: Clean & Present Rules

# Filter strong rules: confidence of at least 0.6 and a lift of at least 1
# (a lift below 1 means the items co-occur less often than expected by chance)
strong_rules = rules[(rules['confidence'] >= 0.6) & (rules['lift'] >= 1)]

# Sort by confidence and lift, strongest first
strong_rules = strong_rules.sort_values(by=['confidence', 'lift'], ascending=False)

# Beautify the output for thesis.
# Symbols are written as escapes (U+2705 check mark, U+2192 arrow) so that
# re-saving the source in another encoding cannot garble the printed text.
print("\n\u2705 Strong Association Rules for Medical Analysis:\n")
for _, rule in strong_rules.iterrows():
    # Antecedents/consequents are frozensets whose iteration order is not
    # stable between runs; sort them so the report is reproducible.
    ant = ', '.join(sorted(rule['antecedents']))
    con = ', '.join(sorted(rule['consequents']))
    print(f"If [{ant}] \u2192 Then [{con}] (Support: {rule['support']:.2f}, Confidence: {rule['confidence']:.2f}, Lift: {rule['lift']:.2f})")

# --- 🌟 Step 7: Visualize the rules as a Graph

G = nx.DiGraph()

# Several rules can map onto the same antecedent -> consequent pair. Adding
# the edge again would overwrite the stored weight (and, with the rules sorted
# by descending confidence, leave the weakest value), so collect the highest
# confidence per pair first and add every edge exactly once.
edge_confidence = {}
for _, rule in strong_rules.iterrows():
    confidence = rule['confidence']
    for antecedent in rule['antecedents']:
        for consequent in rule['consequents']:
            pair = (antecedent, consequent)
            if pair not in edge_confidence or confidence > edge_confidence[pair]:
                edge_confidence[pair] = confidence

for (antecedent, consequent), confidence in edge_confidence.items():
    G.add_edge(antecedent, consequent, weight=confidence)

plt.figure(figsize=(14, 10))
# Fixed seed keeps the layout reproducible between runs
pos = nx.spring_layout(G, k=0.5, seed=42)
nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=1500, edge_color='gray', arrowsize=20)
# U+1FA7A (stethoscope) written as an escape so the title cannot be garbled
# by re-encoding the source; fonts without this glyph render a placeholder.
plt.title('\U0001FA7A Medical Association Rules Network', fontsize=16)
plt.show()



✅ Strong Association Rules for Medical Analysis:

If [lbdsgbsi↓] → Then [lbxsgb↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbxsgb↓] → Then [lbdsgbsi↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbdsalsi↓, lbdsgbsi↓] → Then [lbxsgb↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbdsalsi↓, lbxsgb↓] → Then [lbdsgbsi↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbdsgbsi↓] → Then [lbdsalsi↓, lbxsgb↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbxsgb↓] → Then [lbdsalsi↓, lbdsgbsi↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbdsgbsi↓, lbxsal↓] → Then [lbxsgb↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbxsal↓, lbxsgb↓] → Then [lbdsgbsi↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbdsgbsi↓] → Then [lbxsal↓, lbxsgb↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
If [lbxsgb↓] → Then [lbdsgbsi↓, lbxsal↓] (Support: 0.01, Confidence: 1.00, Lift: 97.75)
I