In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
import warnings

# Suppress warnings
# NOTE(review): this silences every warning for the rest of the session; it is
# kept only to preserve the cell's existing output. Consider narrowing it to
# specific categories once the run is confirmed to be clean.
warnings.filterwarnings("ignore")

# Measurement columns to discretize, each mapped to its bin labels ordered from
# the lowest bin to the highest. The number of labels is the number of bins.
BIN_LABELS = {
    'Culmen Length (mm)': ["short", "medium", "long"],
    'Culmen Depth (mm)': ["shallow", "medium", "deep"],
    'Flipper Length (mm)': ["short", "medium", "long"],
    'Body Mass (g)': ["light", "medium", "heavy"],
}

# Thresholds for itemset mining, rule generation, and the final rule filter.
MIN_ITEMSET_SUPPORT = 0.1
MIN_RULE_CONFIDENCE = 0.6
REPORT_MIN_SUPPORT = 0.3
REPORT_MIN_CONFIDENCE = 0.8

# Load the dataset
data = pd.read_csv('penguins_lter.csv')  # Ensure the path to your dataset is correct

# Clean the dataset: drop rows missing any of the measurements. The explicit
# .copy() makes cleaned_data an independent frame, so the column replacements
# below can never write through to (or warn about) `data`.
cleaned_data = data.dropna(subset=list(BIN_LABELS)).copy()

# Convert each numeric column into equal-width categorical bins.
# Plain column assignment replaces the whole column with the categorical
# result. `cleaned_data.loc[:, col] = ...` would instead try to write the
# labels into the existing float column, which recent pandas versions
# deprecate for incompatible dtypes and which behaved differently across
# pandas releases.
for column, labels in BIN_LABELS.items():
    cleaned_data[column] = pd.cut(
        cleaned_data[column], bins=len(labels), labels=labels
    )

# One-hot encode the categorical data. dtype=bool guarantees the boolean
# matrix that apriori expects, regardless of the pandas default dummy dtype.
one_hot_data = pd.get_dummies(
    cleaned_data[['Species', *BIN_LABELS]], dtype=bool
)

# Apply the Apriori algorithm
frequent_itemsets = apriori(
    one_hot_data, min_support=MIN_ITEMSET_SUPPORT, use_colnames=True
)

# Generate the association rules
rules = association_rules(
    frequent_itemsets, metric='confidence', min_threshold=MIN_RULE_CONFIDENCE
)

# Filter rules with support > 0.3 and confidence > 0.8
strong_enough = (
    (rules['support'] > REPORT_MIN_SUPPORT)
    & (rules['confidence'] > REPORT_MIN_CONFIDENCE)
)
filtered_rules = rules[strong_enough]

# Select only relevant columns: antecedents, support, and confidence
cleaned_output = filtered_rules[['antecedents', 'support', 'confidence']]

# Display the cleaned output without row indices
print(cleaned_output.to_string(index=False))

# Optionally save the cleaned output to a CSV file
# cleaned_output.to_csv('cleaned_association_rules.csv', index=False)


                                  antecedents  support  confidence
                   (Culmen Length (mm)_short) 0.365497    0.984252
(Species_Adelie Penguin (Pygoscelis adeliae)) 0.365497    0.827815
  (Species_Gentoo penguin (Pygoscelis papua)) 0.304094    0.845528
