In [None]:
import pandas as pd
from apyori import apriori

# Load the dataset.
# The CSV is read with header=None, i.e. its first row is treated as data,
# so the column labels are attached explicitly right after loading.
# NOTE(review): the labels look like the UCI "Adult" census layout -- confirm
# against the actual contents of Data.csv.
df = pd.read_csv('Data.csv', header=None)
df.columns = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'education-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
    'income',
]


In [None]:
# Data preprocessing
# Numeric columns that are discretised so they can be mined as categorical items.
numerical_attributes = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']

# Bin every numeric column into 5 equal-width intervals and store each bin as a
# plain string tagged with its column name, i.e. values of the form
# "<column>=(low, high]".
#
# Why strings: pd.cut yields pandas Interval objects. Left as-is, the records
# would mix Interval and str items, which cannot be ordered against each other,
# and apyori sorts its items while mining (TypeError on '<'). The column prefix
# additionally keeps bins that come from different columns distinguishable in
# the mined rules. Missing values end up as the string "<column>=nan".
#
# NOTE: the columns of `df` are overwritten in place, so this cell cannot be
# re-run without reloading `df` first (pd.cut needs the numeric originals).
for attr in numerical_attributes:
    interval_bins = pd.cut(df[attr], bins=5)
    df[attr] = attr + '=' + interval_bins.astype(str)

# Normalise income to exactly two labels; surrounding whitespace in the raw
# value is ignored, and anything that is not '>50K' is mapped to '<=50K'.
df['income'] = df['income'].apply(lambda x: '>50K' if x.strip() == '>50K' else '<=50K')

# One transaction per row: a list holding that row's 15 item values.
records = df.values.tolist()

In [None]:
# Association rule mining
def find_rules(records, min_support, min_confidence, min_lift, min_length):
    """Mine association rules from ``records`` with apyori's ``apriori``.

    Args:
        records: Iterable of transactions, each an iterable of items.
        min_support: Support threshold forwarded to ``apriori``.
        min_confidence: Confidence threshold forwarded to ``apriori``.
        min_lift: Lift threshold forwarded to ``apriori``.
        min_length: Minimum number of items a mined item set must contain to
            be kept. ``None`` disables the length filter.

    Returns:
        list: The relation records produced by ``apriori`` whose item set has
        at least ``min_length`` items, in the order they were generated.
    """
    # apriori() has no min_length option (it only understands max_length) and
    # silently ignores unknown keyword arguments, so forwarding min_length to
    # it does nothing. The length floor is therefore enforced here instead.
    mined = apriori(records, min_support=min_support, min_confidence=min_confidence,
                    min_lift=min_lift)
    if min_length is None:
        return list(mined)
    return [record for record in mined if len(record.items) >= min_length]

# Example usage: thresholds for this mining run, handed to find_rules below.
min_support, min_confidence = 0.0022, 0.2
min_lift, min_length = 3, 4

# Arguments are passed by keyword so each threshold is visibly matched to the
# find_rules parameter of the same name.
results = find_rules(
    records,
    min_support=min_support,
    min_confidence=min_confidence,
    min_lift=min_lift,
    min_length=min_length,
)

In [None]:
# Print the results
# Each mined record covers one item set and carries one ordered statistic per
# antecedent -> consequent split that passed the thresholds. Confidence and
# lift belong to a specific split, so every split is printed together with its
# own direction instead of reporting only the first one against the bare item set.
for rule in results:
    for stat in rule.ordered_statistics:
        # The item collections are frozensets; sort (by text form, so any item
        # type works) to get a stable, readable order across runs.
        antecedent = sorted(stat.items_base, key=str)
        consequent = sorted(stat.items_add, key=str)
        rule_str = f"RULE: {antecedent} -> {consequent}\nSUPPORT: {rule.support}\nCONFIDENCE: {stat.confidence}\nLIFT: {stat.lift}"
        print(rule_str)