## Step 1: Preprocess the Data

In [15]:
import pandas as pd
from itertools import combinations

# Load the dataset (replace with your file path)
df = pd.read_excel("Ask A Manager Salary Survey 2021 (Responses).xlsx", sheet_name="Form Responses 1")

# Preprocessing: Convert salary to categorical bins
df['salary_bin'] = pd.cut(df['annual salary'], bins=[0, 50000, 100000, 200000, float('inf')], 
                          labels=['<50k', '50k-100k', '100k-200k', '>200k'])

# Convert experience to categorical
experience_map = {
    '1 year or less': 'Entry',
    '2 - 4 years': 'Junior',
    '5-7 years': 'Mid',
    '8 - 10 years': 'Senior',
    '11 - 20 years': 'Expert',
    '21 - 30 years': 'Veteran',
    '31 - 40 years': 'Veteran',
    '41 years or more': 'Veteran'
}
df['experience'] = df['overall years of professional experience'].map(experience_map)

# Select relevant columns and convert to transactions
columns = ['industry', 'job title', 'salary_bin', 'experience', 'highest level of education completed', 'gender']
transactions = []
for _, row in df[columns].iterrows():
    transaction = [
        f"Industry={row['industry']}",
        f"Job={row['job title']}",
        f"Salary={row['salary_bin']}",
        f"Exp={row['experience']}",
        f"Edu={row['highest level of education completed']}",
        f"Gender={row['gender']}"
    ]
    transactions.append([item for item in transaction if 'nan' not in item])

## Step 2: Apriori Algorithm Implementation

In [16]:
def get_frequent_itemsets(transactions, min_support):
    item_counts = {}
    for transaction in transactions:
        for item in transaction:
            item_counts[item] = item_counts.get(item, 0) + 1

    # Filter items by minimum support
    frequent_items = {item: count for item, count in item_counts.items() if count / len(transactions) >= min_support}
    frequent_itemsets = [frozenset([item]) for item in frequent_items]

    k = 2
    while True:
        candidate_itemsets = set()
        for i in range(len(frequent_itemsets)):
            for j in range(i+1, len(frequent_itemsets)):
                union = frequent_itemsets[i].union(frequent_itemsets[j])
                if len(union) == k:
                    candidate_itemsets.add(union)

        # Count candidates
        candidate_counts = {}
        for transaction in transactions:
            for candidate in candidate_itemsets:
                if candidate.issubset(transaction):
                    candidate_counts[candidate] = candidate_counts.get(candidate, 0) + 1

        # Filter candidates by minimum support
        new_frequent = [itemset for itemset, count in candidate_counts.items() if count / len(transactions) >= min_support]
        if not new_frequent:
            break

        frequent_itemsets.extend(new_frequent)
        k += 1

    return frequent_itemsets

# Mine frequent itemsets with min_support=0.05
frequent_itemsets = get_frequent_itemsets(transactions, min_support=0.05)

In [17]:
frequent_itemsets

[frozenset({'Industry=education higher education'}),
 frozenset({'Salary=50k-100k'}),
 frozenset({'Exp=Mid'}),
 frozenset({"Edu=master's degree"}),
 frozenset({'Gender=woman'}),
 frozenset({'Industry=computing or tech'}),
 frozenset({'Exp=Senior'}),
 frozenset({'Edu=college degree'}),
 frozenset({'Salary=<50k'}),
 frozenset({'Exp=Junior'}),
 frozenset({'Industry=nonprofits'}),
 frozenset({'Gender=man'}),
 frozenset({'Salary=100k-200k'}),
 frozenset({'Exp=Veteran'}),
 frozenset({'Exp=Expert'}),
 frozenset({'Edu=phd'}),
 frozenset({'Industry=health care'}),
 frozenset({'Edu=some college'}),
 frozenset({'Industry=government and public administration'}),
 frozenset({'Industry=engineering or manufacturing'}),
 frozenset({"Edu=master's degree", 'Industry=education higher education'}),
 frozenset({'Gender=woman', 'Salary=50k-100k'}),
 frozenset({'Exp=Mid', 'Salary=50k-100k'}),
 frozenset({'Exp=Mid', 'Gender=woman'}),
 frozenset({"Edu=master's degree", 'Salary=50k-100k'}),
 frozenset({'Industr

## Step 3: Generate Association Rules

In [18]:
def generate_rules(frequent_itemsets, transactions, min_confidence=0.7):
    rules = []
    for itemset in frequent_itemsets:
        if len(itemset) > 1:
            subsets = [frozenset(s) for s in combinations(itemset, len(itemset)-1)]
            for subset in subsets:
                remainder = itemset - subset
                subset_count = sum(1 for t in transactions if subset.issubset(t))
                if subset_count == 0:
                    continue
                confidence = sum(1 for t in transactions if itemset.issubset(t)) / subset_count
                if confidence >= min_confidence:
                    rules.append((set(subset), set(remainder), confidence))
    return sorted(rules, key=lambda x: -x[2])

# Generate rules with min_confidence=0.7
association_rules = generate_rules(frequent_itemsets, transactions, min_confidence=0.7)

# Print top rules
for rule in association_rules[:10]:
    antecedent, consequent, confidence = rule
    print(f"Rule: {antecedent} => {consequent}, Confidence: {confidence:.2f}")

Rule: {'Exp=Expert', 'Edu=college degree', 'Salary=50k-100k'} => {'Gender=woman'}, Confidence: 0.93
Rule: {'Industry=nonprofits', 'Salary=50k-100k'} => {'Gender=woman'}, Confidence: 0.93
Rule: {'Industry=health care'} => {'Gender=woman'}, Confidence: 0.92
Rule: {'Exp=Expert', 'Salary=50k-100k', "Edu=master's degree"} => {'Gender=woman'}, Confidence: 0.91
Rule: {'Salary=50k-100k', 'Exp=Veteran'} => {'Gender=woman'}, Confidence: 0.91
Rule: {'Industry=nonprofits'} => {'Gender=woman'}, Confidence: 0.91
Rule: {'Exp=Expert', 'Salary=50k-100k'} => {'Gender=woman'}, Confidence: 0.91
Rule: {'Salary=50k-100k', "Edu=master's degree"} => {'Gender=woman'}, Confidence: 0.90
Rule: {'Exp=Expert', 'Edu=college degree'} => {'Gender=woman'}, Confidence: 0.90
Rule: {'Industry=education higher education', 'Salary=50k-100k'} => {'Gender=woman'}, Confidence: 0.90


## Result