In [1]:
# Importar las bibliotecas necesarias
import pandas as pd
import itertools

# Definir la función para generar los conjuntos de contraste
def generate_contrast_sets(attributes, values):
    contrast_sets = []
    for r in range(1, len(attributes) + 1):
        combinations = itertools.combinations(zip(attributes, values), r)
        contrast_sets.extend(combinations)
    return contrast_sets

# Definir la función para calcular el soporte de un conjunto de contraste en un grupo
def calculate_support(group, contrast_set):
    filtered_data = group[group[list(zip(*contrast_set))[0]] == list(zip(*contrast_set))[1]]
    support = len(filtered_data) / len(group)
    return support

# Definir la función para encontrar los conjuntos de contraste significativos
def find_significant_contrast_sets(data, groups, mindev):
    significant_contrast_sets = []
    for contrast_set in generate_contrast_sets(data.columns, data.values.T):
        supports = [calculate_support(group, contrast_set) for group in groups]
        max_deviation = max(supports) - min(supports)
        if max_deviation >= mindev:
            significant_contrast_sets.append((contrast_set, supports))
    return significant_contrast_sets

# Datos de ejemplo
data = pd.DataFrame({
    'job_title': ['Graphic Designer', 'Software Engineer', 'Warehouse Associate', 'Software Engineer', 'Graphic Designer'],
    'gender': ['Female', 'Male', 'Female', 'Male', 'Male'],
    'perf_eval': [5, 5, 4, 5, 5],
    'edu': ['College', 'College', 'PhD', 'Masters', 'Masters'],
    'dept': ['Operations', 'Management', 'Administration', 'Sales', 'Engineering'],
    'seniority': [2, 5, 5, 4, 5],
    'total_salary_range': ['40,000-80,000', '80,001-120,000', '80,001-120,000', '80,001-120,000', '80,001-120,000'],
    'age_group': ['0-19', '20-39', '0-19', '20-39', '20-39']
})

# Definir los grupos
groups = [data[data['gender'] == 'Male'], data[data['gender'] == 'Female']]

# Parámetros
mindev = 0.2

# Encontrar los conjuntos de contraste significativos
significant_contrast_sets = find_significant_contrast_sets(data, groups, mindev)

# Imprimir los resultados
print("Conjuntos de contraste significativos:")
for contrast_set, supports in significant_contrast_sets:
    print("Contraste-Set:", contrast_set)
    for group, support in zip(groups, supports):
        print("Grupo:", group['gender'].unique()[0], "Soporte:", support)
    print()


KeyError: ('job_title',)