In [3]:
import numpy as np
import pulp

# Define parameters
tau = 0.6  # valid label rate
delta = 0.5  # accuracy per label
kappa = 0.6  # labeling function accuracy threshold
gamma = 0.7  # dominant label threshold for flipping

# Input data (rows are sentences, columns are labeling functions)
data = np.array([
    [0, 1, 1],  # Sentence 1
    [-1, 1, 1],  # Sentence 2
    [0, 1, 1],  # Sentence 3
    [1, -1, 1],  # Sentence 4
    [0, 1, -1]   # Sentence 5
])

# Expected labels
expected_labels = [1, 0, 1, 0, 1]

# Number of labeling functions and sentences
num_lf = data.shape[1]
num_sentences = len(expected_labels)

# Binary decision variables for each flip (1 = flip, 0 = no flip)
flip_vars = pulp.LpVariable.dicts("flip", (range(num_sentences), range(num_lf)), cat="Binary")

# Define the optimization problem (minimizing the total number of flips)
prob = pulp.LpProblem("Minimize_Flips", pulp.LpMinimize)

# Objective: minimize the total number of flips
prob += pulp.lpSum([flip_vars[i][j] for i in range(num_sentences) for j in range(num_lf)]), "TotalFlips"

# Constraints for dominant label proportion and flipping
for j in range(num_lf):
    # Count of 0s and 1s after flipping
    num_zeros = pulp.lpSum([(1 if data[i, j] == 0 else 0) - flip_vars[i][j] for i in range(num_sentences) if data[i, j] != -1])
    num_ones = pulp.lpSum([(1 if data[i, j] == 1 else 0) + flip_vars[i][j] for i in range(num_sentences) if data[i, j] != -1])
    
    valid_preds = pulp.lpSum([1 for i in range(num_sentences) if data[i, j] != -1])

    # Auxiliary variable to represent the maximum of num_zeros and num_ones
    max_label_count = pulp.LpVariable(f"max_label_count_{j}", lowBound=0)

    # max_label_count should be greater than or equal to both num_zeros and num_ones
    prob += max_label_count >= num_zeros, f"Max_Constraint_Zeros_{j}"
    prob += max_label_count >= num_ones, f"Max_Constraint_Ones_{j}"

    # Dominant label proportion constraint: max_label_count <= gamma * valid_preds
    prob += max_label_count <= gamma * valid_preds, f"Gamma_Constraint_LFs_{j}"
    # max_label_count should be greater than or equal to both num_zeros and num_ones
    prob += max_label_count >= num_zeros, f"Max_Constraint_Zero_{j}"
    prob += max_label_count >= num_ones, f"Max_Constraint_One_{j}"

    # Dominant label proportion constraint: max_label_count <= gamma * valid_preds
    prob += max_label_count <= gamma * valid_preds, f"Gamma_Constraint_LF_{j}"

# Constraints: flip predictions to match expected labels where allowed
for i in range(num_sentences):
    for j in range(num_lf):
        if data[i, j] != -1:  # Only consider valid predictions
            # Add a constraint to flip the prediction if it doesn't match the expected label
            prob += ((data[i, j] + flip_vars[i][j]) % 2 == expected_labels[i]), f"Flip_to_Match_Expected_Sentence_{i}_LF_{j}"

# Solve the optimization problem
prob.solve()

# # Flipped data after phase 1
# flipped_data = data.copy()
# for i in range(num_sentences):
#     for j in range(num_lf):
#         if pulp.value(flip_vars[i][j]) == 1:
#             flipped_data[i, j] = 1 if flipped_data[i, j] == 0 else 0  # Flip the label

# # Output the flipped data
# import pandas as pd
# df = pd.DataFrame(flipped_data, columns=[f"LF_{i+1}" for i in range(flipped_data.shape[1])])
# df["ExpectedLabel"] = expected_labels
# print("Flipped Dataset (Phase 1):")
# print(df)

# # Output total number of flips
# total_flips = pulp.value(pulp.lpSum([flip_vars[i][j] for i in range(num_sentences) for j in range(num_lf)]))
# print(f"Total flips made: {total_flips}")


TypeError: unsupported operand type(s) for %: 'LpAffineExpression' and 'int'

In [None]:
flip_vars

In [None]:
prob