In [1]:
import pulp as lp
import pandas as pd
from snorkel.labeling.model import MajorityLabelVoter, LabelModel
from sklearn.metrics import accuracy_score, classification_report


  
| SentenceID | ExpectedLabel | $LF_1$ | $LF_2$| $LF_3$ | $NewLF_i$ |
| --- | --- | --- | --- | --- | --- |
| 1 | 1 | 0 | 1 | 1 | ? |
| 2 | 0 | -1 | 1 | 1 | ? |
| 3 | 1 | 0 | 1 | 1 | ? |
| 4 | 0 | 1 | -1 | 1 | ? |
| 5 | 1 | 0 | 1 | -1 | ? |


In [2]:
original_predictions = pd.DataFrame({
    'lf1': [0, 0, 1, 0, 1], 
    'lf2': [0, -1, 0, 1, 0], 
    'lf3': [0, 1, 1, -1, 1], 
    'nlf1': [0, 0, -1, 1, 1], 
    'nlf2': [0, 0, -1, 0, 0],
    'nlf3': [0, 1, 1, 1, -1],
    'nlf4': [0, 0, 1, 0, 1],
    'tlabel': [1, 0, 1, 0, 1]
})

# Create more instances by repeating and modifying the patterns
expanded_predictions = pd.concat([original_predictions] * 4, ignore_index=True)

# Make existing LFs 'label 1 dominated'
expanded_predictions['lf1'] = [1] * len(expanded_predictions)
expanded_predictions['lf2'] = [1, 1, 1, 0] * 5
expanded_predictions['lf3'] = [1, 1, 1, 1] * 5

# Make new LFs '0 dominated'
expanded_predictions['nlf1'] = [0, 0, -1, 0] * 5
expanded_predictions['nlf2'] = [0, 0, 0, -1] * 5
expanded_predictions['nlf3'] = [0, 0, 0, 1] * 5
expanded_predictions['nlf4'] = [0, 0, 0, 0] * 5

In [3]:
expanded_predictions

Unnamed: 0,lf1,lf2,lf3,nlf1,nlf2,nlf3,nlf4,tlabel
0,1,1,1,0,0,0,0,1
1,1,1,1,0,0,0,0,0
2,1,1,1,-1,0,0,0,1
3,1,0,1,0,-1,1,0,0
4,1,1,1,0,0,0,0,1
5,1,1,1,0,0,0,0,1
6,1,1,1,-1,0,0,0,0
7,1,0,1,0,-1,1,0,1
8,1,1,1,0,0,0,0,0
9,1,1,1,0,0,0,0,1


In [4]:
import pulp
import pandas as pd

original_predictions = expanded_predictions
# Problem initialization
prob = pulp.LpProblem("Label_Flip_Minimization", pulp.LpMinimize)

# Parameters
labeling_functions = original_predictions.columns[:-1]  
num_instances = len(original_predictions)
M = 1000 

nlfs = [lf for lf in labeling_functions if 'nlf' in lf]
x_nlfs = pulp.LpVariable.dicts("x_nlf", nlfs, cat='Binary')

# x_nlf1 = pulp.LpVariable("x_nlf1", cat='Binary')
# x_nlf2 = pulp.LpVariable("x_nlf2", cat='Binary')

P_vars = pulp.LpVariable.dicts("P", (range(num_instances), labeling_functions), 
                               lowBound=-1, upBound=1, cat='Integer')
new_lf_weight = 1

# Binary variables for each type of flip
flip_1_to_0 = pulp.LpVariable.dicts("flip_1_to_0", (range(num_instances), labeling_functions), cat='Binary')
flip_1_to_neg1 = pulp.LpVariable.dicts("flip_1_to_neg1", (range(num_instances), labeling_functions), cat='Binary')
flip_0_to_1 = pulp.LpVariable.dicts("flip_0_to_1", (range(num_instances), labeling_functions), cat='Binary')
flip_0_to_neg1 = pulp.LpVariable.dicts("flip_0_to_neg1", (range(num_instances), labeling_functions), cat='Binary')
flip_neg1_to_1 = pulp.LpVariable.dicts("flip_neg1_to_1", (range(num_instances), labeling_functions), cat='Binary')
flip_neg1_to_0 = pulp.LpVariable.dicts("flip_neg1_to_0", (range(num_instances), labeling_functions), cat='Binary')

# Binary variables to track correctness of predictions (1 if correct, 0 if not)
correctness_vars = pulp.LpVariable.dicts("correct", (range(num_instances), labeling_functions), cat='Binary')

# Objective: Minimize the number of flips
flip_cost = pulp.lpSum([flip_1_to_0[i][lf] + flip_1_to_neg1[i][lf] + 
                        flip_0_to_1[i][lf] + flip_0_to_neg1[i][lf] + 
                        flip_neg1_to_1[i][lf] + flip_neg1_to_0[i][lf] 
                        for i in range(num_instances) for lf in labeling_functions])

# prob += flip_cost + new_lf_weight*(x_nlf1+x_nlf2), "Minimize_Flips"
# maybe we allow deletion of the exising functions as well!
prob += flip_cost + pulp.lpSum([new_lf_weight * x_nlfs[lf] for lf in nlfs]), "Minimize_Flips"


# Mutual exclusivity
for i in range(num_instances):
    for lf in labeling_functions:
        prob += (flip_1_to_0[i][lf] + flip_1_to_neg1[i][lf] + 
                 flip_0_to_1[i][lf] + flip_0_to_neg1[i][lf] + 
                 flip_neg1_to_1[i][lf] + flip_neg1_to_0[i][lf]) <= 1, f"Flip_Exclusivity_{i}_{lf}"

for i in range(num_instances):
    for lf in labeling_functions:
        original_val = original_predictions.loc[i, lf]
        if original_val == 1:
            prob += P_vars[i][lf] == 0 * flip_1_to_0[i][lf] + (-1) * flip_1_to_neg1[i][lf] + 
            1 * (1 - flip_1_to_0[i][lf] - flip_1_to_neg1[i][lf]), f"Flip_From_1_{i}_{lf}"
            
        elif original_val == 0:
            prob += P_vars[i][lf] == 1 * flip_0_to_1[i][lf] + (-1) * flip_0_to_neg1[i][lf] + 
            0 * (1 - flip_0_to_1[i][lf] - flip_0_to_neg1[i][lf]), f"Flip_From_0_{i}_{lf}"
        
        elif original_val == -1:
            prob += P_vars[i][lf] == 1 * flip_neg1_to_1[i][lf] + 0 * flip_neg1_to_0[i][lf] + (-1) * (1 - 
                                             flip_neg1_to_0[i][lf] - flip_neg1_to_1[i][lf]), f"Flip_From_neg1_{i}_{lf}"

for lf in labeling_functions:
    if lf in nlfs:
        lf_correct_predictions = pulp.lpSum([correctness_vars[i][lf] for i in range(num_instances)])
        # Apply Big-M method for conditional inclusion of nLFs
        prob += lf_correct_predictions >= 0.6 * num_instances - M * (1 - x_nlfs[lf]), f"LF_{lf}_Accuracy"
    else:
        lf_correct_predictions = pulp.lpSum([correctness_vars[i][lf] for i in range(num_instances)])
        prob += lf_correct_predictions >= 0.6 * num_instances, f"LF_{lf}_Accuracy"


for i in range(num_instances):
    for lf in nlfs:
        # Ensure that correctness_vars[i][lf] is counted only if x_nlf[lf] = 1
        prob += correctness_vars[i][lf] <= M * x_nlfs[lf], f"{lf}_active_{i}"
    correct_predictions_per_instance = pulp.lpSum([correctness_vars[i][lf] for lf in labeling_functions if lf not in nlfs]) \
                                       + pulp.lpSum([correctness_vars[i][lf] for lf in nlfs])
    num_labeling_functions_used = len(labeling_functions) - len(nlfs) + pulp.lpSum(x_nlfs.values())  # Adjust based on active nLFs
    prob += correct_predictions_per_instance >= 0.7 * num_labeling_functions_used, f"Instance_{i}_Accuracy"
    
    
    
# Ensure correctness tracking between P_vars and true labels
for i in range(num_instances):
    for lf in labeling_functions:
        true_label = original_predictions['tlabel'][i]
        
        # Ensure that correctness_vars[i][lf] is 1 if P_vars[i][lf] equals true_label, else 0
        prob += P_vars[i][lf] - true_label <= M * (1 - correctness_vars[i][lf]), f"Correctness_UpperBound_{i}_{lf}"
        prob += true_label - P_vars[i][lf] <= M * (1 - correctness_vars[i][lf]), f"Correctness_LowerBound_{i}_{lf}"


# Solve the integer program
prob.solve()

p_vars_solution = pd.DataFrame(index=original_predictions.index, columns=labeling_functions)

for i in range(num_instances):
    for lf in labeling_functions:
        p_vars_solution.loc[i, lf] = int(pulp.value(P_vars[i][lf]))

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/89b086186be2444fa3745ccfa14482a9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/89b086186be2444fa3745ccfa14482a9-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 672 COLUMNS
At line 6109 RHS
At line 6777 BOUNDS
At line 8042 ENDATA
Problem MODEL has 667 rows, 1124 columns and 2344 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is -4.43909e-15 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 232 strengthened rows, 0 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 59 strengthened rows, 0 substitutions
Cgl0004I processed model has 471 rows, 452 columns (452 integer (382 of which binary)) and 1601 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0038

In [27]:
x_nlfs

{'nlf1': x_nlf_nlf1,
 'nlf2': x_nlf_nlf2,
 'nlf3': x_nlf_nlf3,
 'nlf4': x_nlf_nlf4}

In [28]:
prob

Label_Flip_Minimization:
MINIMIZE
1*flip_0_to_1_0_lf1 + 1*flip_0_to_1_0_lf2 + 1*flip_0_to_1_0_lf3 + 1*flip_0_to_1_0_nlf1 + 1*flip_0_to_1_0_nlf2 + 1*flip_0_to_1_0_nlf3 + 1*flip_0_to_1_0_nlf4 + 1*flip_0_to_1_10_lf1 + 1*flip_0_to_1_10_lf2 + 1*flip_0_to_1_10_lf3 + 1*flip_0_to_1_10_nlf1 + 1*flip_0_to_1_10_nlf2 + 1*flip_0_to_1_10_nlf3 + 1*flip_0_to_1_10_nlf4 + 1*flip_0_to_1_11_lf1 + 1*flip_0_to_1_11_lf2 + 1*flip_0_to_1_11_lf3 + 1*flip_0_to_1_11_nlf1 + 1*flip_0_to_1_11_nlf2 + 1*flip_0_to_1_11_nlf3 + 1*flip_0_to_1_11_nlf4 + 1*flip_0_to_1_12_lf1 + 1*flip_0_to_1_12_lf2 + 1*flip_0_to_1_12_lf3 + 1*flip_0_to_1_12_nlf1 + 1*flip_0_to_1_12_nlf2 + 1*flip_0_to_1_12_nlf3 + 1*flip_0_to_1_12_nlf4 + 1*flip_0_to_1_13_lf1 + 1*flip_0_to_1_13_lf2 + 1*flip_0_to_1_13_lf3 + 1*flip_0_to_1_13_nlf1 + 1*flip_0_to_1_13_nlf2 + 1*flip_0_to_1_13_nlf3 + 1*flip_0_to_1_13_nlf4 + 1*flip_0_to_1_14_lf1 + 1*flip_0_to_1_14_lf2 + 1*flip_0_to_1_14_lf3 + 1*flip_0_to_1_14_nlf1 + 1*flip_0_to_1_14_nlf2 + 1*flip_0_to_1_14_nlf3 + 1*flip_

In [5]:
# Output results
print(f"Status: {pulp.LpStatus[prob.status]}")

Status: Optimal


In [6]:
correctness_solution = pd.DataFrame(index=original_predictions.index, columns=labeling_functions)
for i in range(num_instances):
    for lf in labeling_functions:
        correctness_solution.loc[i, lf] = int(pulp.value(correctness_vars[i][lf]))

# tools.display_dataframe_to_user(name="Correctness of Predictions", dataframe=correctness_solution)

In [7]:
p_vars_solution

Unnamed: 0,lf1,lf2,lf3,nlf1,nlf2,nlf3,nlf4
0,1,1,1,0,0,0,0
1,0,1,0,0,0,0,0
2,1,1,1,-1,0,0,0
3,1,0,0,0,-1,0,0
4,1,1,1,0,0,1,0
5,1,1,1,0,0,0,0
6,0,1,0,-1,0,0,0
7,1,0,1,0,-1,1,0
8,0,0,1,0,0,0,0
9,1,1,1,0,0,0,0


In [8]:
x_nlfs_solution = {lf: pulp.value(x_nlfs[lf]) for lf in nlfs}

# Display the results
print("x_nlfs results (inclusion of new LFs):")
print(x_nlfs_solution)

x_nlfs results (inclusion of new LFs):
{'nlf1': 0.0, 'nlf2': 0.0, 'nlf3': 1.0, 'nlf4': 0.0}


In [9]:
pulp.value(num_labeling_functions_used)

4.0

In [10]:
# View which labeling functions had flips
flips_result = {}

for i in range(num_instances):
    for lf in labeling_functions:
        flip_result = {
            '1_to_0': pulp.value(flip_1_to_0[i][lf]),
            '1_to_neg1': pulp.value(flip_1_to_neg1[i][lf]),
            '0_to_1': pulp.value(flip_0_to_1[i][lf]),
            '0_to_neg1': pulp.value(flip_0_to_neg1[i][lf]),
            'neg1_to_1': pulp.value(flip_neg1_to_1[i][lf]),
            'neg1_to_0': pulp.value(flip_neg1_to_0[i][lf])
        }
        flips_result[(i, lf)] = flip_result

# Print flips result
print("Flips per instance and labeling function:")
for key, value in flips_result.items():
    i, lf = key
    print(f"Instance {i}, Labeling Function {lf}: {value}")

# View which new labeling functions (nLFs) were included
included_nlfs = {lf: pulp.value(x_nlfs[lf]) for lf in nlfs}

# Print included nLFs
print("\nIncluded New Labeling Functions (nLFs):")
for lf, included in included_nlfs.items():
    print(f"{lf}: {'Included' if included == 1 else 'Not Included'}")


Flips per instance and labeling function:
Instance 0, Labeling Function lf1: {'1_to_0': 0.0, '1_to_neg1': 0.0, '0_to_1': 0.0, '0_to_neg1': 0.0, 'neg1_to_1': 0.0, 'neg1_to_0': 0.0}
Instance 0, Labeling Function lf2: {'1_to_0': 0.0, '1_to_neg1': 0.0, '0_to_1': 0.0, '0_to_neg1': 0.0, 'neg1_to_1': 0.0, 'neg1_to_0': 0.0}
Instance 0, Labeling Function lf3: {'1_to_0': 0.0, '1_to_neg1': 0.0, '0_to_1': 0.0, '0_to_neg1': 0.0, 'neg1_to_1': 0.0, 'neg1_to_0': 0.0}
Instance 0, Labeling Function nlf1: {'1_to_0': 0.0, '1_to_neg1': 0.0, '0_to_1': 0.0, '0_to_neg1': 0.0, 'neg1_to_1': 0.0, 'neg1_to_0': 0.0}
Instance 0, Labeling Function nlf2: {'1_to_0': 0.0, '1_to_neg1': 0.0, '0_to_1': 0.0, '0_to_neg1': 0.0, 'neg1_to_1': 0.0, 'neg1_to_0': 0.0}
Instance 0, Labeling Function nlf3: {'1_to_0': 0.0, '1_to_neg1': 0.0, '0_to_1': 0.0, '0_to_neg1': 0.0, 'neg1_to_1': 0.0, 'neg1_to_0': 0.0}
Instance 0, Labeling Function nlf4: {'1_to_0': 0.0, '1_to_neg1': 0.0, '0_to_1': 0.0, '0_to_neg1': 0.0, 'neg1_to_1': 0.0, 'neg1_

In [11]:
# simple snorkel sensitivity test

In [12]:
original_lf_list = [lf for lf in labeling_functions if 'nlf' not in lf]
original_lf_list

['lf1', 'lf2', 'lf3']

In [29]:
lf_matrix_original

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1]])

In [13]:
lf_matrix_original = original_predictions[original_lf_list].values
# from snorkel.labeling import LabelModel

# Initialize LabelModel (adjust the number of classes if necessary)
label_model = LabelModel(cardinality=2, verbose=True)

# Fit the model using the labeling function matrix
label_model.fit(L_train=lf_matrix_original, n_epochs=100, log_freq=10, seed=42)

INFO:root:Computing O...
INFO:root:Estimating \mu...
  0%|                                                               | 0/100 [00:00<?, ?epoch/s]INFO:root:[0 epochs]: TRAIN:[loss=5.500]
INFO:root:[10 epochs]: TRAIN:[loss=0.131]
INFO:root:[20 epochs]: TRAIN:[loss=0.045]
INFO:root:[30 epochs]: TRAIN:[loss=0.119]
INFO:root:[40 epochs]: TRAIN:[loss=0.067]
INFO:root:[50 epochs]: TRAIN:[loss=0.038]
INFO:root:[60 epochs]: TRAIN:[loss=0.019]
INFO:root:[70 epochs]: TRAIN:[loss=0.010]
INFO:root:[80 epochs]: TRAIN:[loss=0.006]
INFO:root:[90 epochs]: TRAIN:[loss=0.004]
100%|███████████████████████████████████████████████████| 100/100 [00:00<00:00, 1387.05epoch/s]
INFO:root:Finished Training


In [14]:
lf_matrix_original

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1]])

In [15]:
predicted_labels_original = label_model.predict(L=lf_matrix_original)

In [16]:
predicted_labels_original

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [17]:
true_labels = original_predictions['tlabel'].values

In [18]:
def evaluate_snorkel_acc(true_labels, pred_labels):
    true_labels = original_predictions['tlabel'].values

    # Accuracy
    accuracy = accuracy_score(true_labels, pred_labels)
    print(f"Accuracy: {accuracy:.4f}")

    # Detailed classification report (precision, recall, F1-score)
    print(classification_report(true_labels, pred_labels))

In [19]:
evaluate_snorkel_acc(true_labels=true_labels, pred_labels=predicted_labels_original)

Accuracy: 0.6000
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.60      1.00      0.75        12

    accuracy                           0.60        20
   macro avg       0.30      0.50      0.37        20
weighted avg       0.36      0.60      0.45        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
label_model_new = LabelModel(cardinality=2, verbose=True)

In [21]:
label_model.cardinality

2

In [22]:
label_model_new.cardinality

2

In [23]:
p_vars_solution

Unnamed: 0,lf1,lf2,lf3,nlf1,nlf2,nlf3,nlf4
0,1,1,1,0,0,0,0
1,0,1,0,0,0,0,0
2,1,1,1,-1,0,0,0
3,1,0,0,0,-1,0,0
4,1,1,1,0,0,1,0
5,1,1,1,0,0,0,0
6,0,1,0,-1,0,0,0
7,1,0,1,0,-1,1,0
8,0,0,1,0,0,0,0
9,1,1,1,0,0,0,0


In [24]:
lf_matrix_new = p_vars_solution.values.astype(int)
# array = array.  # or float, depending on the needs

label_model_new.fit(L_train=lf_matrix_new, n_epochs=100, log_freq=10, seed=42)
predicted_labels_new = label_model_new.predict(L=lf_matrix_new)

INFO:root:Computing O...
INFO:root:Estimating \mu...
  0%|                                                               | 0/100 [00:00<?, ?epoch/s]INFO:root:[0 epochs]: TRAIN:[loss=18.633]
INFO:root:[10 epochs]: TRAIN:[loss=7.061]
INFO:root:[20 epochs]: TRAIN:[loss=2.494]
INFO:root:[30 epochs]: TRAIN:[loss=0.247]
INFO:root:[40 epochs]: TRAIN:[loss=0.376]
INFO:root:[50 epochs]: TRAIN:[loss=0.258]
INFO:root:[60 epochs]: TRAIN:[loss=0.142]
INFO:root:[70 epochs]: TRAIN:[loss=0.147]
INFO:root:[80 epochs]: TRAIN:[loss=0.140]
INFO:root:[90 epochs]: TRAIN:[loss=0.135]
100%|███████████████████████████████████████████████████| 100/100 [00:00<00:00, 1372.00epoch/s]
INFO:root:Finished Training


In [30]:
true_labels

array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1])

In [25]:
predicted_labels_new

array([0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1])

In [26]:
evaluate_snorkel_acc(true_labels=true_labels, pred_labels=predicted_labels_new)

Accuracy: 0.5500
              precision    recall  f1-score   support

           0       0.47      0.88      0.61         8
           1       0.80      0.33      0.47        12

    accuracy                           0.55        20
   macro avg       0.63      0.60      0.54        20
weighted avg       0.67      0.55      0.53        20

