#  variables 

1. user input size (20, 40, 80)
2. lf_acc_thresh ()
3. instance_acc_thresh
4. min_non_abstain_thresh
5. kwg related params

In [1]:
# from rbbm_src.labelling_func_src.src.utils import lf_constraint_solve
from rbbm_src.labelling_func_src.src.lfs_tree import keyword_labelling_func_builder
from rbbm_src.labelling_func_src.src.TreeRules import SPAM, HAM, ABSTAIN, PredicateNode
from rbbm_src.labelling_func_src.src.LFRepair import populate_violations, fix_rules_with_solver_input
from rbbm_src.labelling_func_src.src.classes import clean_text

import re
import psycopg2
import pandas as pd
from snorkel.labeling import (
	LabelingFunction, 
	labeling_function, 
	PandasLFApplier, 
	LFAnalysis,
	filter_unlabeled_dataframe
	)
from snorkel.labeling.model import MajorityLabelVoter, LabelModel
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import matplotlib.patches as mpatches
import pulp
from sklearn.metrics import accuracy_score, classification_report

from rbbm_src.labelling_func_src.src.KeyWordRuleMiner import KeyWordRuleMiner 
# sample user confirmation and complaints
import random
from collections import deque
import numpy as np
import pickle
import pydot
from IPython.display import Image, display 

import datetime


[nltk_data] Downloading package words to /home/opc/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package stopwords to /home/opc/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
2024-10-15 21:53:15.991886: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-15 21:53:16.042403: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-15 21:53:16.043715: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
DEBUG 2024-10-15 21:53:16,862 [tpu_cluster_resolver.py:<module>:32] Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install c

In [2]:
from collections import defaultdict

In [3]:
def calculate_coverage(df, lf_column):
    total_instances = len(df)
    labeled_instances = df[lf_column] != -1
    coverage = labeled_instances.sum() / total_instances
    return coverage

def calculate_accuracy(df, lf_column, ground_truth_column):
    labeled_instances = df[df[lf_column] != -1]
    correct_labels = (labeled_instances[lf_column] == labeled_instances[ground_truth_column]).sum()
    total_labeled = len(labeled_instances)
    if total_labeled == 0:
        return 0.0  # Avoid division by zero if no instances are labeled
    accuracy = correct_labels / total_labeled
    return accuracy

In [4]:
def run_snorkel_with_funcs(dataset_name, funcs, conn):
    
    sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
    sentences_df = sentences_df.rename(columns={"class": "expected_label", "content": "old_text"})
    sentences_df['text'] = sentences_df['old_text'].apply(lambda s: clean_text(s))
    sentences_df = sentences_df[~sentences_df['text'].isna()]
    applier = PandasLFApplier(lfs=funcs)
    initial_vectors = applier.apply(df=sentences_df, progress_bar=False)
    model = LabelModel(cardinality=2, verbose=True, device='cpu')
    model.fit(L_train=initial_vectors, n_epochs=500, log_freq=100, seed=123)
    probs_test= model.predict_proba(L=initial_vectors)
    df_sentences_filtered, probs_test_filtered, filtered_vectors, df_no_signal  = filter_unlabeled_dataframe(
        X=sentences_df, y=probs_test, L=initial_vectors
    )	

    df_sentences_filtered = df_sentences_filtered.reset_index(drop=True)
    prob_diffs = [abs(t[0]-t[1]) for t in probs_test_filtered]
    prob_diffs_tuples = [(t[0],t[1]) for t in probs_test_filtered]
    df_sentences_filtered['model_pred_diff'] = pd.Series(prob_diffs)
    df_sentences_filtered['model_pred_prob_tuple'] = pd.Series(prob_diffs_tuples)
    df_sentences_filtered['model_pred'] = pd.Series(model.predict(L=filtered_vectors))

    wrong_preds = df_sentences_filtered[(df_sentences_filtered['expected_label']!=df_sentences_filtered['model_pred'])]
    # df_sentences_filtered.to_csv('predictions_shakira.csv', index=False)
    # logger.critical(wrong_preds)
    global_accuray_on_valid=(len(df_sentences_filtered)-len(wrong_preds))/len(df_sentences_filtered)

    print(f"""
        out of {len(sentences_df)} sentences, {len(df_sentences_filtered)} actually got at least one signal to \n
        make prediction. Out of all the valid predictions, we have {len(wrong_preds)} wrong predictions, \n
        accuracy = {(len(df_sentences_filtered)-len(wrong_preds))/len(df_sentences_filtered)} 
    """)
    
    global_accuracy = (len(df_sentences_filtered)-len(wrong_preds))/len(sentences_df)
    
    
    ground_truth = df_sentences_filtered['expected_label']
    snorkel_predictions = df_sentences_filtered['model_pred']
    snorkel_probs = df_sentences_filtered['model_pred_diff']
    df_sentences_filtered['vectors'] = pd.Series([",".join(map(str, t)) for t in filtered_vectors])
    correct_predictions = (snorkel_predictions == ground_truth)
    incorrect_predictions = (snorkel_predictions != ground_truth)
    correct_preds_by_snorkel = df_sentences_filtered[correct_predictions].reset_index(drop=True)
    wrong_preds_by_snorkel = df_sentences_filtered[incorrect_predictions].reset_index(drop=True)
    
    return df_sentences_filtered, correct_preds_by_snorkel, wrong_preds_by_snorkel, filtered_vectors, correct_predictions, incorrect_predictions, global_accuracy, global_accuray_on_valid 

In [5]:
def select_user_input(user_confirm_size,
                     user_complaint_size,
                     random_state,
                     filtered_vectors,
                     correct_preds_by_snorkel,
                     wrong_preds_by_snorkel,
                      correct_predictions,
                      incorrect_predictions ):

    user_confirm_df = correct_preds_by_snorkel.sample(n=user_confirm_size, random_state=random_state)
    user_complaints_df = wrong_preds_by_snorkel.sample(n=user_complaint_size, random_state=random_state)
    
    random_confirm_indices = user_confirm_df.index
    random_complaints_indices = user_complaints_df.index
    random_user_confirms_vecs = filtered_vectors[correct_predictions][random_confirm_indices]
    random_user_complaints_vecs = filtered_vectors[incorrect_predictions][random_complaints_indices]
    user_input_df = pd.concat([user_confirm_df, user_complaints_df])
    gts = user_input_df['expected_label'].reset_index(drop=True)
    user_vecs = np.vstack((random_user_confirms_vecs, random_user_complaints_vecs))
    
    return user_vecs, gts, user_input_df


In [6]:
def gather_user_input_signals_on_rules(tree_rules, user_input):
    leaf_nodes = []
    
    for atui in tree_rules:
        rids = set([])
        for i, c in user_input.iterrows():
            leaf_node_with_complaints = populate_violations(atui, c)
            if(leaf_node_with_complaints.number not in rids):
                rids.add(leaf_node_with_complaints.number)
                leaf_nodes.append(leaf_node_with_complaints)
            
    uinput_unsatisfied_counts = defaultdict(int)
    
    for ln in leaf_nodes:
        if(ln.label==ABSTAIN):
            for l in [SPAM, HAM]:
                for u in ln.pairs[l]:
                    uinput_unsatisfied_counts[u['cid']]+=1
    
    return uinput_unsatisfied_counts

In [7]:
def gather_used_keywords(tree_rules):
    
    used_keywords = []
    
    for atui in tree_rules:
        rids = set([])
        queue = deque([atui.root])
        while(queue):
            cur_node = queue.popleft()
            if(isinstance(cur_node, PredicateNode)):
                used_keywords.extend(cur_node.pred.keywords)
            if(cur_node.left):
                queue.append(cur_node.left)
            if(cur_node.right):
                queue.append(cur_node.right)
    
    return used_keywords

#     for i, c in sorted_df.iterrows():
#         leaf_node_with_complaints = populate_violations(atui, c)
#         if(leaf_node_with_complaints.number not in rids):
#             rids.add(leaf_node_with_complaints.number)
#             leaf_nodes.append(leaf_node_with_complaints)

In [8]:
def apply_new_lfs_to_df(new_funcs, user_input_df):
    new_rules_applier = PandasLFApplier(lfs=new_funcs)
    new_rules_vector = new_rules_applier.apply(df=user_input_df, progress_bar=False)
    
    return new_rules_vector


In [9]:
def construct_input_df_to_solver(user_vecs, gts):
    
#     df_new_vectors = pd.DataFrame(new_rules_vector, columns=[f'nlf_{i+1}' for i in range(new_rules_vector.shape[1])])
    df_user_vectors = pd.DataFrame(user_vecs, columns=[f'lf_{i+1}' for i in range(user_vecs.shape[1])])
    combined_df= pd.concat([df_user_vectors, gts], axis=1)
    
    return combined_df

In [10]:
# def lf_constraint_solve(df, lf_acc_thresh=0.5, 
#                         instance_acc_thresh=0.5,
#                         min_non_abstain_thresh=0.8,
#                         nlf_prefix='nlf_',
#                         expected_label_col='expected_label',
#                         new_lf_weight=0.1):
    
#     # Problem initialization
#     prob = pulp.LpProblem("Label_Flip_Minimization", pulp.LpMinimize)

#     # Parameters
# #     labeling_functions = df.columns[:-1]
#     labeling_functions = [lf_name for lf_name in df.columns if lf_name!=expected_label_col]
#     print(f"lf_acc: {lf_acc_thresh}, ins_acc:{instance_acc_thresh}, min_non_abstain_thresh")
#     print(f"labeling_functions: {labeling_functions}")
#     num_instances = len(df)
#     print(f"num_instances: {num_instances}")
#     M = 5
    
#     nlfs = [lf for lf in labeling_functions if nlf_prefix in lf]
#     print(f"nlfs: {nlfs}")
#     x_nlfs = pulp.LpVariable.dicts("x_nlf", nlfs, cat='Binary')

#     P_vars = pulp.LpVariable.dicts("P", (range(num_instances), labeling_functions), 
#                                    lowBound=-1, upBound=1, cat='Integer')
    
#     is_abstain = pulp.LpVariable.dicts("is_abstain", 
#                                (range(num_instances), labeling_functions), 
#                                cat='Binary')

#     flip_1_to_0 = pulp.LpVariable.dicts("flip_1_to_0", 
#                                         (range(num_instances), labeling_functions), cat='Binary')
#     flip_1_to_neg1 = pulp.LpVariable.dicts("flip_1_to_neg1", 
#                                            (range(num_instances), labeling_functions), cat='Binary')
#     flip_0_to_1 = pulp.LpVariable.dicts("flip_0_to_1", 
#                                         (range(num_instances), labeling_functions), cat='Binary')
#     flip_0_to_neg1 = pulp.LpVariable.dicts("flip_0_to_neg1", 
#                                            (range(num_instances), labeling_functions), cat='Binary')
#     flip_neg1_to_1 = pulp.LpVariable.dicts("flip_neg1_to_1", 
#                                            (range(num_instances), labeling_functions), cat='Binary')
#     flip_neg1_to_0 = pulp.LpVariable.dicts("flip_neg1_to_0", 
#                                            (range(num_instances), labeling_functions), cat='Binary')

#     # Binary variables to track correctness of predictions (1 if correct, 0 if not)
#     correctness_vars = pulp.LpVariable.dicts("correct", 
#                                              (range(num_instances), labeling_functions), cat='Binary')
    
#     # Create auxiliary variables to represent active nLF abstains
#     active_abstain = pulp.LpVariable.dicts("active_abstain", 
#                                            (range(num_instances), nlfs), 
#                                            cat='Binary')
    
#     correct_and_active = pulp.LpVariable.dicts("correct_and_active", 
#                                            (range(num_instances), nlfs), 
#                                            cat='Binary')


#     # Objective: Minimize the number of flips
#     flip_cost = pulp.lpSum([flip_1_to_0[i][lf] + flip_1_to_neg1[i][lf] + 
#                             flip_0_to_1[i][lf] + flip_0_to_neg1[i][lf] + 
#                             flip_neg1_to_1[i][lf] + flip_neg1_to_0[i][lf] 
#                             for i in range(num_instances) for lf in labeling_functions])

#     prob += flip_cost + pulp.lpSum([new_lf_weight * x_nlfs[lf] for lf in nlfs]), "Minimize_Flips"


#     # Mutual exclusivity
#     for i in range(num_instances):
#         for lf in labeling_functions:
#             prob += (flip_1_to_0[i][lf] + flip_1_to_neg1[i][lf] + 
#                      flip_0_to_1[i][lf] + flip_0_to_neg1[i][lf] + 
#                      flip_neg1_to_1[i][lf] + flip_neg1_to_0[i][lf]) <= 1, f"Flip_Exclusivity_{i}_{lf}"

#     for i in range(num_instances):
#         for lf in labeling_functions:
#             original_val = df.loc[i, lf]
#             if original_val == 1:
#                 prob += P_vars[i][lf] == 0 * flip_1_to_0[i][lf] + \
#                 (-1) * flip_1_to_neg1[i][lf] + 1 * (1 - flip_1_to_0[i][lf] - flip_1_to_neg1[i][lf]), f"Flip_From_1_{i}_{lf}"
                
#             elif original_val == 0:                
#                 prob += P_vars[i][lf] == 1 * flip_0_to_1[i][lf] + \
#                 (-1) * flip_0_to_neg1[i][lf] + 0 * (1 - flip_0_to_1[i][lf] - flip_0_to_neg1[i][lf]), f"Flip_From_0_{i}_{lf}"
                
#             elif original_val == -1:
#                 prob += P_vars[i][lf] == 1 * flip_neg1_to_1[i][lf] + 0 * flip_neg1_to_0[i][lf] + (-1) * (1 - flip_neg1_to_1[i][lf] - flip_neg1_to_0[i][lf]), f"Flip_From_neg1_{i}_{lf}"
    
#     for i in range(num_instances):
#         for lf in labeling_functions:
#             prob += P_vars[i][lf] >= -1 - (1 - is_abstain[i][lf]) * M, f"Abstain_LowerBound_{i}_{lf}"
#             prob += P_vars[i][lf] <= -1 + (1 - is_abstain[i][lf]) * M, f"Abstain_UpperBound_{i}_{lf}"

#             # If is_abstain[i][lf] == 0, P_vars[i][lf] can only be 0 or 1
#             prob += P_vars[i][lf] >= 0 - is_abstain[i][lf] * M, f"Non_Abstain_LowerBound_{i}_{lf}"
#             prob += P_vars[i][lf] <= 1 + is_abstain[i][lf] * M, f"Non_Abstain_UpperBound_{i}_{lf}"
    
#     # Set up the constraints for the auxiliary variables
#     for i in range(num_instances):
#         for lf in nlfs:
#             # Ensure active_abstain[i][lf] is 1 only if both is_abstain[i][lf] == 1 and x_nlfs[lf] == 1
#             prob += active_abstain[i][lf] <= is_abstain[i][lf], f"ActiveAbstain_LF_{lf}_Instance_{i}_1"
#             prob += active_abstain[i][lf] <= x_nlfs[lf], f"ActiveAbstain_LF_{lf}_Instance_{i}_2"
#             prob += active_abstain[i][lf] >= is_abstain[i][lf] + x_nlfs[lf] - 1, f"ActiveAbstain_LF_{lf}_Instance_{i}_3"

#     for i in range(num_instances):
#         for lf in nlfs:
#             # correct_and_active[i][lf] should be 1 only if both correctness_vars[i][lf] == 1 and x_nlfs[lf] == 1
#             prob += correct_and_active[i][lf] <= correctness_vars[i][lf], f"CorrectAndActive_UpperBound_1_{i}_{lf}"
#             prob += correct_and_active[i][lf] <= x_nlfs[lf], f"CorrectAndActive_UpperBound_2_{i}_{lf}"
#             prob += correct_and_active[i][lf] >= correctness_vars[i][lf] + x_nlfs[lf] - 1, f"CorrectAndActive_LowerBound_{i}_{lf}"
        
    
#     for lf in labeling_functions:
#         num_instances_abstain = pulp.lpSum([is_abstain[i][lf] for i in range(num_instances)])
#         if lf in nlfs:
#             lf_correct_predictions = pulp.lpSum([correctness_vars[i][lf] for i in range(num_instances)])
#             prob += lf_correct_predictions >= lf_acc_thresh * (num_instances-num_instances_abstain) - M * (1 - x_nlfs[lf]), f"LF_{lf}_Accuracy"
#         else:
#             lf_correct_predictions = pulp.lpSum([correctness_vars[i][lf] for i in range(num_instances)])
#             prob += lf_correct_predictions >= lf_acc_thresh * (num_instances-num_instances_abstain), f"LF_{lf}_Accuracy"



#     for i in range(num_instances):
#         for lf in nlfs:
#             # Ensure that correctness_vars[i][lf] is counted only if x_nlf[lf] = 1
#             prob += correctness_vars[i][lf] <= M * x_nlfs[lf], f"{lf}_active_{i}"
            
#         correct_predictions_per_instance = pulp.lpSum([correctness_vars[i][lf] for lf in labeling_functions if lf not in nlfs]) + \
#                                pulp.lpSum([correct_and_active[i][lf] for lf in nlfs])
#         instance_abstain_count = pulp.lpSum([is_abstain[i][lf] for lf in labeling_functions if lf not in nlfs]) + \
#                                  pulp.lpSum([active_abstain[i][lf] for lf in nlfs]) 
        
#         num_labeling_functions_used = len(labeling_functions) - len(nlfs) + pulp.lpSum(x_nlfs.values())
#         prob += correct_predictions_per_instance >= instance_acc_thresh * num_labeling_functions_used, f"Instance_{i}_Accuracy"
#         prob += instance_abstain_count <= num_labeling_functions_used *(1- min_non_abstain_thresh), f"Instance_{i}_NonAbastain"

        
#     for i in range(num_instances):
#         for lf in labeling_functions:
#             true_label = df[expected_label_col][i]
#             # Ensure that correctness_vars[i][lf] is 1 if P_vars[i][lf] equals true_label, else 0
#             prob += P_vars[i][lf] - true_label <= M * (1 - correctness_vars[i][lf]),\
#                                      f"Correctness_UpperBound_{i}_{lf}"
#             prob += true_label - P_vars[i][lf] <= M * (1 - correctness_vars[i][lf]), \
#                                      f"Correctness_LowerBound_{i}_{lf}"


#     # Solve the integer program
#     prob.solve()

#     p_vars_solution = pd.DataFrame(index=df.index, columns=labeling_functions)
#     active_abstain_df = pd.DataFrame(index=df.index, columns=labeling_functions)
#     is_abstain_df = pd.DataFrame(index=df.index, columns=labeling_functions)
    
#     for i in range(num_instances):
#         for lf in labeling_functions:
#             p_vars_solution.loc[i, lf] = int(pulp.value(P_vars[i][lf]))
    
#     correctness_solution = pd.DataFrame(index=df.index, columns=labeling_functions)
#     for i in range(num_instances):
#         for lf in labeling_functions:
#             correctness_solution.loc[i, lf] = int(pulp.value(correctness_vars[i][lf]))
    
#     x_nlfs_solution = {lf: pulp.value(x_nlfs[lf]) for lf in nlfs}
    
#     print(f"Status: {pulp.LpStatus[prob.status]}")
#     print(f"pulp.value(num_labeling_functions_used) : {pulp.value(num_labeling_functions_used)}")
    
#     for i in range(num_instances):
#         for lf in labeling_functions:
#             is_abstain_df.loc[i, lf] = int(pulp.value(is_abstain[i][lf]))
#     for i in range(num_instances):
#         for lf in nlfs:
#             active_abstain_df.loc[i, lf] = int(pulp.value(active_abstain[i][lf]))
    
#     return p_vars_solution, x_nlfs_solution, pulp, prob, active_abstain_df, is_abstain_df


In [11]:
def lf_constraint_solve_no_new_lf_and_non_abstain_thresh(df, lf_acc_thresh=0.5, 
                        instance_acc_thresh=0.5,
#                         min_non_abstain_thresh=0.8,
#                         nlf_prefix='nlf_',
                        expected_label_col='expected_label',
#                         new_lf_weight=0.1
                       ):
    
    # Problem initialization
    prob = pulp.LpProblem("Label_Flip_Minimization", pulp.LpMinimize)

    # Parameters
#     labeling_functions = df.columns[:-1]
    labeling_functions = [lf_name for lf_name in df.columns if lf_name!=expected_label_col]
    print(f"lf_acc: {lf_acc_thresh}, ins_acc:{instance_acc_thresh}")
    print(f"labeling_functions: {labeling_functions}")
    num_instances = len(df)
    print(f"num_instances: {num_instances}")
    M = 5
    
#     nlfs = [lf for lf in labeling_functions if nlf_prefix in lf]
#     print(f"nlfs: {nlfs}")
#     x_nlfs = pulp.LpVariable.dicts("x_nlf", nlfs, cat='Binary')

    P_vars = pulp.LpVariable.dicts("P", (range(num_instances), labeling_functions), 
                                   lowBound=-1, upBound=1, cat='Integer')
    
    is_abstain = pulp.LpVariable.dicts("is_abstain", 
                               (range(num_instances), labeling_functions), 
                               cat='Binary')

    flip_1_to_0 = pulp.LpVariable.dicts("flip_1_to_0", 
                                        (range(num_instances), labeling_functions), cat='Binary')
    flip_1_to_neg1 = pulp.LpVariable.dicts("flip_1_to_neg1", 
                                           (range(num_instances), labeling_functions), cat='Binary')
    flip_0_to_1 = pulp.LpVariable.dicts("flip_0_to_1", 
                                        (range(num_instances), labeling_functions), cat='Binary')
    flip_0_to_neg1 = pulp.LpVariable.dicts("flip_0_to_neg1", 
                                           (range(num_instances), labeling_functions), cat='Binary')
    flip_neg1_to_1 = pulp.LpVariable.dicts("flip_neg1_to_1", 
                                           (range(num_instances), labeling_functions), cat='Binary')
    flip_neg1_to_0 = pulp.LpVariable.dicts("flip_neg1_to_0", 
                                           (range(num_instances), labeling_functions), cat='Binary')

    # Binary variables to track correctness of predictions (1 if correct, 0 if not)
    correctness_vars = pulp.LpVariable.dicts("correct", 
                                             (range(num_instances), labeling_functions), cat='Binary')
    
#     # Create auxiliary variables to represent active nLF abstains
#     active_abstain = pulp.LpVariable.dicts("active_abstain", 
#                                            (range(num_instances), nlfs), 
#                                            cat='Binary')
    
#     correct_and_active = pulp.LpVariable.dicts("correct_and_active", 
#                                            (range(num_instances), nlfs), 
#                                            cat='Binary')


    # Objective: Minimize the number of flips
    flip_cost = pulp.lpSum([flip_1_to_0[i][lf] + flip_1_to_neg1[i][lf] + 
                            flip_0_to_1[i][lf] + flip_0_to_neg1[i][lf] + 
                            flip_neg1_to_1[i][lf] + flip_neg1_to_0[i][lf] 
                            for i in range(num_instances) for lf in labeling_functions])

#     prob += flip_cost + pulp.lpSum([new_lf_weight * x_nlfs[lf] for lf in nlfs]), "Minimize_Flips"
    prob += flip_cost, "Minimize_Flips"


    # Mutual exclusivity
    for i in range(num_instances):
        for lf in labeling_functions:
            prob += (flip_1_to_0[i][lf] + flip_1_to_neg1[i][lf] + 
                     flip_0_to_1[i][lf] + flip_0_to_neg1[i][lf] + 
                     flip_neg1_to_1[i][lf] + flip_neg1_to_0[i][lf]) <= 1, f"Flip_Exclusivity_{i}_{lf}"

    for i in range(num_instances):
        for lf in labeling_functions:
            original_val = df.loc[i, lf]
            if original_val == 1:
                prob += P_vars[i][lf] == 0 * flip_1_to_0[i][lf] + \
                (-1) * flip_1_to_neg1[i][lf] + 1 * (1 - flip_1_to_0[i][lf] - flip_1_to_neg1[i][lf]), f"Flip_From_1_{i}_{lf}"
                
            elif original_val == 0:                
                prob += P_vars[i][lf] == 1 * flip_0_to_1[i][lf] + \
                (-1) * flip_0_to_neg1[i][lf] + 0 * (1 - flip_0_to_1[i][lf] - flip_0_to_neg1[i][lf]), f"Flip_From_0_{i}_{lf}"
                
            elif original_val == -1:
                prob += P_vars[i][lf] == 1 * flip_neg1_to_1[i][lf] + 0 * flip_neg1_to_0[i][lf] + (-1) * (1 - flip_neg1_to_1[i][lf] - flip_neg1_to_0[i][lf]), f"Flip_From_neg1_{i}_{lf}"
    
    for i in range(num_instances):
        for lf in labeling_functions:
            prob += P_vars[i][lf] >= -1 - (1 - is_abstain[i][lf]) * M, f"Abstain_LowerBound_{i}_{lf}"
            prob += P_vars[i][lf] <= -1 + (1 - is_abstain[i][lf]) * M, f"Abstain_UpperBound_{i}_{lf}"

            # If is_abstain[i][lf] == 0, P_vars[i][lf] can only be 0 or 1
            prob += P_vars[i][lf] >= 0 - is_abstain[i][lf] * M, f"Non_Abstain_LowerBound_{i}_{lf}"
            prob += P_vars[i][lf] <= 1 + is_abstain[i][lf] * M, f"Non_Abstain_UpperBound_{i}_{lf}"
    
    # Set up the constraints for the auxiliary variables
#     for i in range(num_instances):
#         for lf in nlfs:
#             # Ensure active_abstain[i][lf] is 1 only if both is_abstain[i][lf] == 1 and x_nlfs[lf] == 1
#             prob += active_abstain[i][lf] <= is_abstain[i][lf], f"ActiveAbstain_LF_{lf}_Instance_{i}_1"
#             prob += active_abstain[i][lf] <= x_nlfs[lf], f"ActiveAbstain_LF_{lf}_Instance_{i}_2"
#             prob += active_abstain[i][lf] >= is_abstain[i][lf] + x_nlfs[lf] - 1, f"ActiveAbstain_LF_{lf}_Instance_{i}_3"

#     for i in range(num_instances):
#         for lf in nlfs:
#             # correct_and_active[i][lf] should be 1 only if both correctness_vars[i][lf] == 1 and x_nlfs[lf] == 1
#             prob += correct_and_active[i][lf] <= correctness_vars[i][lf], f"CorrectAndActive_UpperBound_1_{i}_{lf}"
#             prob += correct_and_active[i][lf] <= x_nlfs[lf], f"CorrectAndActive_UpperBound_2_{i}_{lf}"
#             prob += correct_and_active[i][lf] >= correctness_vars[i][lf] + x_nlfs[lf] - 1, f"CorrectAndActive_LowerBound_{i}_{lf}"
        
    
    for lf in labeling_functions:
        num_instances_abstain = pulp.lpSum([is_abstain[i][lf] for i in range(num_instances)])
#         if lf in nlfs:
#             lf_correct_predictions = pulp.lpSum([correctness_vars[i][lf] for i in range(num_instances)])
#             prob += lf_correct_predictions >= lf_acc_thresh * (num_instances-num_instances_abstain) - M * (1 - x_nlfs[lf]), f"LF_{lf}_Accuracy"
#         else:
        lf_correct_predictions = pulp.lpSum([correctness_vars[i][lf] for i in range(num_instances)])
        prob += lf_correct_predictions >= lf_acc_thresh * (num_instances-num_instances_abstain), f"LF_{lf}_Accuracy"



    for i in range(num_instances):
#         for lf in nlfs:
#             # Ensure that correctness_vars[i][lf] is counted only if x_nlf[lf] = 1
#             prob += correctness_vars[i][lf] <= M * x_nlfs[lf], f"{lf}_active_{i}"
            
#         correct_predictions_per_instance = pulp.lpSum([correctness_vars[i][lf] for lf in labeling_functions if lf not in nlfs]) + \
#                                pulp.lpSum([correct_and_active[i][lf] for lf in nlfs])
        correct_predictions_per_instance = pulp.lpSum([correctness_vars[i][lf] for lf in labeling_functions])
            
#         instance_abstain_count = pulp.lpSum([is_abstain[i][lf] for lf in labeling_functions if lf not in nlfs]) + \
#                                  pulp.lpSum([active_abstain[i][lf] for lf in nlfs]) 
        instance_abstain_count = pulp.lpSum([is_abstain[i][lf] for lf in labeling_functions])
        
#         num_labeling_functions_used = len(labeling_functions) - len(nlfs) + pulp.lpSum(x_nlfs.values())
        num_labeling_functions_used = len(labeling_functions)

        prob += correct_predictions_per_instance >= instance_acc_thresh * (num_labeling_functions_used-instance_abstain_count), f"Instance_{i}_Accuracy"
#         prob += instance_abstain_count <= num_labeling_functions_used *(1- min_non_abstain_thresh), f"Instance_{i}_NonAbastain"

        
    for i in range(num_instances):
        for lf in labeling_functions:
            true_label = df[expected_label_col][i]
            # Ensure that correctness_vars[i][lf] is 1 if P_vars[i][lf] equals true_label, else 0
            prob += P_vars[i][lf] - true_label <= M * (1 - correctness_vars[i][lf]),\
                                     f"Correctness_UpperBound_{i}_{lf}"
            prob += true_label - P_vars[i][lf] <= M * (1 - correctness_vars[i][lf]), \
                                     f"Correctness_LowerBound_{i}_{lf}"


    # Solve the integer program
    prob.solve()

    p_vars_solution = pd.DataFrame(index=df.index, columns=labeling_functions)
    active_abstain_df = pd.DataFrame(index=df.index, columns=labeling_functions)
    is_abstain_df = pd.DataFrame(index=df.index, columns=labeling_functions)
    
    for i in range(num_instances):
        for lf in labeling_functions:
            p_vars_solution.loc[i, lf] = int(pulp.value(P_vars[i][lf]))
    
    correctness_solution = pd.DataFrame(index=df.index, columns=labeling_functions)
    for i in range(num_instances):
        for lf in labeling_functions:
            correctness_solution.loc[i, lf] = int(pulp.value(correctness_vars[i][lf]))
    
#     x_nlfs_solution = {lf: pulp.value(x_nlfs[lf]) for lf in nlfs}
    
    print(f"Status: {pulp.LpStatus[prob.status]}")
    print(f"pulp.value(num_labeling_functions_used) : {pulp.value(num_labeling_functions_used)}")
    
#     for i in range(num_instances):
#         for lf in labeling_functions:
#             is_abstain_df.loc[i, lf] = int(pulp.value(is_abstain[i][lf]))
#     for i in range(num_instances):
#         for lf in nlfs:
#             active_abstain_df.loc[i, lf] = int(pulp.value(active_abstain[i][lf]))
    
#     return p_vars_solution, x_nlfs_solution, pulp, prob, active_abstain_df, is_abstain_df

    return p_vars_solution, pulp, prob

In [12]:
# for c in list(combined_df):
#     print(f"{c}: {combined_df[c].value_counts().to_dict()}")

In [13]:
def create_solver_input_df_copies(lf_names_after_fix, user_input_df, res_df):
    df_copies = {}

    cols_needed = ['text', 'expected_label', 'cid']

    # Loop through each column in df2 and create a copy of df1 with modified 'expected_label'
    for lf in lf_names_after_fix:
        # Create a deep copy of df1
        df_copy = user_input_df.copy(deep=True)

        # Update the 'expected_label' column based on the corresponding column in df2
        df_copy['expected_label'] = res_df[lf].values

        # Store the modified dataframe in the dictionary with key as the labeling function name
        df_copies[lf] = df_copy[cols_needed]
    
    return df_copies



In [14]:
import math
import time 


In [15]:
def main_driver(user_input_size,
         lf_acc_thresh,
         instance_acc_thresh,
        dataset_name,
        random_state,
        funcs_dictionary):
    
    
    run_times = ['snorkel_first_run','snorkel_run_after_fix', 'solver_runtime','repair_time']
    runtime_dict = {r:0 for r in run_times}

    gen_input_tree_rules_func = funcs_dictionary[dataset_name]
    
    conn = psycopg2.connect(dbname='label', user='postgres')
    
    user_complaint_size = math.floor(user_input_size * 0.5)
    user_confirm_size = user_input_size - user_complaint_size
     
    treerules_for_user_input = gen_input_tree_rules_func()
    
    treerules = gen_input_tree_rules_func()
    
    funcs = [f.gen_label_rule() for f in treerules]
    
    first_snorkel_run_start = time.time()
    df_sentences_filtered, correct_preds_by_snorkel, wrong_preds_by_snorkel, filtered_vectors, correct_predictions, incorrect_predictions, global_accuracy, global_accuracy_on_valid =run_snorkel_with_funcs(dataset_name=dataset_name, funcs=funcs, conn=conn)
    first_snorkel_run_end = time.time()
    first_snorkel_run_time = first_snorkel_run_end - first_snorkel_run_start
    runtime_dict['snorkel_first_run'] = first_snorkel_run_time

    user_vecs, gts, user_input_df = select_user_input(user_confirm_size, user_complaint_size, random_state,
                      filtered_vectors,correct_preds_by_snorkel,
                      wrong_preds_by_snorkel, correct_predictions, incorrect_predictions)

        
    combined_df = construct_input_df_to_solver(user_vecs, gts)
    
    solver_runtime_start = time.time()
    res_df, res_pulp, res_prob = lf_constraint_solve_no_new_lf_and_non_abstain_thresh(df=combined_df, 
                lf_acc_thresh=lf_acc_thresh,
                instance_acc_thresh=instance_acc_thresh,
                expected_label_col='expected_label')
    solver_runtime_end = time.time()
    solver_runtime = solver_runtime_end - solver_runtime_start
    runtime_dict['solver_runtime'] = solver_runtime
    
    fix_book_keeping_dict = {'original_'+str(k.id):{'rule':k, 'deleted':False,
                       'pre_fix_size':k.size, 
                       'after_fix_size':k.size, 
                       'pre-deleted': False} for k in treerules}
    
    lfs_witan = [l for l in list(combined_df) if ('nlf' not in l and l!='expected_label')]
#     lfs_manual_added =  [x for x in inclusion_dict if inclusion_dict[x]==1]
#     lf_names_after_fix = lfs_witan +lfs_manual_added

    df_copies = create_solver_input_df_copies(lf_names_after_fix=lfs_witan,
                                     user_input_df=user_input_df,
                                     res_df=res_df)
    df_list = list(df_copies.values())

    book_keeping_dict_list = list(fix_book_keeping_dict)
    
    for i in range(len(df_list)):
        fix_book_keeping_dict[book_keeping_dict_list[i]]['user_input'] = df_list[i]
        fix_book_keeping_dict[book_keeping_dict_list[i]]['user_input']['id'] = \
        fix_book_keeping_dict[book_keeping_dict_list[i]]['user_input'].reset_index().index
    
    
    repair_alghorithm_start = time.time()
    fix_rules_with_solver_input(fix_book_keeping_dict=fix_book_keeping_dict)
    repair_alghorithm_end = time.time()
    repair_alghorithm_time = repair_alghorithm_end - repair_alghorithm_start
    runtime_dict['repair_time'] = repair_alghorithm_time
    
    new_trees = [x['rule'] for x in fix_book_keeping_dict.values()]
    funcs_after_fix = [f.gen_label_rule() for f in new_trees]

    snorkel_run_after_fix_start = time.time()
    new_df_sentences_filtered, correct_preds_by_snorkel, wrong_preds_by_snorkel, filtered_vectors, correct_predictions, incorrect_predictions, new_global_accuracy, new_global_accuracy_on_valid =run_snorkel_with_funcs(dataset_name=dataset_name, funcs=funcs_after_fix, conn=conn) 
    snorkel_run_after_fix_end = time.time()
    snorkel_run_after_fix_time = snorkel_run_after_fix_end - snorkel_run_after_fix_start
    runtime_dict['snorkel_run_after_fix'] = snorkel_run_after_fix_time
    
    complaints = user_input_df[user_input_df['expected_label']!=user_input_df['model_pred']]
    complant_ids = complaints['cid'].to_list()
    confirms = user_input_df[user_input_df['expected_label']==user_input_df['model_pred']]
    confirm_ids = confirms['cid'].to_list()
    
    df_confirms_after_fix = new_df_sentences_filtered[(new_df_sentences_filtered['cid'].isin(confirm_ids))]
    df_complaints_after_fix = new_df_sentences_filtered[(new_df_sentences_filtered['cid'].isin(complant_ids))]
    
    confirm_preserv_rate = len(df_confirms_after_fix[df_confirms_after_fix['expected_label']==df_confirms_after_fix['model_pred']])/len(df_confirms_after_fix)
    complain_fix_rate = len(df_complaints_after_fix[df_complaints_after_fix['expected_label']==df_complaints_after_fix['model_pred']])/len(df_complaints_after_fix)
    
    ret = {'before_fix_global_accuracy':global_accuracy,
           'user_input_size':user_input_size,
           'lf_acc_thresh':lf_acc_thresh,
           'instance_acc_thresh':instance_acc_thresh,
           'dataset_name':dataset_name,
           'random_state':random_state,
           'confirm_prev_rate':confirm_preserv_rate,
           'complain_fix_rate':complain_fix_rate,
           'new_global_accuracy':new_global_accuracy,
           'global_accuracy_on_valid_data': global_accuracy_on_valid,
          'new_global_accuracy_on_valid': new_global_accuracy_on_valid,
           'valid_global_data_size': len(df_sentences_filtered),
           'new_valid_global_data_size': len(new_df_sentences_filtered),
           'runtimes': runtime_dict
           }
    
    
    res_to_save = {'summary': ret, 'fix_details': fix_book_keeping_dict}

    # Get the current timestamp
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    with open(f'testing_agnews-no_new_lf_and_no_non_abstain_input_{dataset_name}_sample_params_{user_input_size}-{lf_acc_thresh}-{instance_acc_thresh}-{random_state}-{timestamp}.pkl', 'wb') as resf:
        pickle.dump(res_to_save, resf)
    
    conn.close()
    
    
    return fix_book_keeping_dict, res_df, gts, user_input_df, df_sentences_filtered, ret

In [16]:
# instance accuracy: |correct_predictions_from_included_lfs|/|included_lfs|
# lf accuracy: |correct_predictions_from_each_lf|/|non_abstain_preds_from_the_lf|
# instance_non_abstain_thresh: each instance cant have more than (instance_non_abstain_thresh*100)% abstains

In [17]:
import signal
import time 

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

# def run_with_params(params):
#     time.sleep(params)
#     return f"Finished params: {params}"

# def test_params_with_timeout(params_list, time_limit_minutes):
#     time_limit_seconds = int(time_limit_minutes * 60)
#     signal.signal(signal.SIGALRM, timeout_handler)
#     results = []

#     for params in params_list:
#         signal.alarm(time_limit_seconds)  # Set the timeout
#         try:
#             result = run_with_params(params)
#             print(result)
#             results.append(result)
#         except TimeoutException:
#             print(f"Params {params} exceeded time limit, moving to next.")
#         finally:
#             signal.alarm(0)  # Reset the alarm

#     return results

# Example usage
# params_list = [1, 5, 10, 2]  # Parameters that would be passed to the function
# results = test_params_with_timeout(params_list, time_limit_minutes=0.1)

In [18]:
import concurrent.futures
import time


In [19]:
def run_main_with_params(user_input_size, lf_acc_thresh, instance_acc_thresh, 
                        random_state, dataset_name, funcs_dictionary):

    fix_book_keeping_dict, res_df, gts, user_input_df, df_sentences_filtered, summary = main_driver(
        user_input_size=user_input_size,
        lf_acc_thresh=lf_acc_thresh,
        instance_acc_thresh=instance_acc_thresh,
        random_state=random_state,
        dataset_name=dataset_name,
        funcs_dictionary=funcs_dictionary
    )
    res_to_save = {'summary': summary, 'fix_details': fix_book_keeping_dict}
    return res_to_save

In [20]:
# def frange(start, stop, step):
#     while start < stop:
#         yield round(start, 10)  # Rounding to avoid floating-point precision issues
#         start += step

In [21]:
from rbbm_src.labelling_func_src.src.example_tree_rules import (
gen_amazon_funcs,
gen_professor_teacher_funcs,
gen_painter_architecht_funcs,
gen_imdb_funcs,
gen_pj_funcs,
gen_pp_funcs,
gen_yelp_funcs,
gen_plots_funcs,
gen_fakenews_funcs,
gen_dbpedia_funcs,
gen_agnews_funcs,
gen_tweets_funcs,
gen_spam_funcs
)

In [22]:
dataset_dict = {
#     "plots": gen_plots_funcs,
#     "amazon": gen_amazon_funcs,
#     "dbpedia": gen_dbpedia_funcs,
    "agnews": gen_agnews_funcs,
#     "physician_professor": gen_pp_funcs,
#     "imdb": gen_imdb_funcs,
#     "fakenews": gen_fakenews_funcs,
#     "yelp": gen_yelp_funcs,
#     "photographer_journalist": gen_pj_funcs,
#     "professor_teacher": gen_professor_teacher_funcs,
#     "painter_architect": gen_painter_architecht_funcs,
#     "tweets": gen_tweets_funcs,
#     "spam": gen_spam_funcs,
}

In [23]:
from collections import defaultdict
import psycopg2
import pandas as pd
import concurrent.futures
import time

In [24]:
def test_main_with_timeout(params_list, time_limit_minutes):
    time_limit_seconds = time_limit_minutes * 60
    results = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for params in params_list:
            future = executor.submit(run_main_with_params, *params)
            try:
                result = future.result(timeout=time_limit_seconds)
                print(f"Params {params} finished successfully.")
                results.append(result)
            except concurrent.futures.TimeoutError:
                print(f"Params {params} exceeded the time limit, moving to the next set.")
    
    return results

In [25]:
# res_storing = defaultdict(dict)

In [26]:
# user_input_sizes = [20, 40]
# random_states = [123, 42]
# lf_acc_threshs = [0.7]
# instance_acc_threshs = [0.8]
# non_abstain_threshs = [0.5, 0.8]
# datasets = list(dataset_dict)
# func_dictionary = [dataset_dict]


# testing agnews
user_input_sizes = [20]
random_states = [1,321,4,123,6,5,2,7,8,3,42,]
lf_acc_threshs = [0.7]
instance_acc_threshs = [0.8]
# non_abstain_threshs = [0.8]
datasets = list(dataset_dict)
func_dictionary = [dataset_dict]


In [27]:
import itertools

In [29]:
input_params = list(itertools.product(
    user_input_sizes,
    lf_acc_threshs,
    instance_acc_threshs,
    random_states,
    datasets,
    func_dictionary
))

In [30]:
input_params

[(20,
  0.7,
  0.8,
  1,
  'agnews',
  {'agnews': <function rbbm_src.labelling_func_src.src.example_tree_rules.gen_agnews_funcs()>}),
 (20,
  0.7,
  0.8,
  321,
  'agnews',
  {'agnews': <function rbbm_src.labelling_func_src.src.example_tree_rules.gen_agnews_funcs()>}),
 (20,
  0.7,
  0.8,
  4,
  'agnews',
  {'agnews': <function rbbm_src.labelling_func_src.src.example_tree_rules.gen_agnews_funcs()>}),
 (20,
  0.7,
  0.8,
  123,
  'agnews',
  {'agnews': <function rbbm_src.labelling_func_src.src.example_tree_rules.gen_agnews_funcs()>}),
 (20,
  0.7,
  0.8,
  6,
  'agnews',
  {'agnews': <function rbbm_src.labelling_func_src.src.example_tree_rules.gen_agnews_funcs()>}),
 (20,
  0.7,
  0.8,
  5,
  'agnews',
  {'agnews': <function rbbm_src.labelling_func_src.src.example_tree_rules.gen_agnews_funcs()>}),
 (20,
  0.7,
  0.8,
  2,
  'agnews',
  {'agnews': <function rbbm_src.labelling_func_src.src.example_tree_rules.gen_agnews_funcs()>}),
 (20,
  0.7,
  0.8,
  7,
  'agnews',
  {'agnews': <functio

In [31]:
len(input_params)

11

In [32]:
# for i in range(0,3):
#     test_main_with_timeout(input_params, time_limit_minutes=20)

for i in range(0,3):
    test_main_with_timeout(input_params, time_limit_minutes=20)

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 21:57:52,911 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 21:57:52,922 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 21:57:52,943 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  9%|████▉                                                | 46/500 [00:00<00:02, 189.15epoch/s]INFO 2024-10-15 21:57:53,208 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 35%|██████████████████▏                                 | 175/500 [00:00<00:00, 637.28epoch/s]INFO 2024-10-15 21:57:53,283 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 21:57:53,359 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 61%|███████████████████████████████▋                    | 305/500 [00:00<00:00, 879.43epoch/s]INFO 2024-10-15 21:57:53,434 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 21:57:54,350 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4d07c56296794f5b81f42f7c4a841b90-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/4d07c56296794f5b81f42f7c4a841b90-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4d07c56296794f5b81f42f7c4a841b90-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/4d07c56296794f5b81f42f7c4a841b90-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 402 strengthened rows, 60 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 235 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 21:58:10,689 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 21:58:10,699 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 21:58:10,707 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.053]
INFO 2024-10-15 21:58:10,784 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 25%|████████████▋                                      | 124/500 [00:00<00:00, 1238.01epoch/s]INFO 2024-10-15 21:58:10,857 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
 53%|██████████████████████████▉                        | 264/500 [00:00<00:00, 1328.66epoch/s]INFO 2024-10-15 21:58:10,930 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 21:58:11,001 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.003]
100%|███████████████████████████████████████████████████| 500/500 [00:00<00:00, 1345.95epoch/s]
INFO 2024


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7546 wrong predictions, 

        accuracy = 0.8126427649220379 
    
Params (20, 0.7, 0.8, 1, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 21:58:27,112 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 21:58:27,123 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 21:58:27,140 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
INFO 2024-10-15 21:58:27,213 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 24%|████████████                                       | 118/500 [00:00<00:00, 1177.91epoch/s]INFO 2024-10-15 21:58:27,285 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 51%|██████████████████████████▏                        | 257/500 [00:00<00:00, 1298.87epoch/s]INFO 2024-10-15 21:58:27,358 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 79%|████████████████████████████████████████▍          | 396/500 [00:00<00:00, 1336.36epoch/s]INFO 2024-10-15 21:58:27,432 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 21:58:28,427 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/f075602dbc4e4c89b77ba46ec69aca3a-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/f075602dbc4e4c89b77ba46ec69aca3a-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/f075602dbc4e4c89b77ba46ec69aca3a-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/f075602dbc4e4c89b77ba46ec69aca3a-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 419 strengthened rows, 56 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 258 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 21:58:50,527 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 21:58:50,538 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 21:58:50,565 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.055]
  7%|███▌                                                 | 34/500 [00:00<00:02, 160.45epoch/s]INFO 2024-10-15 21:58:50,841 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 23%|████████████                                        | 116/500 [00:00<00:00, 448.45epoch/s]INFO 2024-10-15 21:58:50,929 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|████████████████████████                            | 231/500 [00:00<00:00, 717.78epoch/s]INFO 2024-10-15 21:58:51,016 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 69%|███████████████████████████████████▉                | 346/500 [00:00<00:00, 869.49epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7562 wrong predictions, 

        accuracy = 0.8122455060085411 
    
Params (20, 0.7, 0.8, 321, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 21:59:13,101 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 21:59:13,111 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 21:59:13,132 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 13%|██████▊                                              | 64/500 [00:00<00:02, 211.86epoch/s]INFO 2024-10-15 21:59:13,483 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 34%|█████████████████▌                                  | 169/500 [00:00<00:00, 523.25epoch/s]INFO 2024-10-15 21:59:13,578 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 55%|████████████████████████████▋                       | 276/500 [00:00<00:00, 711.39epoch/s]INFO 2024-10-15 21:59:13,671 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 78%|████████████████████████████████████████▍           | 389/500 [00:00<00:00, 847.83epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 21:59:14,877 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/085daca382ba43788e1ccade00a42ffb-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/085daca382ba43788e1ccade00a42ffb-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/085daca382ba43788e1ccade00a42ffb-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/085daca382ba43788e1ccade00a42ffb-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 455 strengthened rows, 66 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 291 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 21:59:37,090 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 21:59:37,100 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 21:59:37,120 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.062]
  9%|████▌                                                | 43/500 [00:00<00:03, 139.35epoch/s]INFO 2024-10-15 21:59:37,498 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 25%|████████████▊                                       | 123/500 [00:00<00:00, 391.52epoch/s]INFO 2024-10-15 21:59:37,594 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|███████████████████████▉                            | 230/500 [00:00<00:00, 629.90epoch/s]INFO 2024-10-15 21:59:37,687 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 68%|███████████████████████████████████▏                | 338/500 [00:00<00:00, 779.44epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7665 wrong predictions, 

        accuracy = 0.809688151752905 
    
Params (20, 0.7, 0.8, 4, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 21:59:53,931 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 21:59:53,941 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 21:59:53,947 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
INFO 2024-10-15 21:59:54,022 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 26%|█████████████▎                                     | 130/500 [00:00<00:00, 1296.62epoch/s]INFO 2024-10-15 21:59:54,096 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 53%|███████████████████████████                        | 265/500 [00:00<00:00, 1322.71epoch/s]INFO 2024-10-15 21:59:54,172 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 21:59:54,243 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|███████████████████████████████████████████████████| 500/500 [00:00<00:00, 1299.43epoch/s]
INFO 2024


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 21:59:55,328 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/d9c8c1eaa81741fab2a894ae2d408822-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/d9c8c1eaa81741fab2a894ae2d408822-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/d9c8c1eaa81741fab2a894ae2d408822-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/d9c8c1eaa81741fab2a894ae2d408822-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 440 strengthened rows, 51 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 273 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:00:11,315 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:00:11,325 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:00:11,331 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.060]
INFO 2024-10-15 22:00:11,416 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 23%|███████████▋                                       | 115/500 [00:00<00:00, 1142.79epoch/s]INFO 2024-10-15 22:00:11,498 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
 48%|████████████████████████▋                          | 242/500 [00:00<00:00, 1214.38epoch/s]INFO 2024-10-15 22:00:11,573 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 75%|██████████████████████████████████████▍            | 377/500 [00:00<00:00, 1274.86epoch/s]INFO 2024-10-15 22:00:11,647 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7619 wrong predictions, 

        accuracy = 0.8108302711292085 
    
Params (20, 0.7, 0.8, 123, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:00:33,663 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:00:33,674 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:00:33,691 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  5%|██▌                                                  | 24/500 [00:00<00:03, 121.07epoch/s]INFO 2024-10-15 22:00:33,962 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 24%|████████████▋                                       | 122/500 [00:00<00:00, 508.88epoch/s]INFO 2024-10-15 22:00:34,053 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|████████████████████████▏                           | 232/500 [00:00<00:00, 738.57epoch/s]INFO 2024-10-15 22:00:34,147 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 68%|███████████████████████████████████▎                | 340/500 [00:00<00:00, 860.52epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:00:35,323 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/3150f71854e44304a5d3ba78fe0c362f-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/3150f71854e44304a5d3ba78fe0c362f-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/3150f71854e44304a5d3ba78fe0c362f-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/3150f71854e44304a5d3ba78fe0c362f-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 440 strengthened rows, 56 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 274 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:00:57,607 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:00:57,617 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:00:57,629 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.066]
 16%|████████▌                                            | 81/500 [00:00<00:01, 414.02epoch/s]INFO 2024-10-15 22:00:57,844 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 39%|████████████████████                                | 193/500 [00:00<00:00, 731.10epoch/s]INFO 2024-10-15 22:00:57,933 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:00:58,021 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 61%|███████████████████████████████▌                    | 303/500 [00:00<00:00, 872.53epoch/s]INFO 2024-10-15 22:00:58,117 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 8226 wrong predictions, 

        accuracy = 0.7957592610984209 
    
Params (20, 0.7, 0.8, 6, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:01:14,185 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:01:14,196 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:01:14,204 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 20%|██████████▍                                         | 100/500 [00:00<00:01, 354.83epoch/s]INFO 2024-10-15 22:01:14,525 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:01:14,600 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|████████████████████████                            | 231/500 [00:00<00:00, 713.64epoch/s]INFO 2024-10-15 22:01:14,675 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 73%|██████████████████████████████████████▏             | 367/500 [00:00<00:00, 939.11epoch/s]INFO 2024-10-15 22:01:14,749 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:01:15,664 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/dec7f534a746484fa0e400ea9f6c09e5-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/dec7f534a746484fa0e400ea9f6c09e5-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/dec7f534a746484fa0e400ea9f6c09e5-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/dec7f534a746484fa0e400ea9f6c09e5-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 427 strengthened rows, 58 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 261 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:01:31,661 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:01:31,670 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:01:31,688 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.048]
  6%|███▎                                                 | 31/500 [00:00<00:01, 307.99epoch/s]INFO 2024-10-15 22:01:31,865 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.004]
 22%|███████████▌                                        | 111/500 [00:00<00:00, 593.83epoch/s]INFO 2024-10-15 22:01:31,950 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|███████████████████████▉                            | 230/500 [00:00<00:00, 863.52epoch/s]INFO 2024-10-15 22:01:32,034 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 70%|████████████████████████████████████▍               | 350/500 [00:00<00:00, 994.60epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 31729 wrong predictions, 

        accuracy = 0.21221074585361008 
    
Params (20, 0.7, 0.8, 5, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:01:50,816 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:01:50,826 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:01:50,845 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 15%|████████▏                                            | 77/500 [00:00<00:01, 269.68epoch/s]INFO 2024-10-15 22:01:51,176 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 38%|███████████████████▉                                | 192/500 [00:00<00:00, 604.02epoch/s]INFO 2024-10-15 22:01:51,261 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:01:51,345 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 62%|████████████████████████████████▏                   | 309/500 [00:00<00:00, 802.29epoch/s]INFO 2024-10-15 22:01:51,430 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:01:52,404 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/548078847bf841a18dfca14d6d55a2c1-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/548078847bf841a18dfca14d6d55a2c1-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/548078847bf841a18dfca14d6d55a2c1-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/548078847bf841a18dfca14d6d55a2c1-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 417 strengthened rows, 68 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 257 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:02:08,633 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:02:08,643 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:02:08,653 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.052]
INFO 2024-10-15 22:02:08,737 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.004]
 22%|███████████▎                                       | 111/500 [00:00<00:00, 1101.68epoch/s]INFO 2024-10-15 22:02:08,821 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|███████████████████████▌                           | 231/500 [00:00<00:00, 1156.03epoch/s]INFO 2024-10-15 22:02:08,906 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 70%|███████████████████████████████████▋               | 350/500 [00:00<00:00, 1168.08epoch/s]INFO 2024-10-15 22:02:08,990 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7523 wrong predictions, 

        accuracy = 0.8132138246101897 
    
Params (20, 0.7, 0.8, 2, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:02:24,859 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:02:24,868 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:02:24,900 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 13%|██████▋                                              | 63/500 [00:00<00:01, 238.10epoch/s]INFO 2024-10-15 22:02:25,214 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 35%|██████████████████▏                                 | 175/500 [00:00<00:00, 580.48epoch/s]INFO 2024-10-15 22:02:25,302 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 58%|█████████████████████████████▉                      | 288/500 [00:00<00:00, 774.93epoch/s]INFO 2024-10-15 22:02:25,395 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:02:25,479 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:02:26,664 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/ebe5ae10c4fa481e983a2f8a9c7f8469-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/ebe5ae10c4fa481e983a2f8a9c7f8469-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/ebe5ae10c4fa481e983a2f8a9c7f8469-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/ebe5ae10c4fa481e983a2f8a9c7f8469-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 394 strengthened rows, 70 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 222 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:02:49,733 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:02:49,744 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:02:49,762 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.042]
  9%|████▋                                                | 44/500 [00:00<00:02, 175.57epoch/s]INFO 2024-10-15 22:02:50,041 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 28%|██████████████▌                                     | 140/500 [00:00<00:00, 488.61epoch/s]INFO 2024-10-15 22:02:50,181 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.005]
 42%|██████████████████████                              | 212/500 [00:00<00:00, 571.81epoch/s]INFO 2024-10-15 22:02:50,266 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.003]
 66%|██████████████████████████████████▎                 | 330/500 [00:00<00:00, 778.91epoc


        out of 60000 sentences, 39808 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 31106 wrong predictions, 

        accuracy = 0.2185992765273312 
    
Params (20, 0.7, 0.8, 7, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:03:11,529 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:03:11,539 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:03:11,567 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  9%|████▉                                                | 47/500 [00:00<00:02, 159.99epoch/s]INFO 2024-10-15 22:03:11,906 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 32%|████████████████▍                                   | 158/500 [00:00<00:00, 529.08epoch/s]INFO 2024-10-15 22:03:11,982 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 58%|██████████████████████████████▎                     | 291/500 [00:00<00:00, 813.57epoch/s]INFO 2024-10-15 22:03:12,059 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:03:12,133 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:03:13,145 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4e6922fa1e59410e878086eb7bd79078-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/4e6922fa1e59410e878086eb7bd79078-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4e6922fa1e59410e878086eb7bd79078-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/4e6922fa1e59410e878086eb7bd79078-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 441 strengthened rows, 58 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 271 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:03:28,939 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:03:28,949 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:03:28,962 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.054]
 18%|█████████▊                                           | 92/500 [00:00<00:00, 915.84epoch/s]INFO 2024-10-15 22:03:29,061 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:03:29,144 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 42%|█████████████████████▍                             | 210/500 [00:00<00:00, 1070.27epoch/s]INFO 2024-10-15 22:03:29,257 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 64%|█████████████████████████████████                   | 318/500 [00:00<00:00, 984.77epoch/s]INFO 2024-10-15 22:03:29,341 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7657 wrong predictions, 

        accuracy = 0.8098867812096534 
    
Params (20, 0.7, 0.8, 8, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:03:52,026 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:03:52,036 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:03:52,056 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  6%|██▉                                                  | 28/500 [00:00<00:03, 121.56epoch/s]INFO 2024-10-15 22:03:52,330 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 30%|███████████████▌                                    | 150/500 [00:00<00:00, 584.06epoch/s]INFO 2024-10-15 22:03:52,406 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 55%|████████████████████████████▌                       | 275/500 [00:00<00:00, 833.28epoch/s]INFO 2024-10-15 22:03:52,490 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 79%|████████████████████████████████████████▉           | 394/500 [00:00<00:00, 955.30epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:03:53,619 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/ec6e4f4c21914705b240e3b023de82bc-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/ec6e4f4c21914705b240e3b023de82bc-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/ec6e4f4c21914705b240e3b023de82bc-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/ec6e4f4c21914705b240e3b023de82bc-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 406 strengthened rows, 60 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 232 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:04:15,804 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:04:15,815 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:04:15,832 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.048]
 17%|████████▉                                            | 84/500 [00:00<00:01, 270.90epoch/s]INFO 2024-10-15 22:04:16,162 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.007]
 40%|████████████████████▋                               | 199/500 [00:00<00:00, 599.25epoch/s]INFO 2024-10-15 22:04:16,248 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.006]
INFO 2024-10-15 22:04:16,321 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.005]
 67%|██████████████████████████████████▋                 | 334/500 [00:00<00:00, 859.49epoch/s]INFO 2024-10-15 22:04:16,395 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.004]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 8115 wrong predictions, 

        accuracy = 0.7985152448108055 
    
Params (20, 0.7, 0.8, 3, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:04:32,096 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:04:32,106 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:04:32,119 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 11%|█████▊                                               | 55/500 [00:00<00:01, 279.56epoch/s]INFO 2024-10-15 22:04:32,364 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 32%|████████████████▌                                   | 159/500 [00:00<00:00, 623.00epoch/s]INFO 2024-10-15 22:04:32,448 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 56%|█████████████████████████████                       | 279/500 [00:00<00:00, 846.59epoch/s]INFO 2024-10-15 22:04:32,532 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 80%|█████████████████████████████████████████▍          | 399/500 [00:00<00:00, 972.15epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:04:33,543 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/1b009a0ef6334104bf8352de938efcce-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/1b009a0ef6334104bf8352de938efcce-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/1b009a0ef6334104bf8352de938efcce-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/1b009a0ef6334104bf8352de938efcce-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 424 strengthened rows, 67 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 268 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:04:49,576 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:04:49,585 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:04:49,606 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.037]
  7%|███▌                                                 | 34/500 [00:00<00:02, 167.81epoch/s]INFO 2024-10-15 22:04:49,862 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 30%|███████████████▌                                    | 150/500 [00:00<00:00, 604.89epoch/s]INFO 2024-10-15 22:04:49,947 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 54%|████████████████████████████▏                       | 271/500 [00:00<00:00, 837.30epoch/s]INFO 2024-10-15 22:04:50,030 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 78%|████████████████████████████████████████▊           | 392/500 [00:00<00:00, 967.64epoc


        out of 60000 sentences, 36291 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7072 wrong predictions, 

        accuracy = 0.8051307486704693 
    
Params (20, 0.7, 0.8, 42, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:05:06,285 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:05:06,294 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:05:06,322 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  4%|██▎                                                  | 22/500 [00:00<00:02, 217.08epoch/s]INFO 2024-10-15 22:05:06,462 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 31%|███████████████▉                                    | 153/500 [00:00<00:00, 854.91epoch/s]INFO 2024-10-15 22:05:06,533 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 59%|██████████████████████████████▎                    | 297/500 [00:00<00:00, 1118.42epoch/s]INFO 2024-10-15 22:05:06,604 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:05:06,685 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:05:07,714 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/84846aaeb57c468ab787415e8a42a96b-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/84846aaeb57c468ab787415e8a42a96b-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/84846aaeb57c468ab787415e8a42a96b-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/84846aaeb57c468ab787415e8a42a96b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 402 strengthened rows, 60 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 235 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:05:23,579 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:05:23,589 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:05:23,617 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.053]
 12%|██████▏                                              | 58/500 [00:00<00:02, 203.93epoch/s]INFO 2024-10-15 22:05:23,938 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 38%|███████████████████▉                                | 192/500 [00:00<00:00, 644.98epoch/s]INFO 2024-10-15 22:05:24,007 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:05:24,075 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.003]
 67%|███████████████████████████████████                 | 337/500 [00:00<00:00, 931.79epoch/s]INFO 2024-10-15 22:05:24,144 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.003]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7546 wrong predictions, 

        accuracy = 0.8126427649220379 
    
Params (20, 0.7, 0.8, 1, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:05:40,303 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:05:40,314 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:05:40,329 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 18%|█████████▊                                           | 92/500 [00:00<00:01, 328.76epoch/s]INFO 2024-10-15 22:05:40,646 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 39%|████████████████████▏                               | 194/500 [00:00<00:00, 314.71epoch/s]INFO 2024-10-15 22:05:40,980 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 52%|███████████████████████████▏                        | 261/500 [00:00<00:00, 312.95epoch/s]INFO 2024-10-15 22:05:41,259 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 76%|███████████████████████████████████████▋            | 382/500 [00:01<00:00, 359.77epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:05:42,876 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/6eb436da5fd84521992feaa94e043eeb-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/6eb436da5fd84521992feaa94e043eeb-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/6eb436da5fd84521992feaa94e043eeb-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/6eb436da5fd84521992feaa94e043eeb-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 419 strengthened rows, 56 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 258 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:05:58,818 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:05:58,829 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:05:58,861 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.055]
 11%|█████▋                                               | 54/500 [00:00<00:02, 180.67epoch/s]INFO 2024-10-15 22:05:59,180 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 37%|███████████████████                                 | 183/500 [00:00<00:00, 606.73epoch/s]INFO 2024-10-15 22:05:59,257 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:05:59,333 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 63%|████████████████████████████████▋                   | 314/500 [00:00<00:00, 854.76epoch/s]INFO 2024-10-15 22:05:59,410 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7562 wrong predictions, 

        accuracy = 0.8122455060085411 
    
Params (20, 0.7, 0.8, 321, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:06:15,249 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:06:15,258 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:06:15,262 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
INFO 2024-10-15 22:06:15,331 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 29%|██████████████▊                                    | 145/500 [00:00<00:00, 1448.75epoch/s]INFO 2024-10-15 22:06:15,399 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 59%|█████████████████████████████▉                     | 293/500 [00:00<00:00, 1467.05epoch/s]INFO 2024-10-15 22:06:15,467 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:06:15,535 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|███████████████████████████████████████████████████| 500/500 [00:00<00:00, 1463.99epoch/s]
INFO 2024


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:06:16,443 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/5c883f484f5c4950b18253f9890b8ace-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/5c883f484f5c4950b18253f9890b8ace-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/5c883f484f5c4950b18253f9890b8ace-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/5c883f484f5c4950b18253f9890b8ace-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 455 strengthened rows, 66 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 291 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:06:33,458 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:06:33,468 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:06:33,490 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.062]
 19%|██████████▏                                          | 96/500 [00:00<00:00, 955.71epoch/s]INFO 2024-10-15 22:06:33,577 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:06:33,661 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 43%|█████████████████████▋                             | 213/500 [00:00<00:00, 1078.67epoch/s]INFO 2024-10-15 22:06:33,748 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 66%|█████████████████████████████████▋                 | 330/500 [00:00<00:00, 1116.75epoch/s]INFO 2024-10-15 22:06:33,832 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7665 wrong predictions, 

        accuracy = 0.809688151752905 
    
Params (20, 0.7, 0.8, 4, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:06:56,313 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:06:56,324 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:06:56,346 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 18%|█████████▍                                           | 89/500 [00:00<00:00, 889.22epoch/s]INFO 2024-10-15 22:06:56,438 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:06:56,515 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 43%|█████████████████████▊                             | 214/500 [00:00<00:00, 1096.48epoch/s]INFO 2024-10-15 22:06:56,588 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 71%|████████████████████████████████████               | 354/500 [00:00<00:00, 1234.26epoch/s]INFO 2024-10-15 22:06:56,664 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:06:57,642 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/c9ae71b6985141148a5daecae7634aaa-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/c9ae71b6985141148a5daecae7634aaa-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/c9ae71b6985141148a5daecae7634aaa-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/c9ae71b6985141148a5daecae7634aaa-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 440 strengthened rows, 51 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 273 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:07:19,186 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:07:19,197 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:07:19,216 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.060]
 11%|█████▌                                               | 53/500 [00:00<00:02, 190.36epoch/s]INFO 2024-10-15 22:07:19,562 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 37%|███████████████████▏                                | 185/500 [00:00<00:00, 615.87epoch/s]INFO 2024-10-15 22:07:19,637 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:07:19,711 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 63%|████████████████████████████████▉                   | 317/500 [00:00<00:00, 861.16epoch/s]INFO 2024-10-15 22:07:19,782 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7619 wrong predictions, 

        accuracy = 0.8108302711292085 
    
Params (20, 0.7, 0.8, 123, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:07:36,111 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:07:36,121 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:07:36,125 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
INFO 2024-10-15 22:07:36,194 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 29%|██████████████▌                                    | 143/500 [00:00<00:00, 1424.33epoch/s]INFO 2024-10-15 22:07:36,264 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 58%|█████████████████████████████▍                     | 289/500 [00:00<00:00, 1439.48epoch/s]INFO 2024-10-15 22:07:36,333 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:07:36,401 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|███████████████████████████████████████████████████| 500/500 [00:00<00:00, 1447.00epoch/s]
INFO 2024


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:07:37,354 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/ad9c483c729648318444c8bf8cca74fb-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/ad9c483c729648318444c8bf8cca74fb-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/ad9c483c729648318444c8bf8cca74fb-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/ad9c483c729648318444c8bf8cca74fb-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 440 strengthened rows, 56 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 274 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:07:52,980 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:07:52,990 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:07:52,998 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.066]
 10%|█████▌                                               | 52/500 [00:00<00:00, 518.34epoch/s]INFO 2024-10-15 22:07:53,128 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 34%|█████████████████▉                                  | 172/500 [00:00<00:00, 917.46epoch/s]INFO 2024-10-15 22:07:53,216 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:07:53,292 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 60%|██████████████████████████████▊                    | 302/500 [00:00<00:00, 1089.54epoch/s]INFO 2024-10-15 22:07:53,368 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 8226 wrong predictions, 

        accuracy = 0.7957592610984209 
    
Params (20, 0.7, 0.8, 6, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:08:09,707 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:08:09,717 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:08:09,750 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 15%|███████▋                                             | 73/500 [00:00<00:02, 166.86epoch/s]INFO 2024-10-15 22:08:10,176 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:08:10,245 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 43%|██████████████████████▎                             | 215/500 [00:00<00:00, 599.80epoch/s]INFO 2024-10-15 22:08:10,314 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 72%|█████████████████████████████████████▌              | 361/500 [00:00<00:00, 883.32epoch/s]INFO 2024-10-15 22:08:10,384 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:08:11,413 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4a118c53dfa6463ba746eb3b840ac03c-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/4a118c53dfa6463ba746eb3b840ac03c-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4a118c53dfa6463ba746eb3b840ac03c-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/4a118c53dfa6463ba746eb3b840ac03c-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 427 strengthened rows, 58 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 261 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:08:33,569 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:08:33,579 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:08:33,600 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.048]
  9%|████▌                                                | 43/500 [00:00<00:03, 137.76epoch/s]INFO 2024-10-15 22:08:33,978 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.004]
 26%|█████████████▎                                      | 128/500 [00:00<00:00, 408.19epoch/s]INFO 2024-10-15 22:08:34,058 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 51%|██████████████████████████▌                         | 256/500 [00:00<00:00, 713.41epoch/s]INFO 2024-10-15 22:08:34,136 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 77%|████████████████████████████████████████▏           | 386/500 [00:00<00:00, 908.53epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 31729 wrong predictions, 

        accuracy = 0.21221074585361008 
    
Params (20, 0.7, 0.8, 5, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:08:56,673 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:08:56,683 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:08:56,714 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  8%|████▎                                                | 41/500 [00:00<00:02, 197.18epoch/s]INFO 2024-10-15 22:08:56,981 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 24%|████████████▍                                       | 119/500 [00:00<00:00, 450.54epoch/s]INFO 2024-10-15 22:08:57,076 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|███████████████████████▋                            | 228/500 [00:00<00:00, 695.64epoch/s]INFO 2024-10-15 22:08:57,167 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 68%|███████████████████████████████████▎                | 339/500 [00:00<00:00, 842.07epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:08:58,287 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/b7eac3ad56a74f05955a47d2672e44fe-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/b7eac3ad56a74f05955a47d2672e44fe-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/b7eac3ad56a74f05955a47d2672e44fe-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/b7eac3ad56a74f05955a47d2672e44fe-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 417 strengthened rows, 68 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 257 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:09:20,308 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:09:20,318 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:09:20,357 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.052]
 11%|██████                                               | 57/500 [00:00<00:03, 135.84epoch/s]INFO 2024-10-15 22:09:20,794 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.004]
 34%|█████████████████▍                                  | 168/500 [00:00<00:00, 433.17epoch/s]INFO 2024-10-15 22:09:20,873 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 60%|███████████████████████████████                     | 299/500 [00:00<00:00, 698.26epoch/s]INFO 2024-10-15 22:09:20,950 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:09:21,027 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7523 wrong predictions, 

        accuracy = 0.8132138246101897 
    
Params (20, 0.7, 0.8, 2, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:09:37,048 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:09:37,059 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:09:37,076 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 18%|█████████▍                                           | 89/500 [00:00<00:01, 337.60epoch/s]INFO 2024-10-15 22:09:37,380 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:09:37,458 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 42%|█████████████████████▉                              | 211/500 [00:00<00:00, 680.07epoch/s]INFO 2024-10-15 22:09:37,557 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 62%|████████████████████████████████▍                   | 312/500 [00:00<00:00, 795.63epoch/s]INFO 2024-10-15 22:09:37,662 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:09:38,625 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/e7164260fd1f43ada46b0ee0f6c9e53b-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/e7164260fd1f43ada46b0ee0f6c9e53b-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/e7164260fd1f43ada46b0ee0f6c9e53b-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/e7164260fd1f43ada46b0ee0f6c9e53b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 394 strengthened rows, 70 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 222 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:10:01,196 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:10:01,207 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:10:01,248 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.042]
 19%|█████████▊                                           | 93/500 [00:00<00:00, 505.27epoch/s]INFO 2024-10-15 22:10:01,433 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
INFO 2024-10-15 22:10:01,514 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.005]
 42%|█████████████████████▋                              | 209/500 [00:00<00:00, 795.48epoch/s]INFO 2024-10-15 22:10:01,596 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.003]
 67%|██████████████████████████████████▉                 | 336/500 [00:00<00:00, 978.60epoch/s]INFO 2024-10-15 22:10:01,674 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 39808 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 31106 wrong predictions, 

        accuracy = 0.2185992765273312 
    
Params (20, 0.7, 0.8, 7, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:10:17,926 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:10:17,936 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:10:17,954 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 14%|███████▍                                             | 70/500 [00:00<00:01, 381.98epoch/s]INFO 2024-10-15 22:10:18,178 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 40%|████████████████████▌                               | 198/500 [00:00<00:00, 777.81epoch/s]INFO 2024-10-15 22:10:18,257 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:10:18,336 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 65%|█████████████████████████████████▌                  | 323/500 [00:00<00:00, 959.38epoch/s]INFO 2024-10-15 22:10:18,414 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:10:19,460 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/d27d671f3204412f90468731fcf51a68-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/d27d671f3204412f90468731fcf51a68-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/d27d671f3204412f90468731fcf51a68-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/d27d671f3204412f90468731fcf51a68-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 441 strengthened rows, 58 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 271 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:10:41,466 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:10:41,476 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:10:41,499 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.054]
  7%|███▌                                                 | 34/500 [00:00<00:04, 106.79epoch/s]INFO 2024-10-15 22:10:41,873 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 22%|███████████▌                                        | 111/500 [00:00<00:01, 354.74epoch/s]INFO 2024-10-15 22:10:41,977 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 48%|████████████████████████▊                           | 238/500 [00:00<00:00, 672.96epoch/s]INFO 2024-10-15 22:10:42,055 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 73%|██████████████████████████████████████▏             | 367/500 [00:00<00:00, 876.92epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7657 wrong predictions, 

        accuracy = 0.8098867812096534 
    
Params (20, 0.7, 0.8, 8, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:10:58,423 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:10:58,433 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:10:58,452 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  7%|███▌                                                 | 34/500 [00:00<00:01, 328.66epoch/s]INFO 2024-10-15 22:10:58,589 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 34%|█████████████████▊                                  | 171/500 [00:00<00:00, 932.37epoch/s]INFO 2024-10-15 22:10:58,661 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:10:58,729 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 63%|████████████████████████████████                   | 314/500 [00:00<00:00, 1157.06epoch/s]INFO 2024-10-15 22:10:58,799 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:10:59,835 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/633abe2c26fb46b9ba3fe28306cb7cd3-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/633abe2c26fb46b9ba3fe28306cb7cd3-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/633abe2c26fb46b9ba3fe28306cb7cd3-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/633abe2c26fb46b9ba3fe28306cb7cd3-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 406 strengthened rows, 60 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 232 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:11:21,987 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:11:21,997 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:11:22,011 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.048]
 11%|█████▊                                               | 55/500 [00:00<00:01, 272.69epoch/s]INFO 2024-10-15 22:11:22,248 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.007]
 34%|█████████████████▌                                  | 169/500 [00:00<00:00, 659.86epoch/s]INFO 2024-10-15 22:11:22,334 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.006]
 57%|█████████████████████████████▊                      | 287/500 [00:00<00:00, 862.14epoch/s]INFO 2024-10-15 22:11:22,419 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.005]
INFO 2024-10-15 22:11:22,497 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.004]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 8115 wrong predictions, 

        accuracy = 0.7985152448108055 
    
Params (20, 0.7, 0.8, 3, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:11:38,659 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:11:38,669 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:11:38,682 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  8%|████▎                                                | 41/500 [00:00<00:02, 201.96epoch/s]INFO 2024-10-15 22:11:38,978 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 20%|██████████▌                                         | 102/500 [00:00<00:01, 382.29epoch/s]INFO 2024-10-15 22:11:39,059 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 44%|███████████████████████                             | 222/500 [00:00<00:00, 699.88epoch/s]INFO 2024-10-15 22:11:39,142 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 70%|████████████████████████████████████▌               | 351/500 [00:00<00:00, 909.18epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:11:40,171 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/eb53c43cc2974197b150a73c347e2b79-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/eb53c43cc2974197b150a73c347e2b79-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/eb53c43cc2974197b150a73c347e2b79-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/eb53c43cc2974197b150a73c347e2b79-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 424 strengthened rows, 67 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 268 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:11:56,590 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:11:56,600 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:11:56,647 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.037]
 19%|██████████▎                                          | 97/500 [00:00<00:00, 531.36epoch/s]INFO 2024-10-15 22:11:56,809 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:11:56,885 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 45%|███████████████████████▌                            | 226/500 [00:00<00:00, 875.97epoch/s]INFO 2024-10-15 22:11:56,958 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 74%|█████████████████████████████████████▋             | 369/500 [00:00<00:00, 1092.26epoch/s]INFO 2024-10-15 22:11:57,029 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.001]
100%|█████


        out of 60000 sentences, 36291 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7072 wrong predictions, 

        accuracy = 0.8051307486704693 
    
Params (20, 0.7, 0.8, 42, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:12:13,031 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:12:13,041 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:12:13,052 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 14%|███████▍                                             | 70/500 [00:00<00:01, 265.32epoch/s]INFO 2024-10-15 22:12:13,323 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:12:13,395 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 41%|█████████████████████▍                              | 206/500 [00:00<00:00, 689.48epoch/s]INFO 2024-10-15 22:12:13,465 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 70%|████████████████████████████████████▍               | 350/500 [00:00<00:00, 955.91epoch/s]INFO 2024-10-15 22:12:13,537 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:12:14,485 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/5419cab69ead4c498032942ce97fc43e-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/5419cab69ead4c498032942ce97fc43e-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/5419cab69ead4c498032942ce97fc43e-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/5419cab69ead4c498032942ce97fc43e-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 402 strengthened rows, 60 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 235 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:12:30,737 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:12:30,747 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:12:30,752 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.053]
INFO 2024-10-15 22:12:30,834 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 24%|████████████▍                                      | 122/500 [00:00<00:00, 1217.52epoch/s]INFO 2024-10-15 22:12:30,923 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
 49%|████████████████████████▉                          | 244/500 [00:00<00:00, 1134.93epoch/s]INFO 2024-10-15 22:12:31,014 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.003]
 72%|████████████████████████████████████▌              | 358/500 [00:00<00:00, 1127.32epoch/s]INFO 2024-10-15 22:12:31,104 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.003]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7546 wrong predictions, 

        accuracy = 0.8126427649220379 
    
Params (20, 0.7, 0.8, 1, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:12:47,232 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:12:47,242 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:12:47,267 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 17%|█████████                                            | 86/500 [00:00<00:00, 462.91epoch/s]INFO 2024-10-15 22:12:47,457 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:12:47,526 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|███████████████████████▋                            | 228/500 [00:00<00:00, 897.70epoch/s]INFO 2024-10-15 22:12:47,596 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 75%|██████████████████████████████████████▏            | 374/500 [00:00<00:00, 1117.35epoch/s]INFO 2024-10-15 22:12:47,665 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:12:48,642 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/5b641b0b5fe04513adf131e2e7d12533-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/5b641b0b5fe04513adf131e2e7d12533-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/5b641b0b5fe04513adf131e2e7d12533-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/5b641b0b5fe04513adf131e2e7d12533-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 419 strengthened rows, 56 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 258 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:13:04,840 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:13:04,850 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:13:04,868 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.055]
INFO 2024-10-15 22:13:04,941 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 23%|███████████▉                                       | 117/500 [00:00<00:00, 1168.27epoch/s]INFO 2024-10-15 22:13:05,010 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 53%|███████████████████████████                        | 265/500 [00:00<00:00, 1347.56epoch/s]INFO 2024-10-15 22:13:05,078 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:13:05,146 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|███████████████████████████████████████████████████| 500/500 [00:00<00:00, 1387.94epoch/s]
INFO 2024


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7562 wrong predictions, 

        accuracy = 0.8122455060085411 
    
Params (20, 0.7, 0.8, 321, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:13:21,130 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:13:21,140 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:13:21,176 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  6%|███▎                                                 | 31/500 [00:00<00:03, 137.76epoch/s]INFO 2024-10-15 22:13:21,430 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 30%|███████████████▌                                    | 150/500 [00:00<00:00, 585.06epoch/s]INFO 2024-10-15 22:13:21,508 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 56%|█████████████████████████████                       | 280/500 [00:00<00:00, 853.52epoch/s]INFO 2024-10-15 22:13:21,586 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:13:21,662 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:13:22,648 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/b99d412e8abe46d0b17badae2a559d79-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/b99d412e8abe46d0b17badae2a559d79-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/b99d412e8abe46d0b17badae2a559d79-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/b99d412e8abe46d0b17badae2a559d79-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 455 strengthened rows, 66 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 291 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:13:38,875 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:13:38,884 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:13:38,900 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.062]
  8%|████▎                                                | 41/500 [00:00<00:02, 210.62epoch/s]INFO 2024-10-15 22:13:39,142 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 33%|█████████████████                                   | 164/500 [00:00<00:00, 671.82epoch/s]INFO 2024-10-15 22:13:39,217 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 56%|█████████████████████████████                       | 280/500 [00:00<00:00, 861.99epoch/s]INFO 2024-10-15 22:13:39,306 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:13:39,374 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7665 wrong predictions, 

        accuracy = 0.809688151752905 
    
Params (20, 0.7, 0.8, 4, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:13:55,391 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:13:55,401 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:13:55,410 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 13%|██████▉                                              | 66/500 [00:00<00:01, 342.06epoch/s]INFO 2024-10-15 22:13:55,639 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:13:55,709 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 41%|█████████████████████▌                              | 207/500 [00:00<00:00, 816.96epoch/s]INFO 2024-10-15 22:13:55,778 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 71%|████████████████████████████████████               | 353/500 [00:00<00:00, 1066.22epoch/s]INFO 2024-10-15 22:13:55,848 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:13:56,869 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/7c7ae743b91d4bb19e243b52b7117cba-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/7c7ae743b91d4bb19e243b52b7117cba-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/7c7ae743b91d4bb19e243b52b7117cba-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/7c7ae743b91d4bb19e243b52b7117cba-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 440 strengthened rows, 51 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 273 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:14:12,345 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:14:12,356 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:14:12,366 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.060]
 18%|█████████▋                                           | 91/500 [00:00<00:01, 214.89epoch/s]INFO 2024-10-15 22:14:12,842 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 35%|██████████████████                                  | 174/500 [00:00<00:00, 422.44epoch/s]INFO 2024-10-15 22:14:12,920 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:14:12,995 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 61%|███████████████████████████████▋                    | 305/500 [00:00<00:00, 710.15epoch/s]INFO 2024-10-15 22:14:13,068 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7619 wrong predictions, 

        accuracy = 0.8108302711292085 
    
Params (20, 0.7, 0.8, 123, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:14:29,185 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:14:29,195 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:14:29,211 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 11%|█████▊                                               | 55/500 [00:00<00:02, 165.85epoch/s]INFO 2024-10-15 22:14:29,583 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 32%|████████████████▋                                   | 161/500 [00:00<00:00, 476.47epoch/s]INFO 2024-10-15 22:14:29,683 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 60%|██████████████████████████████▉                     | 298/500 [00:00<00:00, 774.57epoch/s]INFO 2024-10-15 22:14:29,756 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:14:29,824 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:14:30,799 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/506d74fbc42d4118a4845728fc244f5b-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/506d74fbc42d4118a4845728fc244f5b-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/506d74fbc42d4118a4845728fc244f5b-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/506d74fbc42d4118a4845728fc244f5b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 440 strengthened rows, 56 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 274 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:14:47,003 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:14:47,013 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:14:47,046 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.066]
  6%|███▍                                                 | 32/500 [00:00<00:02, 159.28epoch/s]INFO 2024-10-15 22:14:47,308 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 24%|████████████▎                                       | 118/500 [00:00<00:00, 474.38epoch/s]INFO 2024-10-15 22:14:47,380 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
 51%|██████████████████████████▌                         | 255/500 [00:00<00:00, 822.41epoch/s]INFO 2024-10-15 22:14:47,458 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 77%|███████████████████████████████████████▉            | 384/500 [00:00<00:00, 987.34epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 8226 wrong predictions, 

        accuracy = 0.7957592610984209 
    
Params (20, 0.7, 0.8, 6, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:15:03,505 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:15:03,514 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:15:03,526 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 13%|██████▋                                              | 63/500 [00:00<00:01, 312.72epoch/s]INFO 2024-10-15 22:15:03,806 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 23%|████████████▏                                       | 117/500 [00:00<00:00, 414.56epoch/s]INFO 2024-10-15 22:15:03,885 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 49%|█████████████████████████▍                          | 244/500 [00:00<00:00, 749.89epoch/s]INFO 2024-10-15 22:15:03,967 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 73%|██████████████████████████████████████▏             | 367/500 [00:00<00:00, 920.01epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:15:05,112 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/185636f302ac44b793cd00f296cd4bf9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/185636f302ac44b793cd00f296cd4bf9-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/185636f302ac44b793cd00f296cd4bf9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/185636f302ac44b793cd00f296cd4bf9-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 427 strengthened rows, 58 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 261 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:15:21,134 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:15:21,144 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:15:21,171 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.048]
 10%|█████▏                                               | 49/500 [00:00<00:01, 237.06epoch/s]INFO 2024-10-15 22:15:21,434 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.004]
 26%|█████████████▌                                      | 131/500 [00:00<00:00, 495.18epoch/s]INFO 2024-10-15 22:15:21,507 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 54%|████████████████████████████▏                       | 271/500 [00:00<00:00, 845.99epoch/s]INFO 2024-10-15 22:15:21,578 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:15:21,648 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 31729 wrong predictions, 

        accuracy = 0.21221074585361008 
    
Params (20, 0.7, 0.8, 5, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:15:37,732 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:15:37,741 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:15:37,759 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 18%|█████████▍                                           | 89/500 [00:00<00:01, 261.99epoch/s]INFO 2024-10-15 22:15:38,120 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:15:38,198 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 42%|█████████████████████▊                              | 210/500 [00:00<00:00, 595.84epoch/s]INFO 2024-10-15 22:15:38,276 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 68%|███████████████████████████████████▎                | 340/500 [00:00<00:00, 830.92epoch/s]INFO 2024-10-15 22:15:38,354 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:15:39,308 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/dd19112bd4b444028b1de557986bf811-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/dd19112bd4b444028b1de557986bf811-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/dd19112bd4b444028b1de557986bf811-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/dd19112bd4b444028b1de557986bf811-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 417 strengthened rows, 68 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 257 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:15:55,631 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:15:55,641 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:15:55,652 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.052]
  6%|███▎                                                 | 31/500 [00:00<00:01, 309.55epoch/s]INFO 2024-10-15 22:15:55,805 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.004]
 30%|███████████████▋                                    | 151/500 [00:00<00:00, 829.74epoch/s]INFO 2024-10-15 22:15:55,883 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 56%|████████████████████████████▋                      | 281/500 [00:00<00:00, 1043.79epoch/s]INFO 2024-10-15 22:15:55,960 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:15:56,033 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7523 wrong predictions, 

        accuracy = 0.8132138246101897 
    
Params (20, 0.7, 0.8, 2, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:16:11,993 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:16:12,003 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:16:12,025 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  9%|████▋                                                | 44/500 [00:00<00:03, 150.38epoch/s]INFO 2024-10-15 22:16:12,374 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 34%|█████████████████▌                                  | 169/500 [00:00<00:00, 568.99epoch/s]INFO 2024-10-15 22:16:12,453 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 60%|███████████████████████████████                     | 299/500 [00:00<00:00, 826.17epoch/s]INFO 2024-10-15 22:16:12,530 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:16:12,606 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:16:13,641 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/e6c141a2d5534de88c5b3fab94dbf2d9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/e6c141a2d5534de88c5b3fab94dbf2d9-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/e6c141a2d5534de88c5b3fab94dbf2d9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/e6c141a2d5534de88c5b3fab94dbf2d9-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 394 strengthened rows, 70 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 222 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:16:30,028 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:16:30,039 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:16:30,071 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.042]
  9%|████▋                                                | 44/500 [00:00<00:02, 157.47epoch/s]INFO 2024-10-15 22:16:30,397 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 33%|█████████████████▎                                  | 166/500 [00:00<00:00, 571.77epoch/s]INFO 2024-10-15 22:16:30,476 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.005]
 59%|██████████████████████████████▌                     | 294/500 [00:00<00:00, 824.98epoch/s]INFO 2024-10-15 22:16:30,555 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:16:30,631 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 39808 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 31106 wrong predictions, 

        accuracy = 0.2185992765273312 
    
Params (20, 0.7, 0.8, 7, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:16:46,399 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:16:46,410 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:16:46,435 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  9%|████▌                                                | 43/500 [00:00<00:03, 121.74epoch/s]INFO 2024-10-15 22:16:46,807 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 34%|█████████████████▍                                  | 168/500 [00:00<00:00, 527.65epoch/s]INFO 2024-10-15 22:16:46,886 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 58%|██████████████████████████████▎                     | 292/500 [00:00<00:00, 769.50epoch/s]INFO 2024-10-15 22:16:46,968 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:16:47,046 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:16:48,047 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/5a33623abac44a1f95f05a97b0248232-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/5a33623abac44a1f95f05a97b0248232-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/5a33623abac44a1f95f05a97b0248232-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/5a33623abac44a1f95f05a97b0248232-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 441 strengthened rows, 58 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 271 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:17:10,522 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:17:10,533 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:17:10,553 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.054]
 10%|█████▎                                               | 50/500 [00:00<00:02, 172.51epoch/s]INFO 2024-10-15 22:17:10,905 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 29%|███████████████▏                                    | 146/500 [00:00<00:00, 479.91epoch/s]INFO 2024-10-15 22:17:10,987 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 52%|███████████████████████████▏                        | 261/500 [00:00<00:00, 718.59epoch/s]INFO 2024-10-15 22:17:11,069 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:17:11,138 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7657 wrong predictions, 

        accuracy = 0.8098867812096534 
    
Params (20, 0.7, 0.8, 8, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:17:27,125 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:17:27,135 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:17:27,160 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  9%|████▊                                                | 45/500 [00:00<00:02, 223.96epoch/s]INFO 2024-10-15 22:17:27,424 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 26%|█████████████▋                                      | 132/500 [00:00<00:00, 510.14epoch/s]INFO 2024-10-15 22:17:27,495 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 55%|████████████████████████████▊                       | 277/500 [00:00<00:00, 873.97epoch/s]INFO 2024-10-15 22:17:27,565 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:17:27,636 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:17:28,722 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/cbc73519476d4ceba96479acd05561a3-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/cbc73519476d4ceba96479acd05561a3-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/cbc73519476d4ceba96479acd05561a3-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/cbc73519476d4ceba96479acd05561a3-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 406 strengthened rows, 60 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 232 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:17:44,725 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:17:44,735 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:17:44,753 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.048]
 10%|█████                                                | 48/500 [00:00<00:03, 129.82epoch/s]INFO 2024-10-15 22:17:45,146 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.007]
 29%|███████████████▎                                    | 147/500 [00:00<00:00, 423.29epoch/s]INFO 2024-10-15 22:17:45,236 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.006]
 58%|██████████████████████████████▏                     | 290/500 [00:00<00:00, 752.55epoch/s]INFO 2024-10-15 22:17:45,306 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.005]
INFO 2024-10-15 22:17:45,374 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.004]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 8115 wrong predictions, 

        accuracy = 0.7985152448108055 
    
Params (20, 0.7, 0.8, 3, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:18:01,288 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:18:01,298 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:18:01,328 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  8%|████                                                 | 38/500 [00:00<00:01, 347.56epoch/s]INFO 2024-10-15 22:18:01,460 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 35%|██████████████████                                  | 174/500 [00:00<00:00, 918.44epoch/s]INFO 2024-10-15 22:18:01,540 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 60%|██████████████████████████████▍                    | 299/500 [00:00<00:00, 1066.15epoch/s]INFO 2024-10-15 22:18:01,614 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:18:01,688 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:18:02,780 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/9878a3566ff64f318637af46f62f6f9b-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/9878a3566ff64f318637af46f62f6f9b-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/9878a3566ff64f318637af46f62f6f9b-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/9878a3566ff64f318637af46f62f6f9b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 424 strengthened rows, 67 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 268 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:18:18,930 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:18:18,941 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:18:18,945 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.037]
 16%|████████▋                                            | 82/500 [00:00<00:00, 816.65epoch/s]INFO 2024-10-15 22:18:19,060 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:18:19,136 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 42%|█████████████████████▌                             | 211/500 [00:00<00:00, 1092.38epoch/s]INFO 2024-10-15 22:18:19,208 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 71%|████████████████████████████████████               | 354/500 [00:00<00:00, 1244.75epoch/s]INFO 2024-10-15 22:18:19,279 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.001]
100%|█████


        out of 60000 sentences, 36291 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7072 wrong predictions, 

        accuracy = 0.8051307486704693 
    
Params (20, 0.7, 0.8, 42, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


In [None]:
# res_dfs = []

In [None]:
# for dname in res_storing:
#     dsummary = pd.DataFrame([res_storing[dname]['summary']])
#     dsummary['dataset'] = dname
#     res_dfs.append(dsummary)

In [None]:
# df_res = pd.concat(res_dfs)

In [None]:
# df_res.sort_values(by='dataset_name')

In [None]:
# df_res[['before_fix_global_accuracy','confirm_prev_rate','complain_fix_rate','new_global_accuracy','dataset']]

In [34]:
user_input_sizes_test = [20]
random_states_test = [7]
lf_acc_threshs_test = [0.7]
instance_acc_threshs_test = [0.8]
# non_abstain_threshs_test = [0.8]
datasets_test = list(dataset_dict)
func_dictionary_test = [dataset_dict]

In [35]:
input_params_test = list(itertools.product(
    user_input_sizes_test,
    lf_acc_threshs_test,
    instance_acc_threshs_test,
#     non_abstain_threshs_test,
    random_states_test,
    datasets_test,
    func_dictionary_test
))

In [36]:
test_main_with_timeout(input_params, time_limit_minutes=20)

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:56:09,854 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:56:09,865 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:56:09,883 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 15%|████████▏                                            | 77/500 [00:00<00:02, 150.75epoch/s]INFO 2024-10-15 22:56:10,483 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 27%|██████████████▏                                     | 136/500 [00:00<00:01, 225.89epoch/s]INFO 2024-10-15 22:56:10,687 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 43%|██████████████████████▎                             | 214/500 [00:00<00:00, 389.82epoch/s]INFO 2024-10-15 22:56:10,761 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 71%|█████████████████████████████████████               | 356/500 [00:00<00:00, 695.39epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:56:11,763 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4b4c44e07d3a4ccab16f0f72f51fc205-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/4b4c44e07d3a4ccab16f0f72f51fc205-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4b4c44e07d3a4ccab16f0f72f51fc205-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/4b4c44e07d3a4ccab16f0f72f51fc205-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 402 strengthened rows, 60 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 235 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:56:28,097 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:56:28,107 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:56:28,132 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.053]
  6%|██▉                                                  | 28/500 [00:00<00:01, 278.06epoch/s]INFO 2024-10-15 22:56:28,296 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 24%|████████████▍                                       | 120/500 [00:00<00:00, 653.00epoch/s]INFO 2024-10-15 22:56:28,368 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
 53%|██████████████████████████▊                        | 263/500 [00:00<00:00, 1005.74epoch/s]INFO 2024-10-15 22:56:28,439 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:56:28,508 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.003]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7546 wrong predictions, 

        accuracy = 0.8126427649220379 
    
Params (20, 0.7, 0.8, 1, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:56:44,754 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:56:44,764 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:56:44,800 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 19%|██████████▏                                          | 96/500 [00:00<00:01, 350.54epoch/s]INFO 2024-10-15 22:56:45,090 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:56:45,172 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 43%|██████████████████████▎                             | 215/500 [00:00<00:00, 666.39epoch/s]INFO 2024-10-15 22:56:45,251 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 69%|███████████████████████████████████▋                | 343/500 [00:00<00:00, 879.49epoch/s]INFO 2024-10-15 22:56:45,331 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:56:46,539 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/1f2857e9cd7f45d497e5b6992e7e0376-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/1f2857e9cd7f45d497e5b6992e7e0376-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/1f2857e9cd7f45d497e5b6992e7e0376-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/1f2857e9cd7f45d497e5b6992e7e0376-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 419 strengthened rows, 56 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 258 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:57:02,490 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:57:02,502 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:57:02,534 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.055]
 13%|██████▉                                              | 65/500 [00:00<00:01, 237.24epoch/s]INFO 2024-10-15 22:57:02,842 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 38%|███████████████████▊                                | 191/500 [00:00<00:00, 635.29epoch/s]INFO 2024-10-15 22:57:02,921 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:57:02,998 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 64%|█████████████████████████████████▏                  | 319/500 [00:00<00:00, 865.36epoch/s]INFO 2024-10-15 22:57:03,076 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7562 wrong predictions, 

        accuracy = 0.8122455060085411 
    
Params (20, 0.7, 0.8, 321, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:57:19,135 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:57:19,146 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:57:19,164 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 17%|█████████                                            | 86/500 [00:00<00:01, 266.62epoch/s]INFO 2024-10-15 22:57:19,539 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 38%|███████████████████▋                                | 189/500 [00:00<00:00, 532.88epoch/s]INFO 2024-10-15 22:57:19,631 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:57:19,710 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 62%|████████████████████████████████▎                   | 311/500 [00:00<00:00, 761.50epoch/s]INFO 2024-10-15 22:57:19,788 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:57:20,850 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/33fe90d07a9c47bea1b248cf32a640b5-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/33fe90d07a9c47bea1b248cf32a640b5-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/33fe90d07a9c47bea1b248cf32a640b5-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/33fe90d07a9c47bea1b248cf32a640b5-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 455 strengthened rows, 66 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 291 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:57:36,825 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:57:36,835 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:57:36,875 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.062]
 10%|█████▌                                               | 52/500 [00:00<00:03, 134.62epoch/s]INFO 2024-10-15 22:57:37,343 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 28%|██████████████▎                                     | 138/500 [00:00<00:00, 381.11epoch/s]INFO 2024-10-15 22:57:37,426 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 53%|███████████████████████████▊                        | 267/500 [00:00<00:00, 677.33epoch/s]INFO 2024-10-15 22:57:37,503 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 79%|████████████████████████████████████████▉           | 394/500 [00:00<00:00, 864.11epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7665 wrong predictions, 

        accuracy = 0.809688151752905 
    
Params (20, 0.7, 0.8, 4, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:57:59,692 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:57:59,701 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:57:59,715 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  9%|████▉                                                | 46/500 [00:00<00:02, 181.13epoch/s]INFO 2024-10-15 22:58:00,042 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 22%|███████████▍                                        | 110/500 [00:00<00:01, 363.23epoch/s]INFO 2024-10-15 22:58:00,132 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 45%|███████████████████████▏                            | 223/500 [00:00<00:00, 639.96epoch/s]INFO 2024-10-15 22:58:00,220 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 67%|███████████████████████████████████                 | 337/500 [00:00<00:00, 808.80epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:58:01,372 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/2c064a0177b74ae2b77bb36e5c7b3fc5-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/2c064a0177b74ae2b77bb36e5c7b3fc5-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/2c064a0177b74ae2b77bb36e5c7b3fc5-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/2c064a0177b74ae2b77bb36e5c7b3fc5-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 440 strengthened rows, 51 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 273 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:58:22,979 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:58:22,990 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:58:22,995 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.060]
INFO 2024-10-15 22:58:23,092 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
 20%|██████████▎                                        | 101/500 [00:00<00:00, 1001.98epoch/s]INFO 2024-10-15 22:58:23,184 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
 42%|█████████████████████▍                             | 210/500 [00:00<00:00, 1048.68epoch/s]INFO 2024-10-15 22:58:23,266 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 67%|██████████████████████████████████▎                | 336/500 [00:00<00:00, 1144.40epoch/s]INFO 2024-10-15 22:58:23,345 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7619 wrong predictions, 

        accuracy = 0.8108302711292085 
    
Params (20, 0.7, 0.8, 123, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:58:39,397 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:58:39,407 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:58:39,424 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 13%|██████▋                                              | 63/500 [00:00<00:02, 204.48epoch/s]INFO 2024-10-15 22:58:39,757 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 39%|████████████████████                                | 193/500 [00:00<00:00, 625.80epoch/s]INFO 2024-10-15 22:58:39,828 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:58:39,900 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 66%|██████████████████████████████████▏                 | 329/500 [00:00<00:00, 885.97epoch/s]INFO 2024-10-15 22:58:39,971 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:58:40,958 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/0b2267b718d4444db0ec410332eb84f9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/0b2267b718d4444db0ec410332eb84f9-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/0b2267b718d4444db0ec410332eb84f9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/0b2267b718d4444db0ec410332eb84f9-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 440 strengthened rows, 56 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 274 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:58:57,128 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:58:57,138 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:58:57,156 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.066]
 15%|███████▉                                             | 75/500 [00:00<00:01, 382.30epoch/s]INFO 2024-10-15 22:58:57,363 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
INFO 2024-10-15 22:58:57,433 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.003]
 43%|██████████████████████▎                             | 215/500 [00:00<00:00, 843.30epoch/s]INFO 2024-10-15 22:58:57,503 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 72%|████████████████████████████████████▌              | 358/500 [00:00<00:00, 1072.83epoch/s]INFO 2024-10-15 22:58:57,574 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 8226 wrong predictions, 

        accuracy = 0.7957592610984209 
    
Params (20, 0.7, 0.8, 6, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:59:13,590 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:59:13,599 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:59:13,626 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
  7%|███▋                                                 | 35/500 [00:00<00:02, 177.08epoch/s]INFO 2024-10-15 22:59:13,870 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 29%|███████████████▎                                    | 147/500 [00:00<00:00, 600.65epoch/s]INFO 2024-10-15 22:59:13,949 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 55%|████████████████████████████▍                       | 273/500 [00:00<00:00, 856.17epoch/s]INFO 2024-10-15 22:59:14,030 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:59:14,107 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:59:15,234 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/38cb3b59cf1b43be87d5e748866fa124-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/38cb3b59cf1b43be87d5e748866fa124-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/38cb3b59cf1b43be87d5e748866fa124-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/38cb3b59cf1b43be87d5e748866fa124-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 427 strengthened rows, 58 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 261 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:59:31,410 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:59:31,421 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:59:31,440 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.048]
  9%|████▋                                                | 44/500 [00:00<00:01, 229.94epoch/s]INFO 2024-10-15 22:59:31,671 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.004]
 37%|███████████████████                                 | 183/500 [00:00<00:00, 754.45epoch/s]INFO 2024-10-15 22:59:31,743 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 22:59:31,812 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 65%|████████████████████████████████▉                  | 323/500 [00:00<00:00, 1007.46epoch/s]INFO 2024-10-15 22:59:31,883 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 31729 wrong predictions, 

        accuracy = 0.21221074585361008 
    
Params (20, 0.7, 0.8, 5, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 22:59:47,547 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 22:59:47,556 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 22:59:47,570 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 16%|████████▋                                            | 82/500 [00:00<00:00, 448.04epoch/s]INFO 2024-10-15 22:59:47,777 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 22:59:47,858 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 41%|█████████████████████▏                              | 204/500 [00:00<00:00, 796.49epoch/s]INFO 2024-10-15 22:59:47,935 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 67%|██████████████████████████████████▊                 | 335/500 [00:00<00:00, 997.57epoch/s]INFO 2024-10-15 22:59:48,013 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 22:59:49,008 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/c6f0d24afb1f4009ac840d234edd6795-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/c6f0d24afb1f4009ac840d234edd6795-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/c6f0d24afb1f4009ac840d234edd6795-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/c6f0d24afb1f4009ac840d234edd6795-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 417 strengthened rows, 68 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 257 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:00:05,235 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:00:05,246 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:00:05,273 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.052]
  4%|█▉                                                   | 18/500 [00:00<00:02, 174.15epoch/s]INFO 2024-10-15 23:00:05,434 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.004]
 24%|████████████▌                                       | 121/500 [00:00<00:00, 668.47epoch/s]INFO 2024-10-15 23:00:05,526 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 46%|███████████████████████▉                            | 230/500 [00:00<00:00, 858.81epoch/s]INFO 2024-10-15 23:00:05,617 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 68%|███████████████████████████████████▎                | 340/500 [00:00<00:00, 953.05epoc


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7523 wrong predictions, 

        accuracy = 0.8132138246101897 
    
Params (20, 0.7, 0.8, 2, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:00:22,129 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:00:22,140 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:00:22,144 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
INFO 2024-10-15 23:00:22,216 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 28%|██████████████                                     | 138/500 [00:00<00:00, 1376.16epoch/s]INFO 2024-10-15 23:00:22,286 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 56%|████████████████████████████▊                      | 282/500 [00:00<00:00, 1408.31epoch/s]INFO 2024-10-15 23:00:22,358 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 23:00:22,428 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|███████████████████████████████████████████████████| 500/500 [00:00<00:00, 1403.44epoch/s]
INFO 2024


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 23:00:23,554 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/fc4d2af4b322463783b2989dbb3081f9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/fc4d2af4b322463783b2989dbb3081f9-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/fc4d2af4b322463783b2989dbb3081f9-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/fc4d2af4b322463783b2989dbb3081f9-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 394 strengthened rows, 70 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 222 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:00:46,328 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:00:46,338 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:00:46,346 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.042]
 20%|██████████▍                                          | 99/500 [00:00<00:01, 261.16epoch/s]INFO 2024-10-15 23:00:46,754 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.006]
INFO 2024-10-15 23:00:46,841 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.005]
 42%|█████████████████████▊                              | 210/500 [00:00<00:00, 562.87epoch/s]INFO 2024-10-15 23:00:46,927 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.003]
 65%|██████████████████████████████████                  | 327/500 [00:00<00:00, 766.03epoch/s]INFO 2024-10-15 23:00:47,013 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 39808 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 31106 wrong predictions, 

        accuracy = 0.2185992765273312 
    
Params (20, 0.7, 0.8, 7, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:01:02,850 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:01:02,859 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:01:02,895 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 12%|██████▎                                              | 60/500 [00:00<00:02, 219.19epoch/s]INFO 2024-10-15 23:01:03,195 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 40%|████████████████████▊                               | 200/500 [00:00<00:00, 682.42epoch/s]INFO 2024-10-15 23:01:03,267 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 23:01:03,339 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 68%|███████████████████████████████████▏                | 338/500 [00:00<00:00, 930.95epoch/s]INFO 2024-10-15 23:01:03,410 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 23:01:04,387 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/1b89a993c8fe480392f8a4c25267295d-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/1b89a993c8fe480392f8a4c25267295d-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/1b89a993c8fe480392f8a4c25267295d-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/1b89a993c8fe480392f8a4c25267295d-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 441 strengthened rows, 58 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 271 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:01:20,588 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:01:20,599 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:01:20,613 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.054]
 12%|██████▌                                              | 62/500 [00:00<00:03, 139.85epoch/s]INFO 2024-10-15 23:01:21,057 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 36%|██████████████████▌                                 | 178/500 [00:00<00:00, 446.42epoch/s]INFO 2024-10-15 23:01:21,143 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 23:01:21,221 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 60%|███████████████████████████████▍                    | 302/500 [00:00<00:00, 682.30epoch/s]INFO 2024-10-15 23:01:21,302 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7657 wrong predictions, 

        accuracy = 0.8098867812096534 
    
Params (20, 0.7, 0.8, 8, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:01:37,568 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:01:37,579 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:01:37,588 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
INFO 2024-10-15 23:01:37,657 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 27%|█████████████▊                                     | 135/500 [00:00<00:00, 1343.52epoch/s]INFO 2024-10-15 23:01:37,727 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 56%|████████████████████████████▋                      | 281/500 [00:00<00:00, 1409.85epoch/s]INFO 2024-10-15 23:01:37,796 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
INFO 2024-10-15 23:01:37,864 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|███████████████████████████████████████████████████| 500/500 [00:00<00:00, 1422.96epoch/s]
INFO 2024


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 23:01:38,983 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/75aab3ae41d24642904f50e20daba511-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/75aab3ae41d24642904f50e20daba511-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/75aab3ae41d24642904f50e20daba511-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/75aab3ae41d24642904f50e20daba511-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 406 strengthened rows, 60 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 232 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:01:54,710 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:01:54,720 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:01:54,751 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.048]
  9%|████▉                                                | 47/500 [00:00<00:03, 144.14epoch/s]INFO 2024-10-15 23:01:55,101 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.007]
 33%|█████████████████▏                                  | 165/500 [00:00<00:00, 526.59epoch/s]INFO 2024-10-15 23:01:55,185 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.006]
 58%|██████████████████████████████▎                     | 292/500 [00:00<00:00, 781.86epoch/s]INFO 2024-10-15 23:01:55,264 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.005]
INFO 2024-10-15 23:01:55,340 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.004]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 8115 wrong predictions, 

        accuracy = 0.7985152448108055 
    
Params (20, 0.7, 0.8, 3, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:02:11,536 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:02:11,545 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:02:11,565 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.065]
 13%|██████▉                                              | 65/500 [00:00<00:01, 358.10epoch/s]INFO 2024-10-15 23:02:11,776 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
INFO 2024-10-15 23:02:11,846 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 41%|█████████████████████▎                              | 205/500 [00:00<00:00, 831.07epoch/s]INFO 2024-10-15 23:02:11,916 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 70%|███████████████████████████████████▌               | 349/500 [00:00<00:00, 1069.77epoch/s]INFO 2024-10-15 23:02:11,987 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.002]
100%|█████


        out of 60000 sentences, 40276 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7654 wrong predictions, 

        accuracy = 0.8099612672559341 
    


DEBUG 2024-10-15 23:02:12,973 [coin_api.py:solve_CBC:165] /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/415e3a1056fc4573ac03227f80e211c7-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/415e3a1056fc4573ac03227f80e211c7-pulp.sol 


lf_acc: 0.7, ins_acc:0.8
labeling_functions: ['lf_1', 'lf_2', 'lf_3', 'lf_4', 'lf_5', 'lf_6', 'lf_7', 'lf_8', 'lf_9']
num_instances: 20
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/opc/.pyenv/versions/3.8.0/envs/label/lib/python3.8/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/415e3a1056fc4573ac03227f80e211c7-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/415e3a1056fc4573ac03227f80e211c7-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 1474 COLUMNS
At line 10295 RHS
At line 11765 BOUNDS
At line 13566 ENDATA
Problem MODEL has 1469 rows, 1620 columns and 4500 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 424 strengthened rows, 67 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 268 strengthened rows, 0 substitutions
Cgl0004I processed model has

  sentences_df=pd.read_sql(f'SELECT * FROM {dataset_name}', conn)
INFO 2024-10-15 23:02:29,174 [label_model.py:fit:905] Computing O...
INFO 2024-10-15 23:02:29,184 [label_model.py:fit:911] Estimating \mu...
  0%|                                                               | 0/500 [00:00<?, ?epoch/s]INFO 2024-10-15 23:02:29,191 [logger.py:log:79] [0 epochs]: TRAIN:[loss=0.037]
INFO 2024-10-15 23:02:29,275 [logger.py:log:79] [100 epochs]: TRAIN:[loss=0.003]
 23%|███████████▋                                       | 115/500 [00:00<00:00, 1145.84epoch/s]INFO 2024-10-15 23:02:29,354 [logger.py:log:79] [200 epochs]: TRAIN:[loss=0.002]
 49%|████████████████████████▉                          | 244/500 [00:00<00:00, 1229.01epoch/s]INFO 2024-10-15 23:02:29,428 [logger.py:log:79] [300 epochs]: TRAIN:[loss=0.002]
 78%|███████████████████████████████████████▌           | 388/500 [00:00<00:00, 1323.91epoch/s]INFO 2024-10-15 23:02:29,497 [logger.py:log:79] [400 epochs]: TRAIN:[loss=0.001]
100%|█████


        out of 60000 sentences, 36291 actually got at least one signal to 

        make prediction. Out of all the valid predictions, we have 7072 wrong predictions, 

        accuracy = 0.8051307486704693 
    
Params (20, 0.7, 0.8, 42, 'agnews', {'agnews': <function gen_agnews_funcs at 0x7fa2eb7e2550>}) finished successfully.


[{'summary': {'before_fix_global_accuracy': 0.5437,
   'user_input_size': 20,
   'lf_acc_thresh': 0.7,
   'instance_acc_thresh': 0.8,
   'dataset_name': 'agnews',
   'random_state': 1,
   'confirm_prev_rate': 1.0,
   'complain_fix_rate': 0.5,
   'new_global_accuracy': 0.5455,
   'global_accuracy_on_valid_data': 0.8099612672559341,
   'new_global_accuracy_on_valid': 0.8126427649220379,
   'valid_global_data_size': 40276,
   'new_valid_global_data_size': 40276,
   'runtimes': {'snorkel_first_run': 16.572025060653687,
    'snorkel_run_after_fix': 17.4846031665802,
    'solver_runtime': 0.4164586067199707,
    'repair_time': 0.0686643123626709}},
  'fix_details': {'original_0': {'rule': PredicateNode(id=1, pred=keyword_predicate-word-(space,microsoft,announced,software,users,windows))
        LabelNode(id=2, label=-1)
        PredicateNode(id=4, pred=keyword_predicate-word-(corp), added=True)
            PredicateNode(id=7, pred=keyword_predicate-word-(chip), added=True)
                Pr

In [39]:
if([False]):
    print("what")

what
