## LLM performance

In [1]:
import datasets

# Read the LLM predictions: one integer class id per line.
with open("agnews_predictions.txt") as f:
    outputs = [int(raw.strip()) for raw in f]

# Gold labels come from the dataset saved on disk.
data = datasets.load_from_disk('2000_sampled_agnews')
labels = data['label']

# Overall accuracy: fraction of predictions that equal the gold label.
total = len(outputs)
correct = 0
for idx, predicted in enumerate(outputs):
    if predicted == labels[idx]:
        correct += 1
print("Overall accuracy: {}".format(correct / total))

# Per-class accuracy for class ids 0-3, where an example belongs to a class
# according to its gold label.
for cls in range(4):
    members = [j for j in range(len(outputs)) if labels[j] == cls]
    total = len(members)
    correct = sum(1 for j in members if outputs[j] == labels[j])
    print("class {}, Total: {}, Accuracy: {:.2f}%".format(cls, total, correct / total*100))

Overall accuracy: 0.6365
class 0, Total: 495, Accuracy: 33.33%
class 1, Total: 521, Accuracy: 88.48%
class 2, Total: 486, Accuracy: 73.05%
class 3, Total: 498, Accuracy: 58.63%


  from .autonotebook import tqdm as notebook_tqdm


## LLM labels as proxy

#### dataset & library import

In [2]:
# Plotting and numeric libraries used by the cells below
import matplotlib.pyplot as plt
import numpy as np

# Beautify the plots. Matplotlib 3.6 renamed the bundled seaborn styles to
# 'seaborn-v0_8-*' and deprecated the old names (later releases removed them),
# so prefer the new name when this matplotlib provides it and only fall back
# to the legacy name on older installations.
_DARKGRID_STYLES = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
plt.style.use(next((name for name in _DARKGRID_STYLES if name in plt.style.available),
                   _DARKGRID_STYLES[-1]))

  plt.style.use('seaborn-darkgrid')


#### tf-idf as word embeddings

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Raw documents to classify and their gold class ids, taken from the dataset.
texts, labels = data["text"], data["label"]

# Unigram TF-IDF features over lowercased text with English stop words removed.
# The vectorizer is only configured here; it is fitted in a later cell.
vectorizer = TfidfVectorizer(
    lowercase=True,
    ngram_range=(1, 1),
    stop_words='english',
)

In [11]:
# Accuracy of the LLM labels against the gold labels on the filtered subset.
# NOTE: filtered_labels / filtered_llama_labels are built in the
# logistic-regression cell below, so that cell must have been run first.

# calculate overall accuracy
correct = 0
total = 0
for i in range(len(filtered_labels)):
    total += 1
    if filtered_llama_labels[i] == filtered_labels[i]:
        correct += 1
# Keep the overall figure in its own variable: `correct` / `total` are reused
# by the per-class loop below and would otherwise be overwritten.
overall_acc = correct / total
print("Overall accuracy: {}".format(overall_acc))

# calculate accuracy for each of the 4 classes
for i in range(4):
    correct = 0
    total = 0
    for j in range(len(filtered_llama_labels)):
        if filtered_labels[j] == i:
            total += 1
            if filtered_llama_labels[j] == filtered_labels[j]:
                correct += 1
    # A class with no examples has no defined accuracy; report NaN instead of crashing.
    class_acc = correct / total*100 if total else float('nan')
    print("class {}, Total: {}, Accuracy: {:.2f}%".format(i, total, class_acc))

# Baseline used by the later threshold plot: the overall LLM accuracy
# (previously this picked up the last class's accuracy left over from the loop).
baseline_acc = overall_acc

Overall accuracy: 0.6514841351074718
class 0, Total: 458, Accuracy: 36.03%
class 1, Total: 521, Accuracy: 88.48%
class 2, Total: 482, Accuracy: 73.65%
class 3, Total: 493, Accuracy: 59.23%


In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# LLM predictions: one integer per line, in the same order as `texts` / `labels`
with open("agnews_predictions.txt") as f:
    llama_labels = [int(line) for line in f]

# Keep only the examples whose LLM prediction is below 4 (the class ids
# evaluated below are 0-3); text, LLM label and gold label stay aligned.
filtered_texts, filtered_llama_labels, filtered_labels = [], [], []
for t, l, g in zip(texts, llama_labels, labels):
    if l < 4:
        filtered_texts.append(t)
        filtered_llama_labels.append(l)
        filtered_labels.append(g)

# Split texts, LLM (proxy) labels and gold labels in ONE call so the three
# test partitions are guaranteed to contain the same examples. (Doing a second
# split for the gold labels only lines up while both calls share exactly the
# same sizes and random_state.)
(X_train_proxy, X_test_proxy,
 y_train_proxy, y_test_proxy,
 _, y_test_gold) = train_test_split(
    filtered_texts, filtered_llama_labels, filtered_labels,
    test_size=0.2, random_state=42)

# Fit TF-IDF on the training texts only, then apply it to the test texts
X_train_proxy = vectorizer.fit_transform(X_train_proxy)
X_test_proxy = vectorizer.transform(X_test_proxy)

# Train a logistic regression classifier on the LLM labels
classifier = LogisticRegression()
classifier.fit(X_train_proxy, y_train_proxy)

# Make predictions on the test set
y_pred_proxy = classifier.predict(X_test_proxy)

# Evaluate the classifier against the GOLD test labels
accuracy = accuracy_score(y_test_gold, y_pred_proxy)
print("Accuracy:", accuracy)

# calculate accuracy for each class (class membership defined by the gold label)
for i in range(4):
    correct = 0
    total = 0
    for j in range(len(y_pred_proxy)):
        if y_test_gold[j] == i:
            total += 1
            if y_pred_proxy[j] == y_test_gold[j]:
                correct += 1
    # A class missing from the test split has no defined accuracy; report NaN.
    class_acc = correct / total*100 if total else float('nan')
    print("class {}, Accuracy: {:.2f}%".format(i, class_acc))

Accuracy: 0.5933503836317136
class 0, Accuracy: 10.53%
class 1, Accuracy: 90.29%
class 2, Accuracy: 81.82%
class 3, Accuracy: 54.29%


In [None]:
import eli5

# Show the 5 most positive and 5 most negative feature weights per class of the
# current classifier, labelled with the vectorizer's vocabulary terms.
feature_names = list(vectorizer.get_feature_names_out())
eli5.show_weights(
    estimator=classifier,
    feature_names=feature_names,
    top=(5, 5),
)

#### sentence-bert as sentence embeddings

In [6]:
from sentence_transformers import SentenceTransformer

# Load the pretrained sentence encoder.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Gold class ids for every example.
labels = data["label"]

# Encode every text in the dataset. NOTE: `texts` ends up bound to the encoder
# output rather than the raw strings; the next cell reads it under this name.
texts = model.encode(data["text"])

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# LLM predictions: one integer per line, in the same order as `texts` / `labels`
with open("agnews_predictions.txt") as f:
    llama_labels = [int(line) for line in f]

# Keep only the examples whose LLM prediction is below 4 (the class ids
# evaluated below are 0-3); features, LLM label and gold label stay aligned.
# `texts` is whatever the preceding cell left bound to that name
# (the encoder output when the cells of this section are run in order).
filtered_texts, filtered_llama_labels, filtered_labels = [], [], []
for t, l, g in zip(texts, llama_labels, labels):
    if l < 4:
        filtered_texts.append(t)
        filtered_llama_labels.append(l)
        filtered_labels.append(g)

# Split features, LLM (proxy) labels and gold labels in ONE call so the three
# test partitions are guaranteed to contain the same examples. (Doing a second
# split for the gold labels only lines up while both calls share exactly the
# same sizes and random_state.)
(X_train_proxy, X_test_proxy,
 y_train_proxy, y_test_proxy,
 _, y_test_gold) = train_test_split(
    filtered_texts, filtered_llama_labels, filtered_labels,
    test_size=0.2, random_state=42)

# Train a logistic regression classifier on the LLM labels
classifier = LogisticRegression()
classifier.fit(X_train_proxy, y_train_proxy)

# Make predictions on the test set
y_pred_proxy = classifier.predict(X_test_proxy)

# Evaluate the classifier against the GOLD test labels
accuracy = accuracy_score(y_test_gold, y_pred_proxy)
print("Accuracy:", accuracy)

# calculate accuracy for each class (class membership defined by the gold label)
for i in range(4):
    correct = 0
    total = 0
    for j in range(len(y_pred_proxy)):
        if y_test_gold[j] == i:
            total += 1
            if y_pred_proxy[j] == y_test_gold[j]:
                correct += 1
    # A class missing from the test split has no defined accuracy; report NaN.
    class_acc = correct / total*100 if total else float('nan')
    print("class {}, Accuracy: {:.2f}%".format(i, class_acc))

Accuracy: 0.7212276214833759
class 0, Accuracy: 41.05%
class 1, Accuracy: 96.12%
class 2, Accuracy: 82.95%
class 3, Accuracy: 67.62%


### RQ - Confidence threshold

In [None]:
import numpy as np

# Sanity check: every predicted class must be the class with the highest
# decision score. The scores are computed once for the whole test set instead
# of once per sample, and the arg-max column is mapped through
# classifier.classes_ so the check does not depend on column index and class id
# happening to coincide.
decision_scores = classifier.decision_function(X_test_proxy)
top_classes = classifier.classes_[np.argmax(decision_scores, axis=1)]
assert (top_classes == np.asarray(y_pred_proxy)).all()
    

In [None]:
# Histogram of the confidence scores, where a sample's confidence is the
# largest decision-function value across classes.
top_scores = classifier.decision_function(X_test_proxy).max(axis=1)
plt.hist(top_scores, bins=20)
plt.ylabel("Number of samples")
plt.xlabel("Confidence score")
plt.show()


In [None]:
# Accuracy of the classifier restricted to the samples whose confidence
# (largest decision-function value) exceeds a threshold, swept over
# num_steps evenly spaced thresholds in [0, scale).
confidence = classifier.decision_function(X_test_proxy)
acc = []  # accuracy among the confident samples, one entry per threshold
to = []   # number of confident samples, one entry per threshold
scale = round(max([max(scores) for scores in confidence]))
num_steps = 50
for k in range(num_steps):
    threshold = k/num_steps*scale
    total, count = 0, 0  # confident samples / confident samples predicted wrongly
    for i in range(len(y_pred_proxy)):
        # Only samples above the threshold enter the denominator. (Counting
        # every sample as well would inflate the reported accuracy.)
        if max(confidence[i]) > threshold:
            total += 1
            if y_pred_proxy[i] != y_test_gold[i]:
                count += 1
    to.append(total)
    # `scale` is rounded, so the top thresholds can exceed every score; with no
    # confident sample the accuracy is undefined and is plotted as a gap (NaN).
    acc.append(1 - count / total if total else float('nan'))

plt.plot([i/num_steps*scale for i in range(num_steps)], acc)
plt.xlabel("Confidence threshold")
plt.ylabel("Accuracy")
plt.title("Accuracy vs. Confidence threshold")
plt.gcf().set_size_inches(7, 4)
# plt.savefig('confidence.png', dpi=300)
plt.show()

In [None]:
# Accuracy of a hybrid labelling rule, swept over confidence thresholds:
# use the classifier's prediction when its top decision score exceeds the
# threshold, otherwise fall back to y_test_proxy for that sample.
# `num_steps` and `baseline_acc` must already be defined by earlier cells.
confidence = classifier.decision_function(X_test_proxy)
acc = []
to = []
scale = round(max([max(scores) for scores in confidence]))
thresholds = [step/num_steps*scale for step in range(num_steps)]
for threshold in thresholds:
    count = 0
    total = 0
    for idx in range(len(y_pred_proxy)):
        total += 1
        is_confident = max(confidence[idx]) > threshold
        label_we_use = y_pred_proxy[idx] if is_confident else y_test_proxy[idx]
        if label_we_use == y_test_gold[idx]:
            count += 1
    acc.append(count / total)

best_acc = max(acc)

# Solid line: hybrid accuracy; dashed line: constant baseline_acc for reference.
plt.plot(thresholds, acc)
plt.plot(thresholds, [baseline_acc] * num_steps, linestyle='--')
plt.xlabel("Confidence threshold")
plt.ylabel("Accuracy")
plt.title("Overall Accuracy vs. Confidence threshold")
plt.gcf().set_size_inches(7, 4)
plt.annotate(f"Max accuracy: {best_acc}", (0.3, best_acc))

# plt.savefig('confidence.png', dpi=300)
plt.show()

### RQ - Change training data size

In [None]:
# Retrain on the first 10, 20, ..., 790 training rows and record the accuracy
# of each model on the test set, scored against y_test_proxy.
accuracies = []
X_len = X_train_proxy.shape[0]
y_len = len(y_train_proxy)
for step in range(1, 80):
    n_rows = step*10
    # Prefix of the training data, capped at the number of rows available
    X_train_proxy_sampled = X_train_proxy[:min(n_rows, X_len)]
    y_train_proxy_sampled = y_train_proxy[:min(n_rows, y_len)]

    # Fresh classifier for every training-set size
    classifier = LogisticRegression()
    classifier.fit(X_train_proxy_sampled, y_train_proxy_sampled)

    # Predict on the full test set and score the predictions
    y_pred_proxy_sampled = classifier.predict(X_test_proxy)
    accuracy = accuracy_score(y_test_proxy, y_pred_proxy_sampled)
    accuracies.append(accuracy)

# Accuracy as a function of the number of training samples
plt.plot(np.arange(10, 800, 10), accuracies)
plt.ylabel("Accuracy")
plt.xlabel("Number of samples")
# set size
plt.gcf().set_size_inches(10, 5)
plt.savefig('accuracy.png', dpi=300)

### RQ - Manually adjust label distribution

In [None]:
# Train on the first num_samples_idx*10 training rows (capped at the data size)
num_samples_idx=8
X_train_proxy_sampled = X_train_proxy[:min(num_samples_idx*10,X_len)]
y_train_proxy_sampled = y_train_proxy[:min(num_samples_idx*10,y_len)]

classifier = LogisticRegression()
classifier.fit(X_train_proxy_sampled, y_train_proxy_sampled)

# Predict on the test set and score against y_test_proxy
y_pred_proxy_sampled = classifier.predict(X_test_proxy)
accuracy = accuracy_score(y_test_proxy, y_pred_proxy_sampled)
print("Accuracy:", accuracy)

# Accuracy split by reference label: label 1 counts as "positive",
# every other label as "negative".
pos_correct = 0
pos_total = 0
neg_correct = 0
neg_total = 0
for idx, predicted in enumerate(y_pred_proxy_sampled):
    reference = y_test_proxy[idx]
    matches = predicted == reference
    if reference == 1:
        pos_total += 1
        if matches:
            pos_correct += 1
    else:
        neg_total += 1
        if matches:
            neg_correct += 1

print("Positive accuracy: ", pos_correct / pos_total)
print("Negative accuracy: ", neg_correct / neg_total)

In [None]:
import numpy as np
import scipy.sparse  # binds `scipy` and makes sure the sparse submodule is loaded

# Pool of training rows that were NOT used by the sampled classifier above,
# split by label: rows labelled 0 go to the negative pool, all other rows to
# the positive pool.
#
# The rows are selected with index arrays in one step. Growing the pools with
# vstack starting from a Python list `[]` is quadratic and lets the empty
# placeholder take part in the stacking, which can add a spurious all-zero
# first row or fail outright, depending on the scipy version.
pool_X = X_train_proxy[min(num_samples_idx*10,X_len):]
pool_y = np.asarray(y_train_proxy[min(num_samples_idx*10,y_len):])

new_negative_samples = pool_X[np.flatnonzero(pool_y == 0)]
new_positive_samples = pool_X[np.flatnonzero(pool_y != 0)]
print(new_negative_samples.shape, new_positive_samples.shape)

In [None]:
# Effect of adding 1..num_new_samples extra negative (label 0) rows to the
# sampled training set. Entry 0 of each curve is the baseline from the cell
# above (no extra rows); entry k is the result with k extra rows, so the curves
# have num_new_samples + 1 points and line up with the x-axis of the plot cell.
new_accuracy, new_pos_accuracy, new_neg_accuracy = [accuracy], [pos_correct / pos_total], [neg_correct / neg_total]
num_new_samples = 10
# Start at k=1: k=0 would only repeat the baseline and shift every later point
# one tick to the right.
for k in range(1, num_new_samples + 1):
    extra_negatives = new_negative_samples[:k]
    X_train_proxy_sampled_plus = scipy.sparse.vstack([X_train_proxy[:min(num_samples_idx*10,X_len)], extra_negatives])
    # One 0 label per row actually appended (the pool may hold fewer than k rows)
    y_train_proxy_sampled_plus = y_train_proxy[:min(num_samples_idx*10,y_len)] + [0] * extra_negatives.shape[0]

    # Train a logistic regression classifier
    classifier = LogisticRegression()
    classifier.fit(X_train_proxy_sampled_plus, y_train_proxy_sampled_plus)

    # Make predictions on the test set
    y_pred_proxy_sampled_plus = classifier.predict(X_test_proxy)

    # Evaluate the accuracy of the classifier against y_test_proxy
    accuracy = accuracy_score(y_test_proxy, y_pred_proxy_sampled_plus)
    new_accuracy.append(accuracy)

    # Accuracy split by reference label: label 1 is "positive", the rest "negative"
    pos_correct = 0
    pos_total = 0
    neg_correct = 0
    neg_total = 0
    for i in range(len(y_pred_proxy_sampled_plus)):
        if y_test_proxy[i] == 1:
            pos_total += 1
            if y_pred_proxy_sampled_plus[i] == y_test_proxy[i]:
                pos_correct += 1
        else:
            neg_total += 1
            if y_pred_proxy_sampled_plus[i] == y_test_proxy[i]:
                neg_correct += 1

    new_pos_accuracy.append(pos_correct / pos_total)
    new_neg_accuracy.append(neg_correct / neg_total)

In [None]:
# plot the accuracy changes as we adjust the number of samples
# x-axis: training-set size, from the base size up to base size + num_new_samples
sample_counts = np.arange(num_samples_idx*10, num_samples_idx*10+num_new_samples+1)
plt.plot(sample_counts, new_accuracy)
plt.plot(sample_counts, new_pos_accuracy)
plt.plot(sample_counts, new_neg_accuracy)
plt.xlabel("Number of samples")
plt.ylabel("Accuracy")
# one tick per training-set size
plt.xticks(sample_counts)
# set size
plt.gcf().set_size_inches(10, 4)
# Add the legend BEFORE saving, otherwise the written PNG has no legend
plt.legend(["Overall", "Positive", "Negative"])
plt.savefig('accuracy_change.png', dpi=300)
print(new_accuracy)


## Training with Ground Truth

In [13]:

# Hold out 20% of the filtered examples, this time pairing the texts with the
# gold labels for both training and testing.
X_train_gold, X_test_gold, y_train_gold, y_test_gold = train_test_split(
    filtered_texts,
    filtered_labels,
    random_state=42,
    test_size=0.2,
)

# Refit the shared vectorizer on the training texts, then transform the test texts
X_train_gold = vectorizer.fit_transform(X_train_gold)
X_test_gold = vectorizer.transform(X_test_gold)

# Logistic regression trained on gold labels
classifier = LogisticRegression()
classifier.fit(X_train_gold, y_train_gold)

# Predictions for the held-out examples
y_pred_gold = classifier.predict(X_test_gold)

# Overall accuracy against the gold test labels
accuracy = accuracy_score(y_test_gold, y_pred_gold)
print("Accuracy:", accuracy)

# Per-class accuracy for class ids 0-3 (class membership defined by the gold label)
for cls in range(4):
    members = [j for j in range(len(y_pred_gold)) if y_test_gold[j] == cls]
    total = len(members)
    correct = sum(1 for j in members if y_pred_gold[j] == y_test_gold[j])
    print("class {}, Accuracy: {:.2f}%".format(cls, correct / total*100))

Accuracy: 0.8849104859335039
class 0, Accuracy: 85.26%
class 1, Accuracy: 96.12%
class 2, Accuracy: 82.95%
class 3, Accuracy: 88.57%
