<a href="https://colab.research.google.com/github/LukasEder1/StationarityPrediction/blob/main/LLMS/DeBERTa__Zero_Shot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/LukasEder1/StationarityPrediction.git
!pip install datasets

In [None]:
import pandas as pd

# Read the semicolon-separated dataset from the cloned repository into a DataFrame.
df = pd.read_csv(
    "/content/StationarityPrediction/LLMS/clean_dataset_combined.csv",
    sep=";",
)

In [None]:
# Remove every row whose final_vote is -1.
mask = df["final_vote"].eq(-1)
df = df.loc[~mask].copy()

# Binarize the target column: a vote of 0 stays 0, any other value becomes 1.
df.loc[:, "final_vote"] = df["final_vote"].map(lambda vote: int(vote != 0))

In [None]:
# Show the filtered, binarized frame as a quick sanity check.
df

In [None]:
from sklearn.model_selection import train_test_split

# Keep only the model input and the target, exposing the target under the name "label".
data = df.loc[:, ["text", "final_vote"]].rename(columns={"final_vote": "label"})

# Stratified 80/10/10 split: hold out 20 % of the rows first ...
train_data, test_val_data = train_test_split(
    data,
    test_size=0.2,
    stratify=data["label"],
    random_state=12,
)

# ... then cut the hold-out in half to obtain the test and validation parts.
test_data, val_data = train_test_split(
    test_val_data,
    test_size=0.5,
    stratify=test_val_data["label"],
    random_state=12,
)

# Report the size of each partition.
print(f"Training set: {len(train_data)}")
print(f"Testing set: {len(test_data)}")
print(f"Validation set: {len(val_data)}")

In [None]:
from tqdm.auto import tqdm
from transformers import pipeline

# Build the zero-shot classification pipeline from the named DeBERTa-v3 checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli",
)

In [None]:
# Raw texts of the test split, in row order, as the input for zero-shot inference.
txts = test_data["text"].values

In [None]:
import numpy as np

# The two mutually exclusive classes offered to the zero-shot model.
candidate_labels = ["time-insensitive", "time-sensitive"]

# `outputs` collects the raw pipeline result per text; `top_pred` keeps only the
# first entry of each result's "labels" list.
# NOTE(review): this relies on the pipeline returning labels sorted by descending
# score - confirm against the transformers documentation.
outputs = []
top_pred = []
for example in tqdm(txts):
    out = classifier(
        example,
        candidate_labels=candidate_labels,
        multi_label=False,
    )
    top_pred.append(out["labels"][0])
    outputs.append(out)

  0%|          | 0/288 [00:00<?, ?it/s]

In [None]:
def encode_labels(labels):
  """Convert label strings into binary class ids.

  Args:
    labels: Iterable of label strings.

  Returns:
    A list of ints with 1 wherever the label equals "time-sensitive" and
    0 for every other label, in the same order as the input.
  """
  encoded = []
  for label in labels:
    encoded.append(1 if label == "time-sensitive" else 0)
  return encoded

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Display names for the numeric classes 0 and 1, in that order.
labels = ["stationary",  "time-sensitive"]

# Binary predictions derived from the zero-shot model's top label per text.
y_preds = encode_labels(top_pred)
# Ground-truth labels; despite the variable name these come from the *test* split.
y_valid = test_data["label"].values

# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted ones, which is also how ConfusionMatrixDisplay
# titles its axes. Passing the predictions first would transpose the matrix and
# swap the false-positive and false-negative cells.
cm = confusion_matrix(y_valid, y_preds, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=labels)
disp.plot()
plt.show()