In [8]:
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch

# Choose the compute device up front: the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fetch the MNLI-fine-tuned BART tokenizer and sequence-classification model,
# then move the model's weights onto the chosen device
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
nli_model.to(device)

# The zero-shot-classification pipeline stays disabled in this cell;
# classify_relationship below calls nli_model directly instead
#classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

def classify_relationship(text1, text2, threshold=0.5, hypothesis_template='This example is {}.'):
    """Label a premise/hypothesis pair as "Entailment" or "Contradiction".

    The pair is scored with the module-level ``nli_model`` / ``tokenizer``
    (facebook/bart-large-mnli) on ``device``.

    Args:
        text1: Premise text.
        text2: Text inserted into ``hypothesis_template`` to form the hypothesis.
        threshold: Minimum entailment probability (after dropping the neutral
            class) required to return "Entailment". Defaults to 0.5.
        hypothesis_template: ``str.format`` template with one ``{}`` slot for
            ``text2``. Pass ``'{}'`` to use ``text2`` verbatim as the hypothesis.

    Returns:
        "Entailment" if the entailment probability exceeds ``threshold``,
        otherwise "Contradiction".
    """
    premise = text1
    hypothesis = hypothesis_template.format(text2)

    # Encode the pair as one sequence; truncate so it never exceeds the model's input limit
    inputs = tokenizer.encode(
        premise,
        hypothesis,
        return_tensors='pt',
        truncation=True,
        max_length=tokenizer.model_max_length,
    ).to(device)

    # Inference only: no gradients needed
    with torch.no_grad():
        logits = nli_model(inputs)[0]

    # Drop "neutral" (index 1) and renormalise over "contradiction" (index 0)
    # and "entailment" (index 2); column 1 of the result is then P(entailment).
    contradiction_entailment_logits = logits[:, [0, 2]]
    probs = contradiction_entailment_logits.softmax(dim=1)

    # The batch holds exactly one pair; pull the probability out as a plain float
    # rather than relying on the truthiness of a one-element tensor.
    prob_label_is_true = probs[0, 1].item()

    return "Entailment" if prob_label_is_true > threshold else "Contradiction"


def classify_relationship_csv(csv_file, column1, column2, ground_truth_column,
                              output_csv='D:/Thesis/Processed Data/t5-zero-shot-test-summary-relationship.csv'):
    """Classify every row of a CSV file and score the predictions.

    Each row is passed to ``classify_relationship`` with the ``column2`` text as
    the first argument (premise) and the ``column1`` text as the second
    (inserted into the hypothesis). Predictions are stored in a new
    ``predicted_relationship`` column and compared with ``ground_truth_column``.

    Args:
        csv_file: Path of the input CSV file.
        column1: Name of the column whose text becomes the hypothesis.
        column2: Name of the column whose text becomes the premise.
        ground_truth_column: Name of the column holding the true labels.
        output_csv: Where the DataFrame with predictions is written. Defaults to
            the path this function previously hard-coded.

    Returns:
        Tuple ``(df, accuracy, precision, recall, f1, cm)`` where precision,
        recall and F1 are weighted averages and ``cm`` is the confusion matrix.
    """
    df = pd.read_csv(csv_file)

    # Note the argument order: column2 is the premise, column1 the hypothesis
    relationships = [
        classify_relationship(text2, text1)
        for text1, text2 in zip(df[column1].tolist(), df[column2].tolist())
    ]
    df['predicted_relationship'] = relationships

    # Compare predictions against the ground truth
    y_true = df[ground_truth_column].tolist()
    y_pred = relationships
    accuracy = accuracy_score(y_true, y_pred)
    print(y_true)
    print(y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)

    # Persist the input rows together with their predictions
    df.to_csv(output_csv, index=False)

    return df, accuracy, precision, recall, f1, cm

# Specify the CSV file path and the columns for relationship classification
csv_file = 'c:/Users/akhil/Downloads/your_file_concatenated.csv'
column1 = 'Statement'
column2 = 'concatenated'
ground_truth_column = 'Label'  # Replace with the actual column name in your CSV

# Classify relationship for the CSV file and calculate evaluation metrics.
# The matrix is bound to `conf_matrix`, not `confusion_matrix`, so the sklearn
# function of that name is not overwritten and classify_relationship_csv can be
# called again without re-running the imports.
relationship_df, accuracy, precision, recall, f1, conf_matrix = classify_relationship_csv(csv_file, column1, column2, ground_truth_column)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("Confusion Matrix:\n", conf_matrix)


['Entailment', 'Contradiction', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Ent

In [7]:
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch

# Load the NLI model and tokenizer fine-tuned on MNLI
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nli_model.to(device)

# Build the zero-shot-classification pipeline from the model and tokenizer loaded
# above instead of passing the checkpoint name again: that would load a second
# copy of the weights and leave the pipeline on the CPU.
# The device is given as an int (0 = first GPU, -1 = CPU).
classifier = pipeline(
    "zero-shot-classification",
    model=nli_model,
    tokenizer=tokenizer,
    device=0 if device.type == "cuda" else -1,
)

def classify_relationship(text1, text2, threshold=0.5):
    """Label a premise/hypothesis pair as "Entailment" or "Contradiction".

    Uses the module-level zero-shot ``classifier`` pipeline as an NLI scorer:
    ``text1`` is the premise and ``text2`` (single quotes removed) is inserted
    into the hypothesis template as the only candidate label.

    The previous version passed both texts as two independent sequences with
    the literal labels "Entailment"/"Contradiction" and read only the result
    for ``text1``, so ``text2`` never influenced the prediction.

    Args:
        text1: Premise text.
        text2: Text inserted into the hypothesis template.
        threshold: Minimum entailment score required to return "Entailment".
            Defaults to 0.5.

    Returns:
        "Entailment" if the entailment score exceeds ``threshold``,
        otherwise "Contradiction".
    """
    hypothesis_template = "This example is {}."

    # Remove single quotes from text2
    text2 = text2.replace("'", "")

    # With a single candidate label the pipeline scores that label on its own,
    # as the entailment-vs-contradiction probability of the formatted hypothesis
    # given the premise (per the Transformers zero-shot pipeline docs).
    result = classifier(
        text1,
        candidate_labels=[text2],
        hypothesis_template=hypothesis_template,
    )
    entailment_score = result["scores"][0]

    return "Entailment" if entailment_score > threshold else "Contradiction"



def classify_relationship_csv(csv_file, column1, column2, ground_truth_column,
                              output_csv='D:/Thesis/Processed Data/t5-zero-shot-test-summary-relationship.csv'):
    """Classify every row of a CSV file and score the predictions.

    Each row is passed to ``classify_relationship`` with the ``column1`` text as
    the first argument and the ``column2`` text as the second. Predictions are
    stored in a new ``predicted_relationship`` column and compared with
    ``ground_truth_column``.

    Args:
        csv_file: Path of the input CSV file.
        column1: Name of the column passed as the first text.
        column2: Name of the column passed as the second text.
        ground_truth_column: Name of the column holding the true labels.
        output_csv: Where the DataFrame with predictions is written. Defaults to
            the path this function previously hard-coded.

    Returns:
        Tuple ``(df, accuracy, precision, recall, f1, cm)`` where precision,
        recall and F1 are weighted averages and ``cm`` is the confusion matrix.
    """
    df = pd.read_csv(csv_file)

    # NOTE(review): column1 is passed first (the premise side) here; confirm
    # this is the intended direction for the data being evaluated.
    relationships = [
        classify_relationship(text1, text2)
        for text1, text2 in zip(df[column1].tolist(), df[column2].tolist())
    ]
    df['predicted_relationship'] = relationships

    # Compare predictions against the ground truth
    y_true = df[ground_truth_column].tolist()
    y_pred = relationships
    accuracy = accuracy_score(y_true, y_pred)
    print(y_true)
    print(y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)

    # Persist the input rows together with their predictions
    df.to_csv(output_csv, index=False)

    return df, accuracy, precision, recall, f1, cm

# Specify the CSV file path and the columns for relationship classification
csv_file = 'c:/Users/akhil/Downloads/your_file_concatenated.csv'
column1 = 'Statement'
column2 = 'concatenated'
ground_truth_column = 'Label'  # Replace with the actual column name in your CSV

# Classify relationship for the CSV file and calculate evaluation metrics.
# The matrix is bound to `conf_matrix`, not `confusion_matrix`, so the sklearn
# function of that name is not overwritten and classify_relationship_csv can be
# called again without re-running the imports.
relationship_df, accuracy, precision, recall, f1, conf_matrix = classify_relationship_csv(csv_file, column1, column2, ground_truth_column)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("Confusion Matrix:\n", conf_matrix)


['Entailment', 'Contradiction', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Contradiction', 'Entailment', 'Entailment', 'Contradiction', 'Ent