In [None]:
# Show the NVIDIA GPU(s) visible to this runtime before running anything else.
!nvidia-smi

In [None]:
# Mount Google Drive at /content/drive; the data, code and model paths used
# below are located under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Show full column contents instead of truncating them when displaying DataFrames.
pd.set_option('display.max_colwidth', None)

### Predicting on the validation (dev) set:

In [None]:
import os
import sys
# Select the GPU that CUDA may use. Device indices start at 0, so on a
# single-GPU runtime (what Colab typically provides) the value '1' hides the
# only GPU and encoding silently falls back to the CPU. Use '0' for the first
# (or only) GPU; change it only on a multi-GPU machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Put the shared-task source folder first on the import path so that
# `track_1_kp_matching` can be imported by the following cells.
sys.path.insert(0, "/content/drive/MyDrive/Πτυχιακή/Code/argmining-21-keypoint-analysis-sharedtask-code/code/src-py")

In [None]:
import track_1_kp_matching

In [None]:
from track_1_kp_matching import *

In [None]:
!pip install -U sentence-transformers
import sentence_transformers

In [None]:
from sentence_transformers import SentenceTransformer, InputExample, LoggingHandler, losses, models, util
import torch

In [None]:
# Validation (dev) data, read from Google Drive.
# valid_df uses the first CSV column as its index; it is not referenced by the
# cells visible in this section.
valid_df = pd.read_csv('/content/drive/MyDrive/Πτυχιακή/Code/argmining-21-keypoint-analysis-sharedtask-code/data/validation_translated.csv', index_col=0)
# Key points and arguments of the translated dev split; these two frames are
# the inputs of predict_and_evaluate(...) for the 'translated_dev' subset.
valid_keypoints_df = pd.read_csv('/content/drive/MyDrive/Πτυχιακή/Code/KPA_2021_shared_task/kpm_data/key_points_translated_dev.csv')
valid_arguments_df = pd.read_csv('/content/drive/MyDrive/Πτυχιακή/Code/KPA_2021_shared_task/kpm_data/arguments_translated_dev.csv')

In [None]:
def match_argument_with_keypoints(result, kp_dict, arg_dict):
    """Score every argument against every key point by cosine similarity.

    Args:
        result: Dict that is updated in place; maps an argument id to a dict
            of ``{key point id: score}``.
        kp_dict: Maps a key point id to the embedding of that key point.
        arg_dict: Maps an argument id to the embedding of that argument.

    Returns:
        The same ``result`` object, with one entry added (or overwritten) for
        each key of ``arg_dict``. Scores are the raw cosine similarities as
        Python floats; no softmax or other normalisation is applied. When
        ``kp_dict`` is empty each argument is mapped to an empty dict.
    """
    for arg, arg_embedding in arg_dict.items():
        # One similarity per (argument, key point) pair; .item() turns the
        # 1x1 result into a plain float so that the scores are JSON-serialisable.
        result[arg] = {
            kp: util.pytorch_cos_sim(arg_embedding, kp_embedding).item()
            for kp, kp_embedding in kp_dict.items()
        }

    return result

def predict(model, argument_df, keypoint_df, output_path, append_topic=False):
    """Score arguments against key points per (topic, stance) and save as JSON.

    Arguments are only compared with key points that share their topic and
    stance. Only the stance values -1 and 1 are considered.

    Args:
        model: Object whose ``encode(list_of_texts)`` returns one embedding
            per input text, in order.
        argument_df: DataFrame with the columns ``arg_id``, ``argument``,
            ``topic`` and ``stance``.
        keypoint_df: DataFrame with the columns ``key_point_id``,
            ``key_point``, ``topic`` and ``stance``.
        output_path: File the scores are written to as JSON (overwritten).
        append_topic: When True, every key point text is prefixed with
            ``"<topic> <SEP> "`` before encoding. Argument texts are always
            encoded unchanged.

    Returns:
        Dict mapping each argument id to ``{key point id: score}``, i.e. the
        same structure that is written to ``output_path``.
    """
    argument_keypoints = {}
    for topic in argument_df.topic.unique():
        for stance in [-1, 1]:
            # Build each boolean mask once and reuse it for ids and texts.
            group_arguments = argument_df[(argument_df.topic == topic) & (argument_df.stance == stance)]
            if group_arguments.empty:
                # Nothing to score for this group, so skip the encoding work.
                continue
            group_keypoints = keypoint_df[(keypoint_df.topic == topic) & (keypoint_df.stance == stance)]

            topic_keypoints_ids = group_keypoints['key_point_id'].tolist()
            topic_keypoints = group_keypoints['key_point'].tolist()
            if append_topic:
                topic_keypoints = [topic + ' <SEP> ' + x for x in topic_keypoints]

            topic_keypoints_embeddings = model.encode(topic_keypoints)
            topic_kp_embed = dict(zip(topic_keypoints_ids, topic_keypoints_embeddings))

            topic_arguments_ids = group_arguments['arg_id'].tolist()
            topic_arguments = group_arguments['argument'].tolist()
            topic_arguments_embeddings = model.encode(topic_arguments)
            topic_arg_embed = dict(zip(topic_arguments_ids, topic_arguments_embeddings))

            argument_keypoints = match_argument_with_keypoints(argument_keypoints, topic_kp_embed, topic_arg_embed)

    # Context manager guarantees the file is flushed and closed, even on error.
    with open(output_path, 'w') as output_file:
        json.dump(argument_keypoints, output_file)

    return argument_keypoints

In [None]:
# Paths of the trained sentence-transformer models that predict_and_evaluate()
# loads and evaluates, one after the other.
models_list = [
    '/content/drive/MyDrive/Πτυχιακή/Code/argmining-21-keypoint-analysis-sharedtask-code/KPA_2021_shared_task/siamese-models_GR/EftychiaKarav/DistilGREEK-BERT-contrastive-10-epochs-2024-05-07_10-48-47']

# Folder in which predict_and_evaluate() saves the prediction JSON files.
pred_output_path ='/content/drive/MyDrive/Πτυχιακή/Code/argmining-21-keypoint-analysis-sharedtask-code/predictions/' # keep the trailing '/'

In [None]:
def predict_and_evaluate(argument_df, keypoint_df, gold_data_dir, subset_name):
    """Predict and evaluate with every model listed in ``models_list``.

    Reads the notebook globals ``models_list`` (model paths) and
    ``pred_output_path`` (output folder). For each model the predictions are
    written to ``<pred_output_path>/<model name>-<subset_name>-preds.json`` and
    then scored against the gold data with ``load_kpm_data``,
    ``get_predictions`` and ``evaluate_predictions`` (expected to come from
    the ``track_1_kp_matching`` star import -- confirm).

    Args:
        argument_df: Arguments to score; passed through to ``predict``.
        keypoint_df: Key points to score against; passed through to ``predict``.
        gold_data_dir: Folder handed to ``load_kpm_data``.
        subset_name: Subset handed to ``load_kpm_data``; also part of the
            predictions file name.

    Returns:
        Dict mapping each model name (last component of its path) to the
        object returned by ``get_predictions`` for that model.
    """
    pred_df = {}
    for model_path in models_list:
        # The topic is prepended to the key points only for models whose path
        # contains the marker 'topic_added'.
        append_topic = 'topic_added' in model_path

        # Predict
        model = SentenceTransformer(model_path)
        # normpath strips a trailing separator, so the model name is never empty.
        model_name = os.path.basename(os.path.normpath(model_path))
        # os.path.join works whether or not pred_output_path ends with '/'.
        predictions_file = os.path.join(pred_output_path, model_name + '-' + subset_name + '-preds.json')
        predict(model, argument_df, keypoint_df, predictions_file, append_topic)

        # Evaluate
        arg_df, kp_df, labels_df = load_kpm_data(gold_data_dir, subset=subset_name)
        merged_df = get_predictions(predictions_file, labels_df, arg_df)
        print('Evaluating {}:'.format(model_name))
        evaluate_predictions(merged_df)

        pred_df[model_name] = merged_df

    return pred_df

In [None]:
# Predict and evaluate every model in models_list on the translated dev split;
# pred_dfs maps each model name to its merged prediction DataFrame.
pred_dfs = predict_and_evaluate(valid_arguments_df, valid_keypoints_df,  '/content/drive/MyDrive/Πτυχιακή/Code/KPA_2021_shared_task/kpm_data', 'translated_dev')

---------

### Predicting on the final test set:

In [None]:
# Arguments and key points of the human-translated test split.
test_arg_df = pd.read_csv('/content/drive/MyDrive/Πτυχιακή/Code/KPA_2021_shared_task/test_data/arguments_human_translated_test.csv')
test_keypoints_df = pd.read_csv('/content/drive/MyDrive/Πτυχιακή/Code/KPA_2021_shared_task/test_data/key_points_human_translated_test.csv')


In [None]:
# Score the test split with a single model and keep the raw scores in json_preds.
# NOTE(review): this model path is not the one listed in models_list, so the
# predict_and_evaluate(...) call on the test data evaluates a different model.
model = SentenceTransformer('/content/drive/MyDrive/Πτυχιακή/Code/argmining-21-keypoint-analysis-sharedtask-code/KPA_2021_shared_task/siamese-models_GR/EftychiaKarav/DistilGREEK-BERT-contrastive-10-epochs-2024-07-04_16-39-35')
# NOTE(review): the output path looks like a placeholder; as a relative path the
# JSON is written to the current working directory. append_topic is forced to
# True here although the model path does not contain 'topic_added' -- confirm
# that this matches how the model was trained.
json_preds = predict(model, test_arg_df, test_keypoints_df, 'output_of_the_predicition', True)

In [None]:
# Predict and evaluate on the human-translated test split. This uses the models
# in models_list, not the `model` variable loaded in the previous cell.
pred_test = predict_and_evaluate(test_arg_df, test_keypoints_df,  '/content/drive/MyDrive/Πτυχιακή/Code/KPA_2021_shared_task/test_data', 'human_translated_test')