In [None]:
!pip install fastdtw Levenshtein



In [None]:
import os
import json
import pandas as pd
import numpy as np
import Levenshtein

from ast import literal_eval

from fastdtw import fastdtw
from difflib import SequenceMatcher
from scipy.spatial.distance import euclidean
from scipy.stats import kendalltau, spearmanr

In [None]:
# Mount Google Drive at /content/drive; the directories configured below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Project root on the mounted drive, plus its `data` and `results` sub-folders.
base_dir = '/content/drive/MyDrive/NLP/vaers_analysis'
data_dir, results_dir = (
    os.path.join(base_dir, subfolder) for subfolder in ('data', 'results')
)

In [None]:
# Load one results CSV per model from `results_dir`, in the order Flan-T5, BioBART, Claude.
flant5_eval_df, biobart_eval_df, claude_eval_df = (
    pd.read_csv(os.path.join(results_dir, filename))
    for filename in (
        'flan-t5-large-peft-results.csv',
        'biobart-v2-large-peft-results.csv',
        'claude-3-5-sonnet-20241022-results.csv',
    )
)

In [None]:
def prepare_sequence_data(eval_df):
    """Parses the sequence columns and attaches integer-encoded copies of them.

    - Converts stringified sequences into Python lists. Values that are already
      lists are left untouched, so the function can be applied to its own output.
    - Strips surrounding whitespace and stray quote characters from every
      predicted symbol (true symbols are kept as parsed).
    - Maps every distinct symbol of both columns to an integer id. Ids are
      assigned in sorted symbol order so they are identical on every run.
    - Adds `true_seq_num` and `predicted_seq_num` columns with the encoded sequences.

    Args:
        eval_df (pd.DataFrame): Input DataFrame with `true_sequence` and
            `predicted_sequence` columns. It is not modified.

    Returns:
        pd.DataFrame: A copy of `eval_df` with parsed/cleaned sequences and the
        two numeric sequence columns added.
    """
    def parse_sequence(value):
        # Sequences read back from CSV arrive as the repr of a list; anything
        # that is not a string is assumed to be an already-parsed sequence.
        return literal_eval(value) if isinstance(value, str) else value

    def clean_symbol(symbol):
        return symbol.strip().strip("'\"")

    # Work on a copy so the caller's frame keeps its original columns.
    prepared_df = eval_df.copy()
    prepared_df['true_sequence'] = prepared_df['true_sequence'].apply(parse_sequence)
    prepared_df['predicted_sequence'] = prepared_df['predicted_sequence'].apply(
        lambda seq: [clean_symbol(sym) for sym in parse_sequence(seq)]
    )

    # Collect the vocabulary in a single pass over both columns.
    unique_symbols = {
        symbol
        for column in ('true_sequence', 'predicted_sequence')
        for seq in prepared_df[column]
        for symbol in seq
    }
    # Sorting fixes the id of each symbol; enumerating the raw set would give
    # ids that depend on set iteration order and change between kernel restarts.
    mapping = {symbol: idx for idx, symbol in enumerate(sorted(unique_symbols))}

    # Map sequences to numeric values
    for source_column, numeric_column in (
        ('true_sequence', 'true_seq_num'),
        ('predicted_sequence', 'predicted_seq_num'),
    ):
        prepared_df[numeric_column] = prepared_df[source_column].apply(
            lambda seq: [mapping[symbol] for symbol in seq]
        )

    return prepared_df

In [None]:
# Parse and integer-encode the sequences of every model's results frame.
flant5_eval_df, biobart_eval_df, claude_eval_df = map(
    prepare_sequence_data,
    (flant5_eval_df, biobart_eval_df, claude_eval_df),
)

In [None]:
def calc_kendalltau(row):
    """
    Computes Kendall's Tau between the true and predicted numeric sequences of a row.

    Parameters:
        row (dict): A row from a dataset containing two sequences:
                    - 'true_seq_num': List of numeric representations of the true sequence.
                    - 'predicted_seq_num': List of numeric representations of the predicted sequence.

    Returns:
        float: Kendall's Tau correlation coefficient (the p-value is discarded).

    Description:
        Both sequences are truncated to the length of the shorter one and then
        compared position by position. Kendall's Tau is based on the number of
        concordant and discordant pairs and ranges from:
        -1: Perfectly discordant ordering.
         0: No correlation.
         1: Perfectly concordant ordering.

        NOTE(review): the inputs are identifier codes of symbols, so the value
        depends on how identifiers were assigned - confirm this is intended.
    """
    true_ids, predicted_ids = row['true_seq_num'], row['predicted_seq_num']
    # Only the overlapping prefix of the two sequences is compared.
    common_length = min(len(true_ids), len(predicted_ids))
    coefficient, _p_value = kendalltau(
        true_ids[:common_length],
        predicted_ids[:common_length],
    )
    return coefficient


def calc_lcs_ratio(row):
    """
    Computes a common-subsequence similarity ratio between the true and predicted sequences.

    Parameters:
        row (dict): A row from a dataset containing two sequences:
                    - 'true_sequence': List of strings representing the true sequence.
                    - 'predicted_sequence': List of strings representing the predicted sequence.

    Returns:
        float or None: The `difflib.SequenceMatcher` ratio of the two sequences,
        or None when either sequence is empty.

    Description:
        The score is `SequenceMatcher.ratio()`, i.e. twice the number of matched
        elements divided by the combined length of both sequences. A ratio
        closer to 1 means the two sequences share more elements in the same order.
    """
    true_seq = row['true_sequence']
    predicted_seq = row['predicted_sequence']
    # No score is produced unless both sequences contain at least one element.
    if not (true_seq and predicted_seq):
        return None
    return SequenceMatcher(None, true_seq, predicted_seq).ratio()


def calc_dtw(row):
    """
    Calculates the Dynamic Time Warping (DTW) distance between two sequences.

    Parameters:
        row (dict): A row from a dataset containing two sequences:
                    - 'true_seq_num': List of numeric representations of the true sequence.
                    - 'predicted_seq_num': List of numeric representations of the predicted sequence.

    Returns:
        float: Normalized DTW distance between the two sequences, or NaN when
        either sequence is empty.

    Description:
        DTW measures the similarity between sequences of varying lengths by finding an optimal alignment.
        The returned distance is normalized by dividing by the length of the longer sequence, ensuring
        consistency across different sequence lengths.
    TODO: Needs to be updated to support using the embedding vector for each word
    """
    true_ids = row['true_seq_num']
    predicted_ids = row['predicted_seq_num']
    # An empty sequence leaves nothing to align, and two empty sequences would
    # make the normalisation below divide by zero; report "no score" instead.
    if len(true_ids) == 0 or len(predicted_ids) == 0:
        return np.nan

    # fastdtw is given one single-feature row per sequence element.
    seq1 = np.array(true_ids).reshape(-1, 1)
    seq2 = np.array(predicted_ids).reshape(-1, 1)
    distance, _ = fastdtw(seq1, seq2, dist=euclidean)
    normalized_dist = distance / max(seq1.shape[0], seq2.shape[0])
    return normalized_dist


def calc_spearmanr(row):
    """
    Computes Spearman's rank correlation between the true and predicted numeric sequences of a row.

    Parameters:
        row (dict): A row from a dataset containing two sequences:
                    - 'true_seq_num': List of numeric representations of the true sequence.
                    - 'predicted_seq_num': List of numeric representations of the predicted sequence.

    Returns:
        float: Spearman's Rank Correlation coefficient (the p-value is discarded).

    Description:
        Both sequences are truncated to the length of the shorter one, reshaped
        into single-column arrays and passed to `scipy.stats.spearmanr`.
        The coefficient ranges from:
        -1: Perfectly inversely correlated.
         0: No correlation.
         1: Perfectly correlated.

    TODO: Needs to be updated to support using the embedding vector for each word
    """
    def as_column(values):
        # One row per sequence element, a single column.
        return np.reshape(np.array(values), (-1, 1))

    true_ids, predicted_ids = row['true_seq_num'], row['predicted_seq_num']
    # Only the overlapping prefix of the two sequences is compared.
    common_length = min(len(true_ids), len(predicted_ids))
    coefficient, _p_value = spearmanr(
        as_column(true_ids[:common_length]),
        as_column(predicted_ids[:common_length]),
    )
    return coefficient


def calc_levenshtein_ratio(row):
    """
    Computes the Levenshtein similarity ratio between the true and predicted sequences.

    Parameters:
        row (dict): A row from a dataset containing two sequences:
                    - 'true_sequence': List of strings representing the true sequence.
                    - 'predicted_sequence': List of strings representing the predicted sequence.

    Returns:
        float: Levenshtein similarity ratio (0 to 1).

    Description:
        Each sequence is first joined into one comma-separated string, and the
        two strings are then compared with `Levenshtein.ratio`. The score is
        therefore computed on the text of the joined strings, not on whole
        sequence elements. The result is normalized between
        0 (completely different) and 1 (identical).
    """
    true_text, predicted_text = (
        ','.join(row[column]) for column in ('true_sequence', 'predicted_sequence')
    )
    return Levenshtein.ratio(true_text, predicted_text)


In [None]:
def calculate_evaluation_metrics(results_df):
    """Scores every row with the enabled sequence metrics.

    Each enabled metric function is applied row-wise and stored in a column
    named after the metric. The helper columns `true_seq_num`,
    `predicted_seq_num` and `prompt` are removed from the result.

    Args:
        results_df (pd.DataFrame): Frame with `true_sequence`,
            `predicted_sequence`, `true_seq_num` and `predicted_seq_num`
            columns. It is not modified.

    Returns:
        pd.DataFrame: A copy of `results_df` with one column per metric added
        and the helper columns dropped.
    """
    # Mapping column names to functions
    metrics = {
        'kendalltau': calc_kendalltau,
        'lcs_ratio': calc_lcs_ratio,
        'levenshtein_ratio': calc_levenshtein_ratio,
        # 'spearmanr': calc_spearmanr,
        # 'normalized_dtw': calc_dtw,
    }

    # Score into a copy so the caller's frame keeps its columns.
    scored_df = results_df.copy()

    # Apply each metric function
    for metric_name, func in metrics.items():
        scored_df[metric_name] = results_df.apply(func, axis=1)

    # errors='ignore' tolerates inputs that lack one of the helper columns
    # (for example a results file without a `prompt` column).
    return scored_df.drop(
        columns=['true_seq_num', 'predicted_seq_num', 'prompt'],
        errors='ignore',
    )

In [None]:
# Score every model's prepared results frame with the evaluation metrics.
flant5_eval_df, biobart_eval_df, claude_eval_df = map(
    calculate_evaluation_metrics,
    (flant5_eval_df, biobart_eval_df, claude_eval_df),
)

In [None]:
# Preview the first 10 scored Flan-T5 rows.
print('Flan-T5 Evaluation Results:\n')
flant5_eval_df.head(10)

Flan-T5 Evaluation Results:



Unnamed: 0,symptom_text,true_sequence,predicted_sequence,kendalltau,lcs_ratio,levenshtein_ratio
0,"A few days after my vaccine, I noticed under t...","[Blister, Erythema]","[Blister, Erythema]",1.0,1.0,1.0
1,Period schedule on and off the chart; Increase...,"[Menstrual disorder, Heavy menstrual bleeding,...","[Menstrual disorder, Heavy menstrual bleeding,...",1.0,0.857143,0.955414
2,"within 24 hours of receiving my 2nd dose, I fi...","[Pyrexia, Chills, Headache, Myalgia, Neuralgia...","[Pyrexia, Chills, Myalgia, Myalgia, Neuralgia,...",0.435194,0.666667,0.763889
3,Side effects seem to have cleared up by the 17...,"[Vaccination complication, Headache]","[Vaccination complication, Headache]",1.0,1.0,1.0
4,I received my first Moderna vaccine on one/14/...,"[Lymphadenopathy, Arthralgia, Pain in extremit...","[Carbohydrate antigen 125, Lymphadenopathy, Ly...",-0.140859,0.380952,0.533333
5,Patient experienced only chills; Fever; Sorene...,"[Chills, Pyrexia, Myalgia]","[Chills, Pyrexia, Myalgia]",1.0,1.0,1.0
6,"8 days after the first vaccine dose, I had itc...","[Injection site pruritus, Injection site swell...","[Injection site swelling, Injection site pruri...",0.333333,0.444444,0.728972
7,Sore arm; Very tired; Headache; Burning sensat...,"[Burning sensation, Headache, Pain in extremit...","[Pain in extremity, Burning sensation, Fatigue...",-0.666667,0.5,0.5
8,Chills; Urinating (More often); This spontaneo...,"[Chills, Pollakiuria]","[Chills, Pollakiuria]",1.0,1.0,1.0
9,"Swelling Left arm, upper Calves and legs swell...","[Peripheral swelling, Vaccination site bruisin...","[Vaccination site bruising, Pain in extremity,...",-0.121212,0.8,0.844193


In [None]:
# Preview the first 10 scored BioBART rows.
print('\nBioBART Evaluation Results:\n')
biobart_eval_df.head(10)


BioBART Evaluation Results:



Unnamed: 0,symptom_text,true_sequence,predicted_sequence,kendalltau,lcs_ratio,levenshtein_ratio
0,"A few days after my vaccine, I noticed under t...","[Blister, Erythema]","[Erythema, Blister]",-1.0,0.5,0.5
1,Period schedule on and off the chart; Increase...,"[Menstrual disorder, Heavy menstrual bleeding,...","[Menstrual disorder, Heavy menstrual bleeding,...",1.0,0.857143,0.955414
2,"within 24 hours of receiving my 2nd dose, I fi...","[Pyrexia, Chills, Headache, Myalgia, Neuralgia...","[Pyrexia, Chills, Headache, Myalgia, Neuralgia...",0.928571,0.823529,0.861314
3,Side effects seem to have cleared up by the 17...,"[Vaccination complication, Headache]","[Vaccination complication, Headache]",1.0,1.0,1.0
4,I received my first Moderna vaccine on one/14/...,"[Lymphadenopathy, Arthralgia, Pain in extremit...","[Lymphadenopathy, Menstrual disorder, Amenorrh...",-0.155556,0.272727,0.534435
5,Patient experienced only chills; Fever; Sorene...,"[Chills, Pyrexia, Myalgia]","[Chills, Pyrexia, Myalgia]",1.0,1.0,1.0
6,"8 days after the first vaccine dose, I had itc...","[Injection site pruritus, Injection site swell...","[Injection site pruritus, Injection site swell...",0.6,0.6,0.866667
7,Sore arm; Very tired; Headache; Burning sensat...,"[Burning sensation, Headache, Pain in extremit...","[Pain in extremity, Burning sensation, Headach...",0.333333,0.75,0.653846
8,Chills; Urinating (More often); This spontaneo...,"[Chills, Pollakiuria]","[Chills, Pollakiuria]",1.0,1.0,1.0
9,"Swelling Left arm, upper Calves and legs swell...","[Peripheral swelling, Vaccination site bruisin...","[Peripheral swelling, Vaccination site bruisin...",0.555556,0.727273,0.860841


In [None]:
# Preview the first 10 scored Claude rows.
print('\nClaude Evaluation Results:\n')
claude_eval_df.head(10)


Claude Evaluation Results:



Unnamed: 0,symptom_text,true_sequence,predicted_sequence,kendalltau,lcs_ratio,levenshtein_ratio
0,"A few days after my vaccine, I noticed under t...","[Blister, Erythema]","[Blister, Erythema]",1.0,1.0,1.0
1,Period schedule on and off the chart; Increase...,"[Menstrual disorder, Heavy menstrual bleeding,...","[Therapeutic response unexpected, Menstrual di...",-0.333333,0.75,0.609756
2,"within 24 hours of receiving my 2nd dose, I fi...","[Pyrexia, Chills, Headache, Myalgia, Neuralgia...","[Pyrexia, Chills, Headache, Myalgia, Neuralgia...",0.428571,0.941176,0.963504
3,Side effects seem to have cleared up by the 17...,"[Vaccination complication, Headache]","[Vaccination complication, Headache]",1.0,1.0,1.0
4,I received my first Moderna vaccine on one/14/...,"[Lymphadenopathy, Arthralgia, Pain in extremit...","[Lymphadenopathy, Arthralgia, Pain in extremit...",0.644444,0.727273,0.72067
5,Patient experienced only chills; Fever; Sorene...,"[Chills, Pyrexia, Myalgia]","[Chills, Pyrexia, Myalgia]",1.0,1.0,1.0
6,"8 days after the first vaccine dose, I had itc...","[Injection site pruritus, Injection site swell...","[Injection site erythema, Injection site indur...",0.6,0.6,0.781513
7,Sore arm; Very tired; Headache; Burning sensat...,"[Burning sensation, Headache, Pain in extremit...","[Burning sensation, Headache, Pain in extremit...",1.0,1.0,1.0
8,Chills; Urinating (More often); This spontaneo...,"[Chills, Pollakiuria]","[Chills, Pollakiuria]",1.0,1.0,1.0
9,"Swelling Left arm, upper Calves and legs swell...","[Peripheral swelling, Vaccination site bruisin...","[Vaccination site bruising, Pain in extremity,...",0.363636,0.666667,0.751479


In [None]:
# Write the per-row scores of each model to its own CSV in `results_dir` (index column omitted).
flant5_eval_df.to_csv(
    os.path.join(results_dir, 'flan-t5-large-peft-eval-results.csv'),
    index=False,
)
biobart_eval_df.to_csv(
    os.path.join(results_dir, 'biobart-v2-large-peft-eval-results.csv'),
    index=False,
)
claude_eval_df.to_csv(
    os.path.join(results_dir, 'claude-3-5-sonnet-20241022-eval-results.csv'),
    index=False,
)

In [None]:
# Average every metric column separately for each model.
flant5_mean, biobart_mean, claude_mean = (
    eval_df[['kendalltau', 'lcs_ratio', 'levenshtein_ratio']].mean()
    for eval_df in (flant5_eval_df, biobart_eval_df, claude_eval_df)
)

# Build a table with one row per model and one column per metric; the index is
# labelled "model".
# NOTE(review): the 'peft-flant5-v2-large' label does not match the
# 'flan-t5-large-peft' file names used for this model - confirm the intended name.
results = pd.DataFrame({
    'peft-flant5-v2-large': flant5_mean,
    'peft-biobart-v2-large': biobart_mean,
    'claude-3-5-sonnet-20241022': claude_mean
}).T.rename_axis('model')

results

Unnamed: 0_level_0,kendalltau,lcs_ratio,levenshtein_ratio
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
peft-flant5-v2-large,0.282029,0.711846,0.789295
peft-biobart-v2-large,0.358936,0.768909,0.82919
claude-3-5-sonnet-20241022,0.479497,0.811773,0.85111
