In [None]:
# Mount Google Drive at /content/drive (Colab only); the project paths
# configured later in this notebook live underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import re
import os
import json
import pandas as pd
import numpy as np

from ast import literal_eval

In [None]:
# Project root on the mounted Drive, and the sub-folder that the input CSV is
# read from and the results CSV is written to.
base_dir = '/content/drive/MyDrive/NLP/vaers_analysis'
data_dir = os.path.join(base_dir, 'data')

In [None]:
# Load the test split. The two symptom columns are stored in the CSV as
# stringified Python lists, so they are parsed back into real lists here.
vaers_data = pd.read_csv(os.path.join(data_dir, 'test_vaers_data.csv')).assign(
    symptoms=lambda frame: frame['symptoms'].apply(literal_eval),
    ordered_symptoms=lambda frame: frame['ordered_symptoms'].apply(literal_eval),
)
vaers_data.head()

Unnamed: 0,vaers_id,year,vax_type,vax_manu,symptom_text,symptoms,ordered_symptoms,report_length,num_symptoms
0,1344132,2021,COVID19,MODERNA,"A few days after my vaccine, I noticed under t...","[Blister, Erythema]","[Blister, Erythema]",93,2
1,1842147,2021,COVID19,MODERNA,Period schedule on and off the chart; Increase...,"[Biopsy, Heavy menstrual bleeding, Menstrual d...","[Menstrual disorder, Heavy menstrual bleeding,...",259,4
2,1165207,2021,COVID19,MODERNA,"within 24 hours of receiving my 2nd dose, I fi...","[Chills, Dry eye, Eye pain, Fatigue, Headache,...","[Pyrexia, Chills, Headache, Myalgia, Neuralgia...",110,9
3,1618374,2021,COVID19,MODERNA,Side effects seem to have cleared up by the 17...,"[Headache, Vaccination complication]","[Vaccination complication, Headache]",241,2
4,2460242,2022,COVID19,MODERNA,I received my first Moderna vaccine on one/14/...,"[Amenorrhoea, Arthralgia, Carbohydrate antigen...","[Lymphadenopathy, Arthralgia, Pain in extremit...",162,12


## Temporal Sequence Generation Using Claude 3.5 Sonnet

In [None]:
!pip install anthropic

Collecting anthropic
  Downloading anthropic-0.40.0-py3-none-any.whl.metadata (23 kB)
Downloading anthropic-0.40.0-py3-none-any.whl (199 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.5/199.5 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.40.0


In [None]:
# Fetch the value stored under 'anthropic_api_key' through Colab's userdata
# helper, so the key is not hardcoded in the notebook.
from google.colab import userdata
api_key = userdata.get('anthropic_api_key')

In [None]:
import anthropic

# model_name is sent with every request and is also used to name the
# results CSV written at the end of the notebook.
model_name = "claude-3-5-sonnet-20241022"
# Shared client used for all message requests below.
client = anthropic.Anthropic(api_key=api_key)

In [None]:
def create_prompt(row):
    """Build the symptom-ordering prompt for a single report.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing
            ``'symptom_text'`` (the report narrative) and ``'symptoms'``
            (an iterable of symptom name strings).

    Returns:
        str: The instruction, the report text, the comma-separated symptom
        list and a trailing ``Ordered Symptoms:`` cue, separated by blank
        lines.
    """
    instruction = "Order the symptoms in chronological order based on the timeline implied in the text. Think step by step to determine the timeline for each symptom. Provide the ordered symptoms list in this format, Ordered Symptoms: [Rash, Diarrhoea]"
    symptom_csv = ', '.join(row['symptoms'])
    sections = [
        instruction,
        f"Text: {row['symptom_text']}",
        f"Symptoms: {symptom_csv}",
        "Ordered Symptoms:",
    ]
    return "\n\n".join(sections)

# Build one prompt per report (row-wise apply) and store it as a new column.
vaers_data['prompt'] = vaers_data.apply(create_prompt, axis=1)

In [None]:
def _parse_symptom_list(text):
    """Return the items of the last ``[...]`` list in ``text``, or None if there is none."""
    # [^\[\]]* (instead of .*?) lets the list wrap across lines and always
    # captures the innermost bracket pair.
    matches = re.findall(r"\[([^\[\]]*)\]", text)
    if not matches:
        return None
    # Split on bare commas and trim whitespace/quotes per item, so
    # "A,B", "A, B" and "'A' ,\n'B'" all parse to the same list.
    cleaned = (item.strip().strip("'\"").strip() for item in matches[-1].split(','))
    # Drop empty items so "[]" yields [] rather than [''].
    return [item for item in cleaned if item]


def get_ordered_symptoms(prompt_list, max_tokens=500):
    """Query the model once per prompt and parse the ordered symptom list from each reply.

    Relies on the module-level ``client`` and ``model_name``.

    Args:
        prompt_list: Iterable of prompt strings, sent one request at a time.
        max_tokens: Response token limit passed to the API (default 500).

    Returns:
        tuple[list[str], list[list[str]]]: ``(api_response, ordered_symptoms)``,
        both aligned with ``prompt_list``. A failed request contributes ``''``
        and ``[]``; a reply with no bracketed list contributes the raw reply
        and ``[]``.
    """
    api_response = []
    ordered_symptoms = []

    for prompt in prompt_list:
        try:
            message = client.messages.create(
                model=model_name,
                max_tokens=max_tokens,
                messages=[{"role": "user", "content": prompt}]
            )
            api_result = message.content[0].text
        except Exception as e:
            # Best-effort: report the failure and keep going so the outputs
            # stay aligned one-to-one with prompt_list.
            api_result = ''
            print(e)

        symptom_list = _parse_symptom_list(api_result)
        if symptom_list is None:
            print("No match found.")
            symptom_list = []

        api_response.append(api_result)
        ordered_symptoms.append(symptom_list)

    return api_response, ordered_symptoms

In [None]:
# Query the model once per prompt: claude_res holds the raw response texts and
# claude_symptoms the parsed symptom lists, both in prompt order.
claude_res, claude_symptoms = get_ordered_symptoms(vaers_data['prompt'].to_list())

In [None]:
# Collect the inputs, the reference ordering and the model's predicted ordering
# side by side, one row per report.
claude_results = pd.DataFrame({
    'symptom_text': vaers_data['symptom_text'].to_list(),
    'prompt': vaers_data['prompt'].to_list(),
    # The reference ordering is the `ordered_symptoms` column, not the prompt text.
    'true_sequence': vaers_data['ordered_symptoms'].to_list(),
    'predicted_sequence': claude_symptoms
})

In [None]:
# Write the results into the data folder, in a file named after the model.
results_path = os.path.join(data_dir, f'{model_name}-results.csv')
claude_results.to_csv(results_path, index=False)