<h4>
I used the Hugging Face CLI to get access to Llama 2. In this notebook, Llama 2 extracts the main symptoms from each description, and the results are saved in symptom-description.csv.
</h4>

In [2]:
from transformers import AutoTokenizer
import transformers
import torch

# Hugging Face model id used by both the tokenizer and the pipeline.
# Other ids noted by the author: mistralai/Mistral-7B-v0.1, meta-llama/Llama-2-7b-hf
model = "meta-llama/Llama-2-7b-chat-hf"

# `token=True` reuses the credentials saved by the Hugging Face CLI login.
# It replaces `use_auth_token=True`, which transformers has deprecated.
tokenizer = AutoTokenizer.from_pretrained(model, token=True)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from transformers import pipeline

# Text-generation pipeline for the checkpoint named by `model`; every prompt in
# this notebook is sent through this single object.
llama_pipeline = pipeline(
    task="text-generation",
    model=model,
    device_map="auto",          # let the library decide where the weights are placed
    torch_dtype=torch.float16,  # load the weights in half precision
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:21<00:00, 10.57s/it]


In [5]:
def get_llama_response(prompt: str, max_length: int = 256) -> tuple[str, str]:
    """
    Generate a response from the Llama model.

    Parameters:
        prompt (str): The full prompt text handed to ``llama_pipeline``.
        max_length (int): Forwarded to the pipeline as ``max_length``.
            Defaults to 256, the value this notebook previously hard-coded.
            Per the transformers generation docs this limit covers prompt
            tokens plus generated tokens, so long prompts leave less room
            for the answer.

    Returns:
        tuple[str, str]: ``("Chatbot:", generated_text)``, where
        ``generated_text`` is the ``'generated_text'`` field of the first
        returned sequence. Nothing is printed; callers read element ``[1]``.
    """
    sequences = llama_pipeline(
        prompt,
        do_sample=True,  # sampling is on, so repeated calls may give different text
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=max_length,
    )
    return ("Chatbot:", sequences[0]['generated_text'])

In [15]:
# Zero-shot: a bare instruction followed by the text, with no system prompt and
# no worked example. The [INST] block is closed with [/INST] like the other
# prompts in this notebook, so the model sees a complete user turn.
prompt = ''' 
[INST]
Extract main symptoms from the sentence above  ?  "
Text: My skin has been peeling, especially on my knees, elbows, and scalp. This peeling is often accompanied by a burning or stinging sensa
[/INST]
'''

zeroshot=get_llama_response(prompt)

In [21]:
# Zero-shot + Role: same text as the zero-shot cell, but the instruction also
# asks for the answer as a list called mainsymptoms. No worked example is given.
prompt = ''' 
[INST]
Extract main symptoms from the sentence above and retrun them only as tagged as a list called mainsymptoms? P "
Text:My skin has been peeling, especially on my knees, elbows, and scalp. This peeling is often accompanied by a burning or stinging sensa

[/INST]
'''
zeroplusrole=get_llama_response(prompt)


In [22]:
# Few-shot + Role: a <<SYS>> instruction, one worked example (a user text and
# the expected `mainsymptoms=[...]` answer), then the sentence to annotate.
prompt = '''
[INST] <<SYS>>
Extract main symptoms from the sentence above and retrun them only as tagged as a list called mainsymptoms?
<</SYS>>
Text: I have been experiencing a skin rash on my arms, legs, and torso for the past few weeks. It is red, itchy, and covered in dry, scaly patches. 
[/INST]
mainsymptoms=["skin_rash", "red_skin", "itchy", "dry", "scaly_patches"]
[INST]
My skin has been peeling, especially on my knees, elbows, and scalp. This peeling is often accompanied by a burning or stinging sensa
[/INST]
'''
fewshot=get_llama_response(prompt)

In [24]:
def _final_line(response):
    """Return the last line of the generated text in a ("Chatbot:", text) pair."""
    return response[1].rsplit('\n', 1)[-1]


print("zero shot output-->", _final_line(zeroshot))
print("zero plus role output-->", _final_line(zeroplusrole))

# The few-shot prompt gave the best output and was structured the way we
# wanted, so the few-shot prompt is the one used to annotate the data.
print("fews shot plus role -->", _final_line(fewshot))

zero shot output--> Note: These are the main symptoms mentioned in the sentence, but there may be other symptoms present depending on the context of the text.
zero plus role output--> mainsymptoms = ["peeling", "burning", "stinging sensation"]
fews shot plus role --> mainsymptoms=["peeling_skin", "burning_sensation", "stinging_sensation"]


In [40]:
# Builds the few-shot prompt used for bulk annotation.
def create_prompt(inp):
    """Wrap ``inp`` in the few-shot symptom-extraction prompt.

    The result starts with an empty line, then nine lines each indented by
    four spaces (system instruction, one worked example, and ``inp`` as the
    final user turn), and ends with a four-space line after the last
    ``[/INST]``.
    """
    indent = "    "
    prompt_lines = (
        "[INST] <<SYS>>",
        "Extract main symptoms one or two words from the text, return just the symptom tagged as a list called mainsymptoms?",
        "<</SYS>>",
        "Text: My nails have small dents or pits in them, and they often feel inflammatory and tender to the touch. Even there are minor rashes on my arms.",
        "[/INST]",
        "mainsymptoms=[dents, inflammation, skin_rash, tender_to_touch]",
        "[INST]",
        f"{inp}",
        "[/INST]",
    )
    indented = "".join(f"{indent}{line}\n" for line in prompt_lines)
    return "\n" + indented + indent

<h3>Extracting symptoms in bulk </h3>

In [35]:
# Cleans the model output and returns just the extracted symptoms.
def return_list(my_output, line_indices=(10, 11)):
    """Pull the symptom list out of a generated text.

    The text is split on newlines. Each index in ``line_indices`` is tried in
    order, and the first line that exists and contains an ``=`` supplies the
    answer: everything between the first and second ``=`` on that line.

    Parameters:
        my_output (str): Full generated text (prompt followed by the answer).
        line_indices (tuple[int, ...]): Candidate line positions of the
            ``mainsymptoms=[...]`` answer. The default ``(10, 11)`` matches a
            prompt from ``create_prompt`` whose input text is a single line.

    Returns:
        list[str]: The symptoms with double quotes, square brackets and
        surrounding whitespace removed; empty entries are dropped.
        str: The sentinel ``"NaN"`` when no candidate line yields an answer.
    """
    lines = my_output.split("\n")

    raw_answer = None
    for index in line_indices:
        try:
            # IndexError covers both a missing line and a line without "=".
            raw_answer = lines[index].split("=")[1]
        except IndexError:
            continue
        break

    if raw_answer is None:
        return "NaN"  # no candidate line held an answer

    symptoms = []
    for piece in raw_answer.split(","):
        cleaned = piece.replace("\"", "").replace("[", "").replace("]", "").strip()
        if cleaned:
            symptoms.append(cleaned)
    return symptoms


In [27]:
# Read symptom-description.csv. The bulk-annotation cell below reads the texts
# from the second column and writes the extracted symptoms into the third
# column (called extracted_sym in the original note).
import pandas as pd
df= pd.read_csv("symptom-description.csv")


In [48]:
# Annotate every description, sending the prompts to the pipeline in chunks of
# `batch_size`.
# NOTE(review): the chunk is passed as a plain list without a `batch_size=`
# argument to the pipeline, so whether the GPU really processes 32 prompts at
# once is not established by this cell — confirm against the pipeline docs.
batch_size = 32
inputs = df.iloc[:, 1].tolist()
outputs = []

# Processing in batches
for i in range(0, len(inputs), batch_size):
    batch_inputs = inputs[i:i+batch_size]
    prompts = [create_prompt(inp) for inp in batch_inputs]
    batch_outputs = llama_pipeline(prompts)  # Adjust max_length as needed

    for output in batch_outputs:
        mylist = return_list(output[0]['generated_text'])
        if isinstance(mylist, str):
            # return_list reports a parse failure as a plain string; joining a
            # string would put commas between its characters ("N,a,N").
            outputs.append(mylist)
        else:
            outputs.append(",".join(mylist))

# Write the results into the third column, one row per processed input.
df.iloc[:len(outputs), 2] = outputs

In [50]:
# Save the annotated symptom descriptions. This writes back to the same file
# that was read earlier, without the DataFrame index.
df.to_csv("symptom-description.csv", index=False)
