In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pandas as pd

# Model location handed to both from_pretrained() calls in the loading cell.
# The value is a placeholder: replace it before running the notebook.
path = "PATH_TO_YOUR_MODEL"

In [None]:
# List of Excel file names to process.
# Each entry is read with pd.read_excel and later written back with
# df.to_excel under the same name, so the input file is overwritten.
excel_fname = ['dataset']

In [None]:
# Load the first listed workbook and preview its first five rows.
df = pd.read_excel(io=excel_fname[0])
df.head(n=5)

In [None]:
# Load the causal language model from `path`.
model = AutoModelForCausalLM.from_pretrained(
    path,
    revision="main",
    trust_remote_code=False,  # never execute custom code shipped with the checkpoint
    device_map="auto",
)

# Load the tokenizer from the same location, requesting the fast implementation.
tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)

In [None]:
# System prompt inserted between the <<SYS>> ... <</SYS>> tags by prompt_template().
# It sets the assistant persona, lists the response rules and gives one
# question/answer example. The string begins with a newline because the text
# starts on the line after the opening triple quote.
instruction = """
You are a professional doctor and pharmacist. If I ask you a medical question, answer politely and professionally, following the answering rules below.

###Response Rules###
1. Answers must have a clear medical basis or be based on dur(drug utilization review) information.
2. If the answer varies depending on the situation, answer according to each situation.
3. If you are not sure of the answer or do not know, please answer that you do not know.
4. Do not include answers with question-and-answer examples.

###Question and answer example ###
Question: Can Milta tablet be used on pregnant people?
Answer: Milta Tablet is not classified as contraindicated in the DUR information. It can be used depending on the patient's condition and the clinician's judgment."""

In [None]:
def prompt_template(question):
    """Wrap a question in the [INST] / <<SYS>> prompt layout.

    The module-level ``instruction`` text is placed between the ``<<SYS>>``
    and ``<</SYS>>`` tags, followed by the question and the closing
    ``[/INST]`` tag.

    Args:
        question: The question to ask; it is interpolated with f-string
            formatting, so non-string values are rendered via ``format()``.

    Returns:
        The assembled prompt string. Every line after the first starts with
        four spaces and the prompt ends with a newline plus four spaces;
        this whitespace is part of the emitted prompt.
    """
    pad = "    "
    pieces = [
        "[INST] <<SYS>>",
        f"{pad}{instruction}",
        f"{pad}<</SYS>>",
        f"{pad}{question}[/INST]",
        pad,
    ]
    return "\n".join(pieces)

In [None]:
# Text-generation pipeline built on the model and tokenizer loaded above.
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    # Generation settings passed along with every call to `pipe`.
    repetition_penalty=1.1,
    top_k=40,
    top_p=0.95,
    temperature=0.5,
    do_sample=True,  # sampling is on, so outputs vary between runs
    max_new_tokens=512,
)

In [None]:
for fname in excel_fname:
    # Load the Excel file into a pandas DataFrame.
    df = pd.read_excel(fname)
    # Questions to send to the model, one per row.
    questions = df['crude_question']
    answers = []
    # Generate an answer for every question and collect them in row order.
    for num, question in enumerate(questions):
        # return_full_text=False makes the pipeline return only the newly
        # generated text. Previously the prompt was cut away by splitting on a
        # hand-built "<</SYS>>...[/INST]" delimiter, which raised TypeError for
        # non-string cells (e.g. blank cells read as NaN), truncated the answer
        # if the delimiter text recurred, and left the prompt's trailing
        # indentation at the start of every stored answer.
        outputs = pipe(prompt_template(question), return_full_text=False)
        answer = outputs[0]['generated_text'].strip()
        answers.append(answer)
        print("question ", num," : ",question,"\n answer : ",answer)
    # Add the model answers to the DataFrame as a new column.
    df['Llama2-7b-gptq answer'] = answers

    # Write the DataFrame back to the same Excel file (overwrites the input).
    df.to_excel(fname, index = False)