## Importações

In [7]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import TrainingArguments
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import warnings
warnings.filterwarnings('ignore')

## Verificando a GPU

In [8]:
if torch.cuda.is_available():
    print(f'Numero de GPUs: {torch.cuda.device_count()}')
    print(f'Modelo GPU: {torch.cuda.get_device_name(0)}')
    print(f'Total Memória [GB] da GPU: {torch.cuda.get_device_properties(0).total_memory / 1e9}')

Numero de GPUs: 1
Modelo GPU: NVIDIA GeForce RTX 3060
Total Memória [GB] da GPU: 12.623478784


In [9]:
from numba import cuda
device = cuda.get_current_device()
device.reset()

## Carregando o Dataset

In [10]:
dataset = load_dataset('nlpie/Llama2-MedTuned-Instructions')

In [11]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 200252
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 70066
    })
})

In [12]:
for i in range(3):
    data = dataset['train'][i]
    print(f'Data point {i + 1}:')
    print(f'Instrution: {data["instruction"]}')
    print(f'Input: {data["input"]}')
    print(f'Output: {data["output"]}')
    print('\n---------------------------------\n')

Data point 1:
Instrution: In your role as a medical professional, address the user's medical questions and concerns.
Input: My relative suffering from secondary lever cancer ( 4th stage as per Allopathic doctor) and primary is in rectum. He is continuously with 103 to 104 degree F fever. Allpathic doctor suggested chemo only after fever subsidises. Is treatment possible at Lavanya & what is the time scale of recover.
Output: Hi, dairy have gone through your question. I can understand your concern. He has rectal cancer with liver metastasis. It is stage 4 cancer. Surgery is not possible at this stage. Only treatment options are chemotherapy and radiotherapy according to type of cancer. Inspite of all treatment prognosis is poor. Life expectancy is not good. Consult your doctor and plan accordingly. Hope I have answered your question, if you have any doubts then contact me at bit.ly/ Chat Doctor. Thanks for using Chat Doctor. Wish you a very good health.

--------------------------------

## Diminuindo a Amostra para Acelerar o Tempo deTreino

In [13]:
dataset['train'] = dataset['train'].select(range(3500))

In [14]:
dataset['test'] = dataset['train'].select(range(300))

In [15]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 3500
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 70066
    })
    test: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 300
    })
})

## Ajustando o Formato dos Dados de Entrada

In [16]:
def cria_prompt(sample):
    prompt = sample['instruction']
    prompt += sample['input']
    single_turn_prompt = f'Instruction: {prompt}<|end_of_turn|>AI Assistant: {sample["output"]}'
    return single_turn_prompt

In [17]:
cria_prompt(dataset['train'][0])

"Instruction: In your role as a medical professional, address the user's medical questions and concerns.My relative suffering from secondary lever cancer ( 4th stage as per Allopathic doctor) and primary is in rectum. He is continuously with 103 to 104 degree F fever. Allpathic doctor suggested chemo only after fever subsidises. Is treatment possible at Lavanya & what is the time scale of recover.<|end_of_turn|>AI Assistant: Hi, dairy have gone through your question. I can understand your concern. He has rectal cancer with liver metastasis. It is stage 4 cancer. Surgery is not possible at this stage. Only treatment options are chemotherapy and radiotherapy according to type of cancer. Inspite of all treatment prognosis is poor. Life expectancy is not good. Consult your doctor and plan accordingly. Hope I have answered your question, if you have any doubts then contact me at bit.ly/ Chat Doctor. Thanks for using Chat Doctor. Wish you a very good health."

## Parâmetros de Quantização

In [18]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= 'nf4',
    bnb_4bit_compute_dtype= 'float16',
    bnb_4bit_use_double_quant= True
)

## Carregando o LLM e o Tokenizador

In [19]:
repositorio_hf = 'berkeley-nest/Starling-LM-7B-alpha'

In [20]:
modelo_llm = AutoModelForCausalLM.from_pretrained(repositorio_hf,
                                                  quantization_config = bnb_config,
                                                  device_map = 'auto',
                                                  use_cache = False)

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:17<00:00,  5.71s/it]


In [21]:
tokenizer = AutoTokenizer.from_pretrained(repositorio_hf)

In [22]:
tokenizer.pad_token = tokenizer.eos_token

In [23]:
tokenizer.padding_side = 'right'

## Gerando Resposta com o LLM Antes do Ajuste Fino

In [24]:
def gera_resposta_antes_fine_tuning(prompt, model):

    encoded_input = tokenizer(
        prompt,
        return_tensors= 'pt',
        add_special_tokens= True
    )

    model_inputs = encoded_input.to('cuda')

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens = 1024,
        do_sample = True,
        pad_token_id = tokenizer.eos_token_id
    )

    decoded_output = tokenizer.batch_decode(generated_ids)

    return decoded_output[0].replace(prompt, '')

In [25]:
prompt = """Instruction: Your goal is to determine the relationship between the two provided clinical sentences and classify them into one of the following categories:
Contradiction: If the two sentences contradict each other. Neutral: If the two sentences are unrelated to each other. Entailment: If one of the sentences logically entails the other. """
prompt += '''Sentence 1: For his hypotension, autonomic testing confirmed orthostatic hypotension. Sentence 2: the patient has orthostatic hypotension <|end_of_turn|>'''
prompt += "AI Assistant:"

In [26]:
print(prompt)

Instruction: Your goal is to determine the relationship between the two provided clinical sentences and classify them into one of the following categories:
Contradiction: If the two sentences contradict each other. Neutral: If the two sentences are unrelated to each other. Entailment: If one of the sentences logically entails the other. Sentence 1: For his hypotension, autonomic testing confirmed orthostatic hypotension. Sentence 2: the patient has orthostatic hypotension <|end_of_turn|>AI Assistant:


In [27]:
gera_resposta_antes_fine_tuning(prompt, modelo_llm)

'<s> Instruction: Your goal is to determine the relationship between the two provided clinical sentences and classify them into one of the following categories:\nContradiction: If the two sentences contradict each other. Neutral: If the two sentences are unrelated to each other. Entailment: If one of the sentences logically entails the other. Sentence 1: For his hypotension, autonomic testing confirmed orthostatic hypotension. Sentence 2: the patient has orthostatic hypotension <|end_of_turn|> AI Assistant: Entailment<|end_of_turn|>'

## Parâmetros LoRa para PEFT

In [28]:
peft_config = LoraConfig(r = 8,
                         lora_alpha= 16,
                         lora_dropout= 0.05,
                         bias= 'none',
                         task_type= 'CAUSAL_LM')

In [29]:
modelo_llm = prepare_model_for_kbit_training(modelo_llm)

In [30]:
modelo_llm = get_peft_model(modelo_llm, peft_config)

## Argumentos de Treino

In [31]:
training_arguments = TrainingArguments(
    output_dir= 'modelo ajustado',
    per_device_train_batch_size= 1,
    gradient_accumulation_steps= 4,
    optim= 'paged_adamw_32bit',
    learning_rate= 2e-4,
    lr_scheduler_type= 'cosine',
    save_strategy= 'epoch',
    logging_steps= 10,
    num_train_epochs= 1,
    max_steps= 250,
    fp16= True
)

## Parâmetros do Supervised Fine-Tuning Trainer (SFFT)

In [32]:
trainer = SFTTrainer(
    model= modelo_llm,
    peft_config= peft_config,
    max_seq_length= 512,
    tokenizer= tokenizer,
    packing= True,
    formatting_func= cria_prompt,
    args= training_arguments,
    train_dataset= dataset['train'],
    eval_dataset= dataset['test']
)

max_steps is given, it will override any value given in num_train_epochs


## Treinamento (Ajuste Fino) do LLM

In [33]:
%%time
trainer.train()

Step,Training Loss
10,2.1941
20,2.0035
30,1.831
40,1.7943
50,1.6035
60,1.5321
70,1.3366
80,1.4068
90,1.4824
100,1.3483


CPU times: user 12min 47s, sys: 14min 40s, total: 27min 28s
Wall time: 27min 29s


TrainOutput(global_step=250, training_loss=1.4704985122680665, metrics={'train_runtime': 1649.0101, 'train_samples_per_second': 0.606, 'train_steps_per_second': 0.152, 'total_flos': 2.185444196352e+16, 'train_loss': 1.4704985122680665, 'epoch': 0.5211047420531527})

In [34]:
trainer.save_model('modelo ajustado')

In [35]:
modelo_final = modelo_llm.merge_and_unload()

## Gerando Texto com o LLM

In [36]:
def gera_resposta_depois_fine_tuning(prompt, model):
    encoded_input = tokenizer(
        prompt,
        return_tensors = 'pt',
        add_special_tokens = True
    )

    model_inputs = encoded_input.to('cuda')

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens = 512,
        do_sample = True,
        use_cache = False,
        pad_token_id = tokenizer.eos_token_id
    )

    decoded_output = tokenizer.batch_decode(generated_ids)

    return decoded_output[0]

### Inferência 1

In [37]:
%%time
prompt = "Instruction: In your role as a medical professional, address the user's medical questions and concerns. "
prompt += "I have a white tab under my tounge that is not only painful when i touch it but bleeds as well. not sure what it is, or why I got it. Can you give me any advise? <|end_of_turn|> "
prompt += "AI Assistant:"
response = gera_resposta_depois_fine_tuning(prompt, modelo_final)
print(response)

<s> Instruction: In your role as a medical professional, address the user's medical questions and concerns. I have a white tab under my tounge that is not only painful when i touch it but bleeds as well. not sure what it is, or why I got it. Can you give me any advise? <|end_of_turn|>  AI Assistant: Hello and thanks for your message. It sounds like you have a canker sore or some form of intraoral pathology including but not limited to: oral ulcers, ulcerative stomatitis, herpes stomatitis, oral thrush, or oral trauma from improper brushing. As for treatment recommendations: 1. Oral antiseptics: Chlorhexidine mouthwash is a good treatment option. You can get it from your local pharmacy or most supermarkets. 2. Oral analgesics/pain relief: Over-the-counter nonsteroidal anti-inflammatory drugs (NSAIDs) such as ibuprofen, naproxen, and aspirin have analgesic and anti-inflammatory effects. These should be used as directed by the label on the bottle or by your physician. In addition, a topic

In [38]:
%%time
prompt = "Instruction: In your capacity as a healthcare expert, offer insights and recommendations in response to users' medical inquiries. "
prompt += "I have terrible anxiety and depression. I've tried various therapists and pills, but nothing's helped. <|end_of_turn|> "
prompt += "AI Assistant:"
response = gera_resposta_depois_fine_tuning(prompt, modelo_final)
print(response)

<s> Instruction: In your capacity as a healthcare expert, offer insights and recommendations in response to users' medical inquiries. I have terrible anxiety and depression. I've tried various therapists and pills, but nothing's helped. <|end_of_turn|>  AI Assistant: It's great that you have sought help from your medical providers for your anxiety and depression. It's understandable that previous treatments haven't completely relieved your symptoms. Here are some recommendations:

1) Keep a mood and anxiety journal: This can help you track your moods and identify any specific triggers. It's also a useful tool to share with your healthcare team.

2) Medication: Your psychiatrist might consider changing or adjusting your current medication or trying a different one. It's important to talk to them about any side-effects and how they're affecting your day-to-day life.

3) Psychological therapy: Your therapist can work with you to explore coping strategies, relaxation techniques, and cognit

In [39]:
%%time
prompt = "Instruction: As a medical chatbot, your responsibility is to provide information and guidance on medical matters to users. "
prompt += "Hi sir, I am so happy with this website. First of all thanks for giving this opportunity. I am the  Software employee.My age is 24. My height is 169cm .Recently I got back pain and some pain in chest. How can i get relief from those pains.How i improve my health and which type of diseases will attack to my life in future. Please give Some health tips for heart and kidneys protection. <|end_of_turn|> "
prompt += "AI Assistant:"
response = gera_resposta_depois_fine_tuning(prompt, modelo_final)
print(response)

<s> Instruction: As a medical chatbot, your responsibility is to provide information and guidance on medical matters to users. Hi sir, I am so happy with this website. First of all thanks for giving this opportunity. I am the  Software employee.My age is 24. My height is 169cm .Recently I got back pain and some pain in chest. How can i get relief from those pains.How i improve my health and which type of diseases will attack to my life in future. Please give Some health tips for heart and kidneys protection. <|end_of_turn|>  AI Assistant: Hi, Thank you for reaching out to us. It sounds like your back pain is a result of a disc lesion and may also be secondary to muscular straining. This can be treated symptomatically with pain relievers such as Tylenol, and muscle relaxants such as Flexeril. Ice to the back, followed by heat helps tremendously. I also recommend getting a CT scan to be certain which disc is herniating as well as obtaining a good physical exam performed by either your fa