# Importações

In [1]:
import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import pipeline, TrainingArguments
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


# Carregando o Dataset para o Instruction Fine-Tuning

In [2]:
dataset = load_dataset('nlpie/Llama2-MedTuned-Instructions')

In [3]:
data_train = dataset['train'].select(indices= range(1000))

In [4]:
data_train

Dataset({
    features: ['instruction', 'input', 'output', 'source'],
    num_rows: 1000
})

In [5]:
data_test = dataset['train'].select(indices = range(1000, 1200))

In [6]:
data_test

Dataset({
    features: ['instruction', 'input', 'output', 'source'],
    num_rows: 200
})

# Compreendendo o Formato dos Dados de Textoabs

In [7]:
for i in range(3):
    data = dataset['train'][i]
    print(f'Ponto de Dado {i + 1}:')
    print(f'Instruction: {data['instruction']}')
    print(f'Input: {data['input']}')
    print(f'Output: {data['output']}')
    print('\n---------------------------------------------------\n')

Ponto de Dado 1:
Instruction: In your role as a medical professional, address the user's medical questions and concerns.
Input: My relative suffering from secondary lever cancer ( 4th stage as per Allopathic doctor) and primary is in rectum. He is continuously with 103 to 104 degree F fever. Allpathic doctor suggested chemo only after fever subsidises. Is treatment possible at Lavanya & what is the time scale of recover.
Output: Hi, dairy have gone through your question. I can understand your concern. He has rectal cancer with liver metastasis. It is stage 4 cancer. Surgery is not possible at this stage. Only treatment options are chemotherapy and radiotherapy according to type of cancer. Inspite of all treatment prognosis is poor. Life expectancy is not good. Consult your doctor and plan accordingly. Hope I have answered your question, if you have any doubts then contact me at bit.ly/ Chat Doctor. Thanks for using Chat Doctor. Wish you a very good health.

----------------------------

# Automatizando a Criação dos Prompts para Treinamento do Modelo

In [8]:
def cria_prompt(sample):

    pre_prompt = """[INST]<<SYS>> {instruction}\n"""
    prompt = pre_prompt + '{input}' + '[/INST]'+'\n{output}'

    example_instruction = sample['instruction']
    example_input = sample['input']
    example_output = sample['output']

    prompt_template = PromptTemplate(template= prompt,
                                     input_variables= ['instruction', 'input', 'output'])
    prompt_unico = prompt_template.format(instruction = example_instruction,
                                          input = example_input,
                                          output = example_output)

    return prompt_unico

In [9]:
prompt = cria_prompt(data_train[0])
print(prompt)

[INST]<<SYS>> In your role as a medical professional, address the user's medical questions and concerns.
My relative suffering from secondary lever cancer ( 4th stage as per Allopathic doctor) and primary is in rectum. He is continuously with 103 to 104 degree F fever. Allpathic doctor suggested chemo only after fever subsidises. Is treatment possible at Lavanya & what is the time scale of recover.[/INST]
Hi, dairy have gone through your question. I can understand your concern. He has rectal cancer with liver metastasis. It is stage 4 cancer. Surgery is not possible at this stage. Only treatment options are chemotherapy and radiotherapy according to type of cancer. Inspite of all treatment prognosis is poor. Life expectancy is not good. Consult your doctor and plan accordingly. Hope I have answered your question, if you have any doubts then contact me at bit.ly/ Chat Doctor. Thanks for using Chat Doctor. Wish you a very good health.


In [10]:
prompt = cria_prompt(data_test[0])
print(prompt)

[INST]<<SYS>> In the clinical text, your objective is to identify relationships between medical problems, treatments, and tests. Medical problems are tagged as @problem$, medical tests as @test$, and treatments as @treatment$. Classify the relationship between two entities as one of the following:
Treatment improves medical problem (TrIP)
Treatment worsens medical problem (TrWP)
Treatment causes medical problem (TrCP)
Treatment is administered for medical problem (TrAP)
Treatment is not administered because of medical problem (TrNAP)
Test reveals medical problem (TeRP)
Test conducted to investigate medical problem (TeCP)
Medical problem indicates medical problem (PIP)
No Relations
Include @treatment$ 50 mgs bid , Aricept 10 mgs qhs , @treatment$ 15 mgs bid , Trazodone 100 mgs qhs .[/INST]
No Relations


# Processo de Quantização

In [11]:
use_4bit = True

In [12]:
bnb_4bit_compute_dtype = 'float16'

In [13]:
bnb_4bit_quant_type = 'nf4'

In [14]:
use_nested_quant = False

In [15]:
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

In [16]:
bnb_config = BitsAndBytesConfig(load_in_4bit= use_4bit,
                                bnb_4bit_quant_type= bnb_4bit_quant_type,
                                bnb_4bit_compute_dtype= compute_dtype,
                                bnb_4bit_use_double_quant= use_nested_quant)

In [17]:
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print('=' * 80)
        print('A GPU suporta bfloat16. Acelere o treinamento usando bf16=True')
        print('=' * 80)

A GPU suporta bfloat16. Acelere o treinamento usando bf16=True


# Carregando o LLM e o Tokenizador

In [18]:
llm = 'NousResearch/Llama-2-7b-chat-hf'

In [19]:
tokenizer = AutoTokenizer.from_pretrained('NousResearch/Llama-2-7b-chat-hf')

In [20]:
model = AutoModelForCausalLM.from_pretrained(llm,
                                             quantization_config = bnb_config,
                                             device_map = 'auto',
                                             use_cache = False)

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:12<00:00,  6.09s/it]


In [21]:
tokenizer.pad_token = tokenizer.eos_token

In [22]:
tokenizer.padding_side = 'right'

# Configurando Adaptadores LoRa

In [23]:
peft_config = LoraConfig(r = 8,
                         lora_alpha= 16,
                         lora_dropout= 0.05,
                         bias = 'none',
                         task_type= 'CAUSAL_LM')

In [24]:
lora_model = prepare_model_for_kbit_training(model)

In [25]:
lora_model = get_peft_model(lora_model, peft_config)

# Parâmetros de Ajuste Fino

In [26]:
output_model = 'modelo_ajustado'

In [27]:
training_arguments = TrainingArguments(output_dir= output_model,
                                       per_device_train_batch_size= 1,
                                       gradient_accumulation_steps= 4,
                                       optim = 'paged_adamw_32bit',
                                       learning_rate = 2e-4,
                                       lr_scheduler_type= 'cosine',
                                       save_strategy= 'epoch',
                                       logging_steps= 10,
                                       num_train_epochs= 3,
                                       max_steps= 150,
                                       fp16= True)

In [28]:
trainer = SFTTrainer(model = lora_model,
                     peft_config= peft_config,
                     max_seq_length= 512,
                     tokenizer = tokenizer,
                     packing = True,
                     formatting_func= cria_prompt,
                     args= training_arguments,
                     train_dataset= data_train,
                     eval_dataset= data_test)

max_steps is given, it will override any value given in num_train_epochs


# Treinamento do Ajuste Fino

In [29]:
trainer.train()

Step,Training Loss
10,2.6241
20,2.2108
30,1.9563
40,1.8402
50,1.6913
60,1.7884
70,1.4235
80,1.5679
90,1.4167
100,1.4747


TrainOutput(global_step=150, training_loss=1.676869862874349, metrics={'train_runtime': 1173.1661, 'train_samples_per_second': 0.511, 'train_steps_per_second': 0.128, 'total_flos': 1.2186386694144e+16, 'train_loss': 1.676869862874349, 'epoch': 1.0507880910683012})

In [30]:
trainer.save_model('modelo_final')

In [31]:
merged_model = lora_model.merge_and_unload()

# Construindo o Pipeline de Geração de Texto com LangChain

In [32]:
pre_prompt = """[INST] <<SYS>>\nAnalyse the question and answer with the best option.\n"""

In [33]:
prompt = pre_prompt + "Here is my question {context}" + "[\INST]"

In [34]:
prompt = PromptTemplate(template= prompt, input_variables= ['context'])

In [46]:
pipe = pipeline('text-generation',
                model = merged_model,
                tokenizer = tokenizer,
                max_new_tokens = 512,
                use_cache = False,
                do_sample = True,
                pad_token_id = tokenizer.eos_token_id,
                top_p = 0.7,
                temperature = 0.5)

In [47]:
llm_pipeline = HuggingFacePipeline(pipeline= pipe)

# Criando a LLM Chain

In [48]:
memory = ConversationBufferMemory()

In [49]:
chat_llm_chain = LLMChain(llm = llm_pipeline,
                          prompt= prompt,
                          verbose = False,
                          memory= memory)

# Deploy do Modelo e Uso do Sistema de Perguntas e Respostas

In [50]:
contexto = '''###Question: All of the following provisions are included in the Primary health care according to the Alma Ata declaration except:
###Options:
A. Adequate supply of safe drinking water
B. Nutrition
C. Provision of free medicines
D. Basic sanitation'''

In [51]:
%%time
chat_llm_chain.predict(context = contexto)

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


CPU times: user 5min 24s, sys: 1min 56s, total: 7min 21s
Wall time: 7min 21s


"[INST] <<SYS>>\nAnalyse the question and answer with the best option.\nHere is my question ###Question: All of the following provisions are included in the Primary health care according to the Alma Ata declaration except:\n###Options:\nA. Adequate supply of safe drinking water\nB. Nutrition\nC. Provision of free medicines\nD. Basic sanitation[\\INST]  Great, let's analyze the question and answer options:\n\nQuestion: All of the following provisions are included in the Primary health care according to the Alma Ata declaration except:\n\nOptions:\nA. Adequate supply of safe drinking water\nB. Nutrition\nC. Provision of free medicines\nD. Basic sanitation\n\nThe Alma Ata declaration is a landmark international document that sets out the principles of primary health care. It was adopted by the World Health Organization (WHO) in 1978 and has since been widely adopted by countries around the world.\n\nThe declaration emphasizes the importance of primary health care as a foundation for achie