# Instalação das dependências

In [23]:

!pip uninstall -y packaging
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers peft accelerate bitsandbytes
!pip install transformers datasets
!pip install langchain
!pip install langchain-community

Found existing installation: packaging 25.0
Uninstalling packaging-25.0:
  Successfully uninstalled packaging-25.0
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-b664m7wy/unsloth_55a294bfeb50429c847ea850ac476283
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-b664m7wy/unsloth_55a294bfeb50429c847ea850ac476283
  Resolved https://github.com/unslothai/unsloth.git to commit 010775fbdebecf3f413002e593161393c72c0a09
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting packaging (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Using cached packaging-25.0-py3-none-any



# Importação das Bibliotecas

In [24]:
import torch
import json
import pandas as pd
import os
from unsloth import FastLanguageModel, is_bfloat16_supported
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain_core.callbacks.manager import CallbackManager
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import JSONLoader
from langchain_core.language_models.llms import LLM
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from transformers import TextStreamer
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Mapping, Optional


DATA_PATH = "train.csv"
OUTPUT_PATH_DATASET = "/content/formatted_news_dataset_chat_data_complement.json"
max_seq_length = 2048
dtype = None
load_in_4bit = True
fourbit_models = [
    "unsloth/mistral-7b-v0.3-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/llama-3-8b-bnb-4bit",
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",
]

# Formatação e separação do arquivo de entrada

In [13]:
def format_dataset_into_model_input(data):
    def separate_text(dialog):
        input_text = dialog[0]
        response = dialog[1]
        instruction = "Você é uma psicóloga clínica formada, ética e empática. Retorne conselhos bons para as questões apresentadas. com educação e profissionalismo, sempre sugira que o paciente procure um médico humano e especialista"
        return instruction, input_text, response

    # Inicializando as listas para armazenar os dados
    instructions = []
    inputs = []
    outputs = []

    # Processando o dataset
    for dialog in data:
        instruction, input_text, response = separate_text(dialog)
        instructions.append(instruction)
        inputs.append(input_text)
        outputs.append(response)

    # Criando o dicionário final
    formatted_data = {
        "instruction": instructions,
        "input": inputs,
        "output": outputs
    }

    # Salvando o resultado em um arquivo JSON
    with open(OUTPUT_PATH_DATASET, 'w') as output_file:
        json.dump(formatted_data, output_file, indent=4)

    print(f"Dataset salvo em {OUTPUT_PATH_DATASET}")

# Carregandos os dados do Dataset

In [15]:
data = load_dataset("csv", data_files=DATA_PATH)
ds = load_dataset("Ayansk11/Mental_health_data_conversational")

df_train = data['train'].to_pandas()
df_complement = ds['train'].to_pandas()
print(df_complement.head())

# Transforma os valores do dataset em uma lista para formatação no modelo
dataList = df_complement.values.tolist();
format_dataset_into_model_input(dataList)

                                             Context  \
0  I'm going through some things with my feelings...   
1  I'm going through some things with my feelings...   
2  I'm going through some things with my feelings...   
3  I'm going through some things with my feelings...   
4  I'm going through some things with my feelings...   

                                            Response  
0  If everyone thinks you're worthless, then mayb...  
1  Hello, and thank you for your question and see...  
2  First thing I'd suggest is getting the sleep y...  
3  Therapy is essential for those that are feelin...  
4  I first want to let you know that you are not ...  
Dataset salvo em /content/formatted_news_dataset_chat_data_complement.json


# Extração e tokenização do modelo

In [16]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2026.1.2: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


# Configuração para o treinamento
## Processo de tokenização dos inputs e responses

In [17]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):

        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

from datasets import load_dataset

OUTPUT_PATH_DATASET = "/content/formatted_news_dataset_chat_data_complement.json"

dataset = load_dataset("json", data_files=OUTPUT_PATH_DATASET, split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/3512 [00:00<?, ? examples/s]

# Treinamento do Modelo

In [18]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha = 16,
    lora_dropout = 0.0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 3407,
)


trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 1,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

trainer_stats = trainer.train()

Unsloth 2026.1.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/3512 [00:00<?, ? examples/s]

# Teste do modelo treinado

In [20]:
FastLanguageModel.for_inference(model)
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Voc\u00ea \u00e9 uma psic\u00f3loga cl\u00ednica formada, \u00e9tica e emp\u00e1tica. N\u00e3o realize diagn\u00f3sticos. N\u00e3o substitua atendimento profissional.",
        "I would like to improve my self-control, because there are times when I can't manage my emotions and end up having exaggerated reactions.", # input
        "",
    )
], return_tensors = "pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Você é uma psicóloga clínica formada, ética e empática. Não realize diagnósticos. Não substitua atendimento profissional.

### Input:
I would like to improve my self-control, because there are times when I can't manage my emotions and end up having exaggerated reactions.

### Response:
I would suggest that you first make a list of the situations where you have trouble managing your emotions. Then, think of the ways that you have managed your emotions in those situations. How have you been able to calm down? What has helped you to calm down? Write down your list of ways to calm down. Then, the next time you are in a situation where you are having trouble managing your emotions, refer to your list and try one of the ways that you have found to be effective. If you are having trouble coming up with 

# Criação do LLM personalizado

In [28]:
class UnslothLLM(LLM):
    model: Any
    tokenizer: Any
    alpaca_prompt: str
    max_new_tokens: int = 64

    @property
    def _llm_type(self) -> str:
        return "unsloth_fine_tuned_model"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> str:
        FastLanguageModel.for_inference(self.model)
        inputs = self.tokenizer(
            [self.alpaca_prompt.format(
                "Você é um assistente de IA que fornece recomendações para médicos terapeutas. Baseado nos sintomas do paciente, você irá sugerir o que o médico poderia dizer e quais recomendações gerais poderiam ser feitas. Mantenha as sugestões simples e diretas. Sempre enfatize a importância de o paciente buscar um médico humano e especialista. Por favor, responda sempre em português.",
                prompt,
                "",
            )], return_tensors = "pt").to("cuda")

        outputs = self.model.generate(**inputs, max_new_tokens = self.max_new_tokens, use_cache = True)
        decoded_output = self.tokenizer.batch_decode(outputs)
        response_text = decoded_output[0]

        start_tag = "### Response:"
        end_tag = self.tokenizer.eos_token
        start_index = response_text.find(start_tag)
        if start_index != -1:
            response_text = response_text[start_index + len(start_tag):].strip()
        end_index = response_text.find(end_tag)
        if end_index != -1:
            response_text = response_text[:end_index].strip()

        return response_text

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"model_name": self._llm_type}

# Instanciamento do unsloth_llm para interação com o usuário
unsloth_llm = UnslothLLM(
   model=model,
   tokenizer=tokenizer,
   alpaca_prompt=alpaca_prompt,
   max_new_tokens=128,
)

# Criação da variável de controle do contexto do chat

In [30]:
# Inicialização do History
chat_history = []
MAX_TURNS = 4  # número de interações (usuário + assistente)

# Armazenar o histórico do chat
def trim_history():
    global chat_history
    if len(chat_history) > MAX_TURNS * 2:
        chat_history = chat_history[-MAX_TURNS * 2:]

# Função que retorna a resposta do modelo treinado

In [58]:
import torch
from unsloth import FastLanguageModel

FastLanguageModel.for_inference(model)

def chat(user_input):
    global chat_history

    # Adiciona fala do usuário
    chat_history.append(f"Paciente: {user_input}")
    trim_history()

    # Histórico formatado
    history_text = "\n".join(chat_history)

    # Prompt final
    prompt = alpaca_prompt.format(
        "Você é um agregador de informações sobre conteúdo de sáude mental. "
        "Sempre sugira que o paciente procure um médico ou psicólogo humano especializado. "
        "Considere o histórico da conversa abaixo para manter coerência.",
        f"Histórico da conversa:\n{history_text}\n\nPaciente: {user_input}",
        "Não retorne frases repetidas sem necessidade"
        "Não dê diagnósticos precipitados sem que o usuário dê explicitamente o prognóstico"
        "Devolva como resposta a instrução do que a psicologa deve falar para o paciente"
    )


    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        truncation=True,
        max_length=2048
    ).to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.4,
            do_sample=True,
            use_cache=True
        )

    decoded = tokenizer.decode(
        outputs[0],
        skip_special_tokens=True
    )

    # Extrai só a resposta do assistente
    response = decoded.split("### Response:")[-1].strip()

    # Salva resposta na memória
    chat_history.append(f"Psicóloga: {response}")

    return response

# Interação do usuário com o chat

In [59]:
print("Olá! Sou a Synapse. Por favor, insira os sintomas do paciente ou digite 'sair' para sair.")

while True:
    patient_symptoms = input("Sintomas do paciente (ou 'sair' para sair): ")
    if patient_symptoms.lower() == 'sair':
        print("Encerrando a sessão. Tenha um bom dia!")
        break

    if patient_symptoms:
        try:
            # Invoke dp UnslothLLM com os sintomas do paciente
            model_chat = chat(patient_symptoms)
            recommendation = unsloth_llm.invoke(model_chat)
            print(f"Recomendação da Synapse: {recommendation}")
            print("-" * 80)
        except Exception as e:
            print(f"Ocorreu um erro ao processar a solicitação: {e}")
    else:
        print("Por favor, insira alguns sintomas para obter uma recomendação.")

Olá! Sou a Synapse. Por favor, insira os sintomas do paciente ou digite 'sair' para sair.
Sintomas do paciente (ou 'sair' para sair): meu paciente está se sentindo sobrecarregado no trabalho
Recomendação da Synapse: I'm not sure what you're asking. If you're asking for a general response to a client, I would suggest starting with a broad question that invites the client to share more about their concerns.  For example, "How can I help you today?" or "What brings you in today?"  After the client answers, you can ask follow-up questions that help you better understand the client's concerns.  If you're asking for a response to a specific concern, it would be helpful to know what the concern is.
--------------------------------------------------------------------------------
Sintomas do paciente (ou 'sair' para sair): O paciente me falou que está se sentindo triste e sobrecarregado
Recomendação da Synapse: You are a counselor.  As a counselor, you are a professional who has been trained to

KeyboardInterrupt: Interrupted by user