INSTALL PACKAGES

In [1]:
# !pip install --upgrade accelerate peft bitsandbytes transformers pdfplumber datasets==2.16.1 trl==0.7.9 --target=/kaggle/working/packages
%pip install --upgrade accelerate peft transformers pdfplumber datasets==2.16.1 trl
%pip install bitsandbytes --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui


Note: you may need to restart the kernel to use updated packages.
Looking in indexes: https://pypi.org/simple, https://jllllll.github.io/bitsandbytes-windows-webui
Note: you may need to restart the kernel to use updated packages.


**To resolve some issues while installing packages**

In [None]:
# import pkg_resources

# def resolve_conflicts():
#     """
#     Resolves dependency conflicts by creating a new environment and installing compatible versions.
#     """

#     # Create a new virtual environment (replace "myvenv" with your desired name)
#     !virtualenv myvenv
#     !source myvenv/bin/activate

#     # Install compatible versions of conflicting packages
#     required_versions = {
#         "cupy-cuda11x": ">=12.0.0",
#         "dill": "<0.3.2,>=0.3.1.1",
#         "numpy": "<1.25.0,>=1.14.3",
#         "pyarrow": "<10.0.0,>=3.0.0",
#         "jupyter-server": "~=1.16",
#         "jupyterlab": "~=3.4",
#         "urllib3": "<2.1,>=1.25.4",  # Ensure python_version >= "3.10"
#         "pandas": "<1.6.0dev0,>=1.3",
#         "protobuf": "<5,>=4.21",
#         "dask": "==2023.7.1",
#         "distributed": "==2023.7.1",
#         "fsspec": "==2023.6.0",
#         "urllib3": "<2.0",
#         "google-api-core[grpc]": "<2.0.0dev,>=1.22.2",
#         "packaging": "<22.0dev,>=14.3",
#         "grpcio": "<2.0dev,>=1.51.3",
#         "jupyter-lsp": ">=2.0.0",
#         "google-cloud-storage": "<3,>=2.2.1",
#         "shapely": ">=2.0.1",
#         "numpy": "<1.25,>=1.21",
#         "cryptography": "<42,>=38.0.0",
#         "numpy": "<1.22.2,>=1.15.0",
#         "scipy": "<1.8.0,>=1.7.3",
#         "fsspec": "==2023.12.2",
#         "typing-extensions": "<4.6.0,>=3.6.6",
#         "platformdirs": "<4,>=2.4",
#         "numpy": "<1.24,>=1.16.0",
#         "pandas": "!=1.4.0,<2.1,>1.1",
#         "scipy": "<1.12,>=1.4.1",
#     }

#     for package, version_spec in required_versions.items():
#         !pip install "{package} {version_spec}"

#     # Verify that conflicts are resolved
#     working_set = pkg_resources.WorkingSet()
#     if not working_set.require(required_versions):
#         raise RuntimeError("Dependency conflicts could not be resolved.")

#     print("Dependency conflicts resolved!")

# if __name__ == "__main__":
#     resolve_conflicts()


**Importing necessary packages**

In [1]:
import os
import torch
import transformers
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import prepare_model_for_kbit_training
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


**Model: https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3?text=My+name+is+Clara+and+I+am**

In [7]:
# Model from Hugging Face hub
base_model = "PY007/TinyLlama-1.1B-Chat-v0.3"

# Fine-tuned model
new_model = "TinyLlama-chat-medaid-model"

**Reading the PDF and storing it as a text file**

In [15]:
# READING THE NEW CONVERSATION DATASET WITH ALL THE TEXT FILES (doctor-patient-conversation-large)

# Directory containing your text files
text_files_directory = r"G:\LLM-Model-MedAid-Thesis\Model-3\Dataset"

# List to store individual conversation texts
conversation_texts = []

# Loop through each text file in the directory
for filename in os.listdir(text_files_directory):
    if filename.endswith(".txt"):
        file_path = os.path.join(text_files_directory, filename)
        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
            # Use errors="ignore" to skip characters that cannot be decoded
            conversation_texts.append(file.read())

# Save the combined text to a file
output_path = r"G:\LLM-Model-MedAid-Thesis\Model-3\combined_conversations.txt"
with open(output_path, "w", encoding="utf-8") as output_file:
    for text in conversation_texts:
        output_file.write(text + "\n\n")

dataset = load_dataset("text", data_files=output_path, split="train")


Generating train split: 53861 examples [00:00, 1282424.91 examples/s]


In [None]:
print(dataset)

**This code has a configuration for quantization using the *BitsAndBytesConfig class* from the *trl library*. Quantization is a technique used in deep learning to reduce the memory and computational requirements of neural networks.**

In [5]:
compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

In [None]:

from torch.utils.cpp_extension import CUDA_HOME
print(CUDA_HOME) # by default it is set to /usr/local/cuda/

**This code is using the AutoModelForCausalLM class from the transformers library to instantiate a pre-trained causal language model with specific configurations.**

In [8]:
import torch
print("Cuda ", torch.cuda.is_available())

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    # device_map={"": 0}
    device_map="auto", # automatically figures out how to best use CPU + GPU for loading model
    trust_remote_code=True, # prevents running custom model files on your machine
)

model.config.use_cache = False
model.config.pretraining_tp = 1


Cuda  True


**This code is using the AutoTokenizer class from the transformers library to instantiate a tokenizer for a pre-trained language model.**

In [9]:

# Create the tokenizer on the chosen device
# tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Prepare Model for Training

In [10]:
model.train() # model in training mode (dropout modules are activated)

# enable gradient check pointing
model.gradient_checkpointing_enable()

# enable quantized training
model = prepare_model_for_kbit_training(model)

**This code is defining a configuration for the LoRA (Local Reparameterization with Attention) model using the LoraConfig class, which appears to be part of the peft library.**

In [11]:
# Create a LoraConfig instance
peft_params = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# LoRA trainable version of model
model = get_peft_model(model, peft_params)

# trainable parameter count
model.print_trainable_parameters()

trainable params: 720,896 || all params: 1,100,781,568 || trainable%: 0.0654894686608706


**Preparing Training Dataset**

In [None]:
# # create tokenize function
# def tokenize_function(examples):
#     # extract text
#     text = examples["example"]

#     #tokenize and truncate text
#     tokenizer.truncation_side = "left"
#     tokenized_inputs = tokenizer(
#         text,
#         return_tensors="np",
#         truncation=True,
#         max_length=512
#     )

#     return tokenized_inputs

# # tokenize training and validation datasets
# tokenized_data = dataset.map(tokenize_function, batched=True)

In [12]:
# setting pad token
tokenizer.pad_token = tokenizer.eos_token
# data collator
data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

**This code is defining a set of training parameters using the TrainingArguments class, which is often used in the transformers library for configuring training settings.**

In [13]:
# hyperparameters
lr = 2e-4
batch_size = 4
num_epochs = 10

# Specify the output directory and other training parameters
training_params = TrainingArguments(
    output_dir="./results",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    num_train_epochs=num_epochs,
    gradient_accumulation_steps=4,
    # save_steps=25,
    logging_strategy="epoch",
    save_strategy="epoch",
    # logging_steps=25,  
    weight_decay=0.01,
    # load_best_model_at_end=True,
    warmup_steps=2,
    fp16=True,
    optim="paged_adamw_8bit",
    # bf16=False,
    # max_grad_norm=0.3,
    # max_steps=-1,
    # warmup_ratio=0.03,
    # group_by_length=True,
    # lr_scheduler_type="constant",
    # report_to="tensorboard"
)

**This code is creating an instance of the SFTTrainer class, presumably from the trl library, to facilitate the training of a model using the specified configuration.**

**SFTTrainer instance is configured with the** 
* model, 
* training dataset, 
* Peft configuration, 
* tokenizer,
* training arguments. 

**The specific behavior and training process are determined by the SFTTrainer implementation in the trl library, and the configured parameters influence aspects such as optimization, learning rate, and model architecture during training.**

In [16]:
# Create the trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,
    dataset_text_field="text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
    data_collator=data_collator
)

Map: 100%|██████████| 53861/53861 [00:00<00:00, 89470.38 examples/s]


In [None]:
# train model
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
# trainer.train()

# renable warnings
model.config.use_cache = True

**The code you provided is saving the trained model and its associated tokenizer to a specified directory using the save_pretrained method.**

In [3]:
model_path = "preTrained_model"

In [18]:
trainer.save_model(model_path)
tokenizer.save_pretrained(model_path)

# trainer.model.save_pretrained(new_model)
# trainer.tokenizer.save_pretrained(new_model)

('preTrained_model\\tokenizer_config.json',
 'preTrained_model\\special_tokens_map.json',
 'preTrained_model\\tokenizer.model',
 'preTrained_model\\added_tokens.json',
 'preTrained_model\\tokenizer.json')

In [4]:
# Import the pretrained model
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


**This code creates a simple conversational loop that simulates a doctor assistant interaction. It uses a fine-tuned language model (assumed to be a text generation model) and a tokenizer to generate responses based on user input. The conversation is logged in a text file, and the loop continues until the user provides an exit signal.**

In [25]:
# logging.set_verbosity(logging.CRITICAL)

# Initial prompt
prompt = "Be a doctor assistant. And keep questioning one by one to extract symptoms and history of the patient. Don't give advice or ask anything else. Just extract symptoms or history by questioning one question at a time."

# Create a pipeline for text generation using the fine-tuned model and tokenizer
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

# File to save the conversation
output_file_path = "G:\LLM-Model-MedAid-Thesis\Model-3\conversation_log.txt"  # Update 'your_folder' with the desired folder in your Drive

# Function to ask a question and get the user's response
def ask_question_and_log(prompt, user_response, file_path):
    # Ask the question
    result = pipe(f"<s>[Prompt] {prompt} \n [User response] \n {user_response} \n [/User Response]")

    # Get the generated text (question)
    generated_text = result[0]['generated_text']

    # Print and save the generated text (question)
    print(generated_text)
    with open(file_path, "a", encoding="utf-8") as output_file:
        output_file.write(f"[Model] {generated_text}\n\n")

    # Simulate user answering the question
    user_response = input("Your response: ")  # User provides input

    return user_response

# Initial user response
user_response = "I have chest pain."

# Ask a question based on the user's response and log the conversation
with open(output_file_path, "a", encoding="utf-8") as output_file:
    output_file.write("\nConversation started.\n\n")

# Loop to continue the conversation
while True:
    user_response = ask_question_and_log(prompt, user_response, output_file_path)

    # Check for an exit condition (e.g., user response indicating the end of the conversation)
    if "exit" in user_response.lower():
        print("Ending the conversation.")

        # Save the conversation to a file
        with open(output_file_path, "a", encoding="utf-8") as output_file:
            output_file.write("\nConversation ended by user. ---------------- \n\n")

        break


Both `max_new_tokens` (=32) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


<s>[Prompt] Be a doctor assistant. And keep questioning one by one to extract symptoms and history of the patient. Don't give advice or ask anything else. Just extract symptoms or history by questioning one question at a time. 
 [User response] 
 I have chest pain. 
 [/User Response] 

[Prompt] Now, ask me about my symptoms. 
 [User response] 
 I have chest pain and dizz
Ending the conversation.


In [26]:
# Create a pipeline for text generation
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, torch_dtype=torch.float16, device_map="auto")

# File to save the conversation
output_file_path = "G:\LLM-Model-MedAid-Thesis\Model-3\conversation_log.txt"  # Update 'your_folder' with the desired folder in your Drive

# Function to format the prompt
def format_prompt(prompt, user_response):
    return f"### Human: {user_response} ### Assistant: {prompt}"

# Function to start the conversation and log it
def start_conversation_and_log(file_path):
    # Start conversation by asking why the user has come today
    initial_prompt = "As a doctor assistant, I need to understand why you've come today. Please describe your symptoms or reasons for the visit."
    print(initial_prompt)
    with open(file_path, "a", encoding="utf-8") as output_file:
        output_file.write(f"[System] {initial_prompt}\n\n")

# # Function to ask a question and get the user's response
# def ask_question_and_log(user_response, file_path):
#     # Ask the question
#     result = pipe(f"<s>[Prompt] What else would you like to share about your symptoms or concerns? \n [User response] \" {user_response} \" [/User Response]")

#     # Get the generated text (question)
#     generated_text = result[0]['generated_text']

#     # Print and save the generated text (question)
#     print("Response: ", generated_text)
#     with open(file_path, "a", encoding="utf-8") as output_file:
#         output_file.write(f"[Model] {generated_text}\n\n")

#     # Simulate user answering the question
#     user_response = input("Your response: ")  # User provides input

#     return user_response


# Function to ask a question and get the user's response
def ask_question_and_log(user_response, file_path):
    # Ask the question
    result = pipe(f"<s><Prompt> Please provide additional details about your symptoms or concerns. [User response] \"{user_response}\" [/User Response]")

    # Get the generated text (question)
    generated_text = result[0]['generated_text']

    # Extract the follow-up question from the generated text
    follow_up_question = generated_text.strip()

    # Print and save the follow-up question
    print(f"User response: \"{user_response}\"")
    print(f"Model response: \"{follow_up_question}\"")
    with open(file_path, "a", encoding="utf-8") as output_file:
        output_file.write(f"[User response] \"{user_response}\" [/User Response]\n")
        output_file.write(f"[Model response] \"{follow_up_question}\" [/Model Response]\n")

    # Simulate user answering the question
    user_response = input("Your response: ")  # User provides input

    return user_response





# Start the conversation
start_conversation_and_log(output_file_path)

# Ask questions based on the user's responses and log the conversation
with open(output_file_path, "a", encoding="utf-8") as output_file:
    output_file.write("\nConversation started.\n\n")

# Initial user response
user_response = input("Your response: ")  # User provides input

# Loop to continue the conversation
while True:
    user_response = ask_question_and_log(user_response, output_file_path)

    # Check for an exit condition (e.g., user response indicating the end of the conversation)
    if "exit" in user_response.lower():
        print("Ending the conversation.")

        # Save the conversation to a file
        with open(output_file_path, "a", encoding="utf-8") as output_file:
            output_file.write("\nConversation ended by user. ---------------- \n\n")

        break


As a doctor assistant, I need to understand why you've come today. Please describe your symptoms or reasons for the visit.
User response: "I have chest pain"
Model response: "<s><Prompt> Please provide additional details about your symptoms or concerns. [User response] "I have chest pain" [/User Response] "I have dizziness" [/Prompt]
I'm not sure what prompt to use for this prompt. I'm not sure what"
Ending the conversation.


In [23]:
import transformers

# Create a pipeline for text generation
pipe = transformers.pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# File to save the conversation
output_file_path = "G:\LLM-Model-MedAid-Thesis\Model-3\conversation_log.txt"  # Update 'your_folder' with the desired folder in your Drive

# Function to format the prompt
def format_prompt(prompt, user_response):
    return f"### Human: {user_response} ### Assistant: {prompt}"

# Function to start the conversation and log it
def start_conversation_and_log(file_path):
    # Start conversation by asking why the user has come today
    initial_prompt = "As a doctor assistant, I need to understand why you've come today. Please describe your symptoms or reasons for the visit."
    print(initial_prompt)
    with open(file_path, "a", encoding="utf-8") as output_file:
        output_file.write(f"[System] {initial_prompt}\n\n")

# Function to ask a question and get the user's response
def ask_question_and_log(user_response, file_path):
    # Format the prompt
    formatted_prompt = format_prompt("Please provide additional details about your symptoms or concerns.", user_response)

    # Generate response from the model
    sequences = pipe(
        formatted_prompt,
        do_sample=True,
        top_k=50,
        top_p=0.7,
        num_return_sequences=1,
        repetition_penalty=1.1,
        max_new_tokens=500,
    )

    # Extract the generated text (follow-up question)
    follow_up_question = sequences[0]['generated_text'].split("### Assistant: ")[-1]

    # Print and save the follow-up question
    print(f"Model response: \"{follow_up_question}\"")
    with open(file_path, "a", encoding="utf-8") as output_file:
        output_file.write(f"[Model response] \"{follow_up_question}\" [/Model Response]\n")

    # Extract the follow-up question from the model response
    extracted_question = follow_up_question.strip().split(" ")[0]

    # Return the extracted question
    return extracted_question

# Start the conversation
start_conversation_and_log(output_file_path)

# Ask questions based on the user's responses and log the conversation
with open(output_file_path, "a", encoding="utf-8") as output_file:
    output_file.write("\nConversation started.\n\n")

# Initial user response
user_response = input("Your response: ")  # User provides input

# Loop to continue the conversation
while True:
    follow_up_question = ask_question_and_log(user_response, output_file_path)

    # Simulate user answering the question
    user_response = input(f"Your response to \"{follow_up_question}\": ")  # User provides input

    # Check for an exit condition (e.g., user response indicating the end of the conversation)
    if "exit" in user_response.lower():
        print("Ending the conversation.")

        # Save the conversation to a file
        with open(output_file_path, "a", encoding="utf-8") as output_file:
            output_file.write("\nConversation ended by user. ---------------- \n\n")

        break

As a doctor assistant, I need to understand why you've come today. Please describe your symptoms or reasons for the visit.
Model response: "Great! Then please fill out your insurance"
Ending the conversation.
