# Setup

## Install dependencies and mount Google Drive

In [1]:
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
!pip install python-dotenv

In [2]:
from google.colab import drive

# Location of the MediChat project inside the mounted Drive.
project_path = "/content/drive/MyDrive/PhD/work/MediChat"

# Expose Google Drive under /content/drive/ (prints "Mounted at ..." on success).
drive.mount("/content/drive/")

Mounted at /content/drive/


In [3]:
!cp -r /content/drive/MyDrive/PhD/work/MediChat/* /content/

# Load training data

In [4]:
from datasets import load_from_disk

# Training split, stored on disk in the `datasets` format.
# The path is relative to the current working directory
# (presumably /content after the copy step -- confirm).
train_data_dir = "data/trainChatDoctor"
train_dataset = load_from_disk(train_data_dir)

# Sanity check: report how many examples were loaded.
print(f"Loaded subset size: {len(train_dataset)}")

Loaded subset size: 800


# Load Model

In [5]:
from unsloth import FastLanguageModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [6]:
from src.models.model import load_model

# Identifier of the base checkpoint passed to the project's load_model helper.
model_name = "unsloth/Meta-Llama-3.1-8B"
# load_model returns the (model, tokenizer) pair that the later cells use.
# NOTE(review): the training log reports "Already have LoRA adapters", so
# load_model presumably attaches them as well -- confirm in src/models/model.py.
model, tokenizer = load_model(model_name)

==((====))==  Unsloth 2025.1.5: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

Unsloth 2025.1.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


# Data processing

In [7]:
from functools import partial

from src.data.data_processing import formatting_prompts

# Bind the tokenizer up front so that `map` can call the formatter with the
# batch as its only positional argument.
formatting_with_tokenizer = partial(formatting_prompts, tokenizer=tokenizer)

# Apply the prompt formatting to the training set, one batch at a time.
formatted_dataset = train_dataset.map(
    formatting_with_tokenizer,
    batched=True,
)


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

In [8]:
from src.models.train import train

# Fine-tune on the formatted dataset using the project's train helper.
# NOTE(review): the recorded output of this cell reports "Num Epochs = 1" and
# "Total steps = 60" although num_train_epochs=2 is passed, and it ends with
# "AttributeError: 'NoneType' object has no attribute 'train'". Both point at
# src/models/train.py (not visible here) -- verify before relying on `trainer`.
trainer = train(
    model,
    tokenizer,
    formatted_dataset,
    output_dir="models",
    num_train_epochs=2,
)

Unsloth: Already have LoRA adapters! We shall skip this step.


Map (num_proc=2):   0%|          | 0/800 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 800 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,2.9334
2,2.7078
3,2.7515
4,2.8365
5,2.64
6,2.5003
7,2.2611
8,2.3292
9,2.1333
10,2.1176


AttributeError: 'NoneType' object has no attribute 'train'

### Save model

In [10]:
# Local saving: write the model and the tokenizer into the "medichat_model"
# directory (relative to the current working directory).
model.save_pretrained("medichat_model")
# Last expression of the cell: its return value (the tokenizer file paths
# that were written) is what the notebook displays.
tokenizer.save_pretrained("medichat_model")

('medichat_model/tokenizer_config.json',
 'medichat_model/special_tokens_map.json',
 'medichat_model/tokenizer.json')

In [11]:
from app.huggingface import login_huggingface

# Authenticate with the Hugging Face Hub through the project helper; this must
# succeed before the push_to_hub cells are run.
# NOTE(review): where the helper reads the token from is not visible here --
# confirm that no credential is hard-coded in app/huggingface.py.
login_huggingface()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Logged in to Hugging Face successfully!


In [12]:
# Upload the model and then the tokenizer to the same Hub repository.
# The recorded output shows adapter_model.safetensors being uploaded, i.e. the
# LoRA adapter weights.
model.push_to_hub("NourEldin-Ali/medichat_model") # Online saving
tokenizer.push_to_hub("NourEldin-Ali/medichat_model") # Online saving

README.md:   0%|          | 0.00/594 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Saved model to https://huggingface.co/NourEldin-Ali/medichat_model


tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [17]:
# Convert the fine-tuned model to GGUF and upload it to the same Hub repository.
# In the recorded run this merged the 4-bit + LoRA weights to 16 bit and
# produced a q8_0 file; no quantization method is passed explicitly, so that
# is presumably the library default -- confirm.
# NOTE(review): the execution count (17) is out of sequence with the
# surrounding cells; re-run the notebook top to bottom before sharing.
model.push_to_hub_gguf("NourEldin-Ali/medichat_model", tokenizer)

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 6.1 out of 12.67 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 32/32 [06:22<00:00, 11.94s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving NourEldin-Ali/medichat_model/pytorch_model-00001-of-00004.bin...
Unsloth: Saving NourEldin-Ali/medichat_model/pytorch_model-00002-of-00004.bin...
Unsloth: Saving NourEldin-Ali/medichat_model/pytorch_model-00003-of-00004.bin...
Unsloth: Saving NourEldin-Ali/medichat_model/pytorch_model-00004-of-00004.bin...
Done.
==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits might take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q8_0'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: Installing llama.cpp. This might take 3 minutes...
Unsloth: [1] Converting model at NourEldin-Ali/medichat_model into q8_0 GGUF format.
The output location will be /content/NourEldin-Ali/medichat_model/unsloth.Q8_0.gguf
This might take 3 minutes...
INFO:hf-to-gguf:Loading model: me

unsloth.Q8_0.gguf:   0%|          | 0.00/8.54G [00:00<?, ?B/s]

Saved GGUF to https://huggingface.co/NourEldin-Ali/medichat_model


# Evaluation

In [None]:
from src.models.model import load_model

# Reload the fine-tuned model from the Hub repository it was pushed to.
# This rebinds model_name, model and tokenizer, replacing the objects used
# during training; the evaluation cells below use these new bindings.
model_name = "NourEldin-Ali/medichat_model"
model, tokenizer = load_model(model_name)

## Load test dataset

In [2]:
from datasets import load_from_disk

# Held-out test split, stored on disk in the `datasets` format.
# The path is relative to the current working directory.
test_data_dir = "data/testChatDoctor"
test_dataset = load_from_disk(test_data_dir)

# Sanity check: report how many examples were loaded.
print(f"Loaded subset size: {len(test_dataset)}")

Loaded subset size: 200


In [None]:
from src.models.model import get_response
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Evaluate the model on the test set with sentence-level BLEU.
# For every row, the model's answer to row["input"] is compared against the
# reference answer row["output"]; both are tokenized by whitespace.
# Results: `bleu_scores` (one score per row) and `average_bleu` (their mean).

# Smoothing is needed because short answers often have no matching
# higher-order n-grams, which would otherwise collapse the score to zero.
smooth = SmoothingFunction()

# One BLEU score per evaluated row, in dataset order.
bleu_scores = []

# Count from 1 so the progress line reports how many rows are finished.
for rows_done, row in enumerate(test_dataset, start=1):
    input_text = row["input"]  # Input text for the model
    # sentence_bleu expects a list of references, each a list of tokens.
    reference = [row["output"].split()]

    # Generate the model response (assumes get_response returns a str -- confirm).
    generated_response = get_response(model, tokenizer, input_text)
    candidate = generated_response.split()  # Tokenized model output

    # BLEU with the default n-gram weights and method1 smoothing.
    bleu_score = sentence_bleu(reference, candidate, smoothing_function=smooth.method1)
    bleu_scores.append(bleu_score)

    print(f"Processed {rows_done} rows... BLEU score: {bleu_score:.4f}")

# Average BLEU over all rows; an empty test set has no defined average, so
# report that explicitly instead of failing with ZeroDivisionError.
if bleu_scores:
    average_bleu = sum(bleu_scores) / len(bleu_scores)
    print(f"Average BLEU score: {average_bleu:.4f}")
else:
    average_bleu = float("nan")
    print("No test rows were evaluated; average BLEU is undefined.")