In [1]:
!pip install -U transformers
!pip install huggingface_hub>=0.30.0
!pip install -U datasets
!pip install -U accelerate
!pip install -U bitsandbytes
!pip install -U trl
!pip install torch==2.5.1
!pip install -U peft


Collecting transformers
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.35.1-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting hf-xet<2.0.0,>=1.1.3 (from huggingface-hub<1.0,>=0.34.0->transformers)
  Downloading hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.7 kB)
Downloading transformers-4.56.2-py3-none-any.whl (11.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m93.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hDownloading huggingface_hub-0.35.1-py3-none-any.whl (563 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import torch
from datasets import load_dataset
from transformers import (
                        AutoModelForCausalLM,
                        AutoTokenizer,
                        BitsAndBytesConfig,
                        TrainingArguments,
                        logging
                        )
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, setup_chat_format
import bitsandbytes as bnb
from huggingface_hub import login

In [3]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("huggingface")
login(token=secret_value_0)

In [4]:
base_model = 'google/gemma-2-2b-it'
dataset_name = 'lavita/ChatDoctor-HealthCareMagic-100k'
new_model = 'AIDoctor'


if torch.cuda.get_device_capability()[0] >= 8:
    torch_dtype = torch.bfloat16
    att_implemetation = 'flash_attention_2'
else:
    torch_dtype = torch.float16
    attn_implementation = 'eager'

In [5]:
bnb_config = BitsAndBytesConfig(
    load_in4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = torch_dtype,
    bnb_4bit_use_double_quant = True,
    
)

In [6]:
model = AutoModelForCausalLM.from_pretrained(base_model,
                                            quantization_config=bnb_config, 
                                            device_map='auto',
                                            attn_implementation=attn_implementation)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)


config.json:   0%|          | 0.00/838 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

In [12]:
def find_all_linear(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            if len(names) == 1:              
                lora_module_names.add(names[0])  
            else:
                lora_module_names.add(names[-1])
    lora_module_names.discard('lm_head')
    return list(lora_module_names)
modules = find_all_linear(model)

In [13]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
    task_type='CAUSAL_LM',
    target_modules=modules
)
tokenizer.chat_template=None
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

In [14]:
import re
from datasets import load_dataset

dataset = load_dataset(
   dataset_name,
   split="all",           
   cache_dir="./cache"    
)

dataset = dataset.shuffle(seed=42).select(range(2000))

def clean_text(text):
   text = re.sub(r'\b(?:www\.[^\s]+|http\S+)', '', text)                   
   text = re.sub(r'\b(?:aCht Doctor(?:.com)?(?:.in)?|www\.(?:google|yahoo)\S*)', '', text)
   text = re.sub(r'\s+', ' ', text)                                    
   return text.strip()

def format_chat_template(row):
   cleaned_instruction = clean_text(row["instruction"]) 
   cleaned_input = clean_text(row["input"])             
   cleaned_output = clean_text(row["output"])          
   
   row_json = [
       {"role": "system", "content": cleaned_instruction},  
       {"role": "user", "content": cleaned_input},
       {"role": "assistant", "content": cleaned_output}
   ]
   row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
   return row

dataset = dataset.map(format_chat_template, num_proc=4)
dataset = dataset.train_test_split(test_size=0.1)
data_collator = lambda batch: tokenizer(
   batch["text"], 
   return_tensors="pt",    
   padding=True,           
   truncation=True         
)

README.md:   0%|          | 0.00/542 [00:00<?, ?B/s]

data/train-00000-of-00001-5e7cb295b9cff0(…):   0%|          | 0.00/70.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/112165 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/2000 [00:00<?, ? examples/s]

  self.pid = os.fork()


In [17]:
training_args = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=200,
    save_steps=500,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=0.0002,
    fp16=True,
    bf16=False,
    group_by_length=True,
    load_best_model_at_end=False,
    report_to=[]
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    args=training_args,
)

model.config.use_cache = False



Adding EOS to train dataset:   0%|          | 0/1800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1800 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1800 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

In [18]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 1, 'bos_token_id': 2, 'pad_token_id': 0}.


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
200,1.7193,2.129171,2.141169,116114.0,0.553228
400,1.5274,2.084175,2.113051,229662.0,0.557381
600,1.5723,2.057403,2.013895,343270.0,0.5631
800,2.1356,2.034151,2.008118,456439.0,0.567304




TrainOutput(global_step=900, training_loss=2.0712419319152833, metrics={'train_runtime': 1871.7003, 'train_samples_per_second': 0.962, 'train_steps_per_second': 0.481, 'total_flos': 6310787016522240.0, 'train_loss': 2.0712419319152833, 'epoch': 1.0})

In [19]:
merged_model = model.merge_and_unload()
merged_model.save_pretrained(new_model)
merged_model.push_to_hub(new_model, use_temp_dir=False)




Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

CommitInfo(commit_url='https://huggingface.co/ayubkhonibrokhimzoda/AIDoctor/commit/2c128666661a472a94456ca8f13243c123822841', commit_message='Upload Gemma2ForCausalLM', commit_description='', oid='2c128666661a472a94456ca8f13243c123822841', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ayubkhonibrokhimzoda/AIDoctor', endpoint='https://huggingface.co', repo_type='model', repo_id='ayubkhonibrokhimzoda/AIDoctor'), pr_revision=None, pr_num=None)

In [20]:
from transformers import GenerationConfig

messages = [
    {"role": "system", "content": "You are a medical expert specializing in respiratory diseases. You should prescribe some medical drugs"},
    {"role": "user", "content": "I have a persistent cough, night sweats, and recent weight loss. I’ve been to multiple doctors with no diagnosis yet. Could these symptoms be related to tuberculosis or another serious illness? Please provide a detailed answer considering possible causes and recommended next steps. Write down medicines that can cure my illness"}
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(
    **inputs,
    max_length=350,          
    top_k=50,                
    top_p=0.85,               
    temperature=0.3,         
    no_repeat_ngram_size=3,  
)

response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("assistant")[-1].strip()
print(response)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


I understand your concern. It's important to rule out serious illnesses like tuberculosis. However, I must emphasize that I am an AI and cannot provide medical advice. The information below is for general knowledge and informational purposes only, and does not constitute medical advice, diagnosis, or treatment. It is essential to consult with a qualified healthcare professional for any health concerns or before making any decisions related to your health or treatment.**

Here's a breakdown of the symptoms you mentioned and possible causes:

**Persistent Cough:**
* **Possible causes:**
    * **Asthma:** Chronic inflammation of the airways, leading to coughing, wheezing, and shortness of breath.
    - **Chronic Obstructive Pulmonary Disease (COPD):** Long-term lung disease that causes airflow obstruction and breathing-related problems.
*  **Tuberculosis:** A bacterial infection that can cause a persistent, dry cough, often with blood-tinged sputum.

**Night Sweats:**
- **Possible Causes: