**Работа с данным ноутбуком проводилась в Colab**

In [None]:
!pip install -U bitsandbytes transformers accelerate peft datasets trl

Collecting bitsandbytes
  Downloading bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting peft
  Downloading peft-0.18.1-py3-none-any.whl.metadata (14 kB)
Collecting datasets
  Downloading datasets-4.4.2-py3-none-any.whl.metadata (19 kB)
Collecting trl
  Downloading trl-0.26.2-py3-none-any.whl.metadata (11 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl (59.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading peft-0.18.1-py3-none-any.whl (556 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.0/557.0 kB[0m [31m46.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-4.4.2-py3-none-any.whl (512 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m512.3/512.3 kB[0m [31m47.7 MB/s[

In [None]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from peft import (
    LoraConfig,
    prepare_model_for_kbit_training,
    get_peft_model,
)
from trl import SFTConfig, SFTTrainer

# Обучение адаптера на исходных QA-данных

In [None]:
from google.colab import files

# Upload the dataset file from the local PC (the recorded run saved
# Doctor-HealthCare-100k.csv into the working directory).
uploaded = files.upload()

Saving Doctor-HealthCare-100k.csv to Doctor-HealthCare-100k.csv


In [None]:
# Load the uploaded CSV and keep a 1000-row random subset for fine-tuning.
df = pd.read_csv('Doctor-HealthCare-100k.csv')
# Fixed seed so that Restart & Run All trains on the same rows every time;
# without it each run silently fine-tunes on a different subset.
df = df.sample(1000, random_state=42)
df.head(1)


Unnamed: 0,instruction,input,output
109421,"If you are a doctor, please answer the medical...",I m 12weeks pregnant with my second child. I h...,"Hi there, thanks for the query. It looks like ..."


In [None]:
# Single prompt text applied to every row of the training frame.
instruction_text = (
    "You are a licensed medical doctor. Respond in a professional, neutral, and explanatory tone."
)

# Overwrite the dataset's own "instruction" column with this one prompt (scalar broadcast to all rows).
df["instruction"] = instruction_text
# Print column names, dtypes and non-null counts after the overwrite.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, 109421 to 96749
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   instruction  1000 non-null   object
 1   input        1000 non-null   object
 2   output       1000 non-null   object
dtypes: object(3)
memory usage: 31.2+ KB


In [None]:
# Base checkpoint that the LoRA adapter is trained on.
model_id = "Qwen/Qwen2.5-3B-Instruct"

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,  # CRITICAL
)

# NOTE(review): trust_remote_code=True permits running code shipped with the Hub repo;
# presumably not required for this checkpoint — confirm and drop if so.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Fall back to EOS as the padding token when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Pad on the right side of each sequence.
tokenizer.padding_side = "right"



# Load the causal LM with the quantization config above; device placement is left to "auto".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Keep the model config's pad id in sync with the tokenizer's.
model.config.pad_token_id = tokenizer.pad_token_id

# PEFT preparation step applied to the quantized model before the LoRA adapter is attached.
model = prepare_model_for_kbit_training(model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Display the module structure of the loaded (quantized) model.
model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 2048)
    (layers): ModuleList(
      (0-35): 36 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=True)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=True)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=True)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen2RMSNorm((2048,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((2048,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm

In [None]:
def formatting_func(example):
    """Render one dataset row as a single chat-formatted training string.

    The row's ``instruction``, ``input`` and ``output`` fields become the
    system, user and assistant turns respectively. The conversation is passed
    to the module-level ``tokenizer``'s chat template with ``tokenize=False``
    and ``add_generation_prompt=False``, so the result is plain text.

    Args:
        example: Mapping with ``"instruction"``, ``"input"`` and ``"output"`` keys.

    Returns:
        A dict with one key, ``"text"``, holding the rendered conversation.
    """
    # (chat role, source column) pairs, in conversation order.
    turn_sources = (
        ("system", "instruction"),
        ("user", "input"),
        ("assistant", "output"),
    )
    conversation = [
        {"role": role, "content": example[column]}
        for role, column in turn_sources
    ]
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Convert the sampled frame into a Hugging Face Dataset.
dataset = Dataset.from_pandas(df)
# Apply formatting_func to every row and drop all original columns,
# so only the "text" field it returns remains.
dataset = dataset.map(
    formatting_func,
    remove_columns=dataset.column_names,
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
# LoRA adapter settings: rank 16, alpha 16, dropout 0.05, bias terms not trained.
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # Attention (q/k/v/o) and MLP (gate/up/down) projections — the Linear4bit
    # layers listed in the model printout.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# Wrap the prepared base model with the LoRA adapter and report the trainable-parameter share.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 29,933,568 || all params: 3,115,872,256 || trainable%: 0.9607


In [None]:
# Supervised fine-tuning configuration for the QLoRA run.
sft_config = SFTConfig(
    output_dir="./ROW_Qwen3B_QLoRA",

    # 2 sequences per step x 2 accumulation steps = 4 sequences per optimizer update per device.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,

    optim="adamw_torch",        # <- safe optimizer choice
    learning_rate=9e-5,         # deliberately set slightly below the typical value
    lr_scheduler_type="cosine",

    fp16=False,                 # <- IMPORTANT
    bf16=False,                 # <- IMPORTANT
    max_grad_norm=0.0,          # <- CRITICAL (otherwise training crashes); NOTE(review): presumably disables gradient clipping — confirm

    num_train_epochs=1,

    # Train on the "text" field produced by formatting_func.
    dataset_text_field="text",
    max_length=1024,
    packing=False,

    logging_steps=10,
    report_to="none",
    remove_unused_columns=False,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    args=sft_config,
)

trainer.train()

# Save ONLY the trained LoRA adapter
model.eval()

# The tokenizer files are written next to the adapter weights.
adapter_path = "./ROW_Qwen3B_QLoRA/adapter"
model.save_pretrained(adapter_path)
tokenizer.save_pretrained(adapter_path)

Adding EOS to train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None}.


Step,Training Loss
10,2.9863
20,2.5665
30,2.2565
40,2.3612
50,2.322
60,2.296
70,2.2386
80,2.3561
90,2.1317
100,2.2929


In [None]:
# Pack the whole training output directory into a single gzip-compressed archive.
!tar -czf ROW_Qwen3B_QLoRA.tar.gz ROW_Qwen3B_QLoRA

from google.colab import files
# Send the archive to the local machine through Colab's download helper.
files.download("ROW_Qwen3B_QLoRA.tar.gz")

^C


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import gc
import torch

def clear_gpu_memory(namespace=None, names=()):
    """Release unreferenced GPU memory without restarting the runtime.

    Memory held by objects that are still referenced (for example a model or
    trainer bound to a notebook variable) cannot be released. Drop those
    references first, either with ``del`` before calling this function or by
    passing ``namespace`` and ``names``.

    The earlier implementation walked ``gc.get_objects()`` and executed
    ``del obj`` for every tensor it found. That only unbound the loop
    variable, released nothing, and probed ``.data`` on arbitrary objects
    behind a bare ``except`` (which produced warnings), so the scan was removed.

    Args:
        namespace: Optional mutable mapping (e.g. ``globals()``) from which the
            entries listed in ``names`` are removed before garbage collection.
        names: Key, or iterable of keys, to remove from ``namespace``. Missing
            keys are ignored. Has no effect when ``namespace`` is None.

    Returns:
        None. Progress and the resulting memory figures are printed.
    """
    print("Очистка памяти GPU...")

    # Drop the caller's references so the collector can actually reclaim them.
    if namespace is not None:
        if isinstance(names, str):
            names = (names,)  # a bare string would otherwise be iterated per character
        for name in names:
            namespace.pop(name, None)

    # Three collect + empty_cache passes, matching the number of collections
    # the original performed.
    for _ in range(3):
        gc.collect()
        torch.cuda.empty_cache()

    # NOTE: the label reads "free GPU memory", but the figures printed are the
    # allocated and reserved amounts; the text is kept unchanged on purpose.
    print(f"Свободная память GPU: {torch.cuda.memory_allocated()/1e9:.2f} GB занято, "
          f"{torch.cuda.memory_reserved()/1e9:.2f} GB зарезервировано")
    print("Очистка завершена!")

# Release cached CUDA blocks, then run the cleanup helper defined in this cell.
torch.cuda.empty_cache()
clear_gpu_memory()

Очистка памяти GPU...


  return isinstance(obj, torch.Tensor)
  if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):


Свободная память GPU: 7.13 GB занято, 10.26 GB зарезервировано
Очистка завершена!


# Инференс для сравнения базовой (baseline) модели и двух адаптеров QLoRA

In [None]:
# ----------------------------
# DATA (30 QA)
# ----------------------------
queries = [
    "Hi! Please help,I have a small, slightly bumpy white splotch on the inside of both my cheeks. It is surrounded by small prominent purple veins. Is this cancerous? Im a 23 year old non-smoking male. Should I wait a few days or see a dr immediately? Thank you for your kindness.",
    "my 1 month old son is showing signs of gas problem symptoms are -1) after feeding burping is not always proper gas will not come if we tap his back for 10-15 min 2) after some time he will show like he is swallowing some thing which has again come to his mouth from stomach, 3) some times milk will come threw his nose , 4)after sleeping 1/2 hour to 1 hour he will cry in sleep like he has some difficulty what i has to do can i try colicaid or grip water",
    "Hi, may I answer your health queries right now ? Please type your query here...My 20 year old son is almost always tired and sleeps a LOT. This started 4-5 years ago and he had a very involved medical work-up at that time, but they did not come up with any cause or what to do about it. What could cause this? Is it worth trying a medical workup again?",
    "hi Docter, I am 24 yrs old female staying in bangalore for the past 4 yrs. I am have Oily hair due to which i feel that I am facing hair loss as i have to shampoo my hair every alternate day. Can you please suggest me some measures to stop hair loss. Is it the water which is the problem??",
    "My son has been diagnosed with 3mm vsd in echo, he was 6days then, paed cardiologist had put him on furosemide drops, the baby feeds very well, now he is 17days old, but continous urination since past few days bothering me very much since he i7 unable to achieve the desired weight, should i stop the furosemide drop,",
    "Hi, my sister has a benign lipoma in her left wrist. She has had it since she was little and she s now 28. She has had surgery on it three times now, but doctors haven t been able to remove it because it s intertwined with nerves. Earlier today, it started to hurt more than usual and get really hot. She tried putting ice on it to cool it down, but it only helped a little and it quickly got hot again after the ice was taken off. Is this something we should be concerned about? Or is there something we can do for it?",
    "I tested very high SGPT/SGOT levels. SGPT level is 122 and SGOT is 149.I have been advised to take Udiliv 300mg & mecobion-od twice daily. Would like to know if it is ok to have such medication and how serious may be my present condition and effect on my health. Is Any precaution I may need to take and any possible risks, as I am extremely tense about the situation. Many thanks in advance to advise.",
    "I have a lump on the roof of my mouth ( the dentist had told me at one time, that I must have had it since I was a child) it is starting to bother me about a mouth ago. It feels like sand paper was rub on it and now all I feel is rawness, my sinuses are bothering me also, can this be connected? My ears feel stuffy and slightly burning and my throat feels like cotton is stuck in it. Please let me know what you think. I will also book an appointment with my Doctor.",
    "Hello Im Joelmy girlfriend, after having 1 week period delay, took a pregnancy test, which resulted positive. However, 2 weeks later she blood-checked in a hospital and the resulted NOT pregnant. Though her period didnt come yet, I wasnt given any clear answer from the hospital, please help! thank you",
    "I have been diagnosed with GAD since recovering from prostate cancer & possible duodenal cancer that was determined to be non malignant both within the past 2 years. I have tried 5 different SSRI s and 3 different SSNI s, none of which provided any relief. Side effects for most made my anxiety worse. I have also seen 4 psychiatrists with some minor success. CBT offering some help, the others, EMDR & Talk Therapy very little. The only relief I get is with 1MG Lorazepam as needed with no side effects. Why all the negativity re this medication.",
    "i have been suffering right upper abdominal pain for the last 6 years.CT SCAN, ENDOSCOPY, ULTRASONOGRAHY,VARIOUS BLOOD TEST, AND HEPATO BILLIARY TEST FOUND NOTHING.NOW I AM FINDING A LITTLE HARD LIKE A PIPE THING JUST BELOW RIGT RIB CAGE. PLEASE SUGGEST.",
    "i have developed belle palseythree days ago,i had shingles vaccination and flue vaccination done about two weeks ago Brain CT sacan and Mri normal,blood work normal,no diabetes,doctor say i have this condition due to schlnles live virus infection.i have no previous history of trauma. Is it okay to have one or two alcohol.",
    "Hi my nephew is in thailand and has fractured his skull in two places resulting in atleast one bleed to his brain, he is on drugs to combat the bleeding, the doctor said he is willing to sign a form for him to fly 13 hrs home to uk, is this safe to do so?? thank you",
    "recently told I had plaque thinning the arteries in my brain. can you give me good links that will tell me what this means, what caused it and how it can be fixed? I would like to know the good, bad and ugly.I have a multitude of medical conditions and would like to know if this is due to any of those conditions. thanks.",
    "hi yesterday my dad had a fit - my mum described it like this",
    "Hello I m a 23 year old female been having sharp chest pains. Started a few months ago I would get them randomly and then it would go away. Just last week it started but hasn t went away went the to the er last Friday and they said it was pleurisy never gave me any type of x ray or ct scan just diagnosed me n sent me home w a shot and prescription for naprosyn500mg. I have yet to get the medicine due to money issues at the moment but the pain hurts so bad when I breath and I just took 4 200mg ibprof. N still feel the pain",
    "Hi, I m a woman in her late thirties. I am experiencing significant (obvious) swelling in my feet, ankles and calves almost every day. It goes away overnight (while I am asleep in bed) but seems to show up again every evening. I also experience puffiness in my face. I am overweight but not obese, and I walk at least 1.5 miles every day. My Grandmother has congestive heart failure (has had it a long time) and she began getting symptoms like this in her thirties. Do you think that is my future as well?",
    "Hello Dr.I m 30 weeks pregnanthave detected with calculi of 3- 4 mm in both the kidneys.also pus cells with 150- 180/ hpf , protien, bacteria and yeast on 11.08.10on 12. 07.10 my urine report was pus cell with 25-30 hpf. Dr. gave me zocef (500) for five days after that pus cells reduced to 15-20 again Dr. gave me Taxim-o (200) for five days .but my pain didnt stopped im having severe pain in left side of the abdomen.and now after a month i just did my urine test my pus cell increased to 150-180 /hpf. Dr. suggested for Urine Culture test. but i will getmy report after days so till that report comes should i continue to take any antibiotic.please suggest me.Regardsmrs. shaikh",
    "My 4 month old baby has a purple/reddish lump on her mid back that seems to be getting larger. Her doctor said it could be a platelet issue, I cant remember the term she used, and that we would watch it closely. Do you know what the doctor is talking about and should I get another opinion??",
    "Hi! I am 5 wks. pregnant. I have a sluggish gallbladder, output of 27% , was to see a surgeon next week about having it removed until i found out i was pregnant. I am now having frequent loose stools. they almost appear oily or fatty.... greenish/brownish... not sure if its from the pregnancy or i guess my fear is that the gallbladder problem has created havoc on my pancreas and im freaking out",
    "Hi, Im 29 yrs old and married for 6 yrs and not yet conceived at all. Had been in treatment from the 8th month of my marriage and took breaks too. GG, had been there for almost 2 yrs with laproscopy and 5 unsuccessful IUIs done. And 2 IUIs at Prashant multi speciality hospitals, which of first happened to be weakly positive. My problem is there is no diagnosis of what my problem is. My husbands count n motility is normal. My laprscopic n follicular studies give clear positive results. Inspite i have not conceived. Really wondering to know why we have to do IVF. I have a perfect 28 days cycle. My BMI is 29(overweight) and from novemeber 2010 have been identified thyroid too. Could there be anything like egg doesnt release? What to do if so? or egg release but too thick for the sperm to penetrate? what could be the reason? What to do now?",
    "I have a tooth that was worked on 6 months ago.. they took my grey cavity out and filled it w a white one and also protected it w something.... they spent awhile on it. months later it was infected, so I took anti biotic... only bothered me a few times after minimal compared to b4 antibiotic... Now say 4 months I have a pimple like bubble on the side of my tooth... Is this conhhenry1978cerns to me suggestion I need a root canal??",
    "Our 9 yr old boy has some issues with his running. His main issue is very tight hamstrings, along with weak hips. He wears orthotics and has been to PT as well as working with a running coach to help his bio mechanics. Things have improved apart from the hamstrings which have refused to loosen. Our PT mentioned mild Tethered Cord as an option, do you think this could be a possibility. He is a big guy, already 5ft and plays soccer, BB and tennis. Thanks Phil.",
    "Hello. I am 24 years old. When I was in Second grade I got Hepatitis A. So I was between the age of 7-9. Is it still in my system after that many years? If I ever have children is there a possibility they will have it? Thanks for your time. Hope to hear from you soon.",
    "Thank you; I tripped over a paving slab on 3rd May and fell forward flat on the ground, hitting my chin and my left knee particularly. The knee was never swollen but was very bruised. The leg was sore to walk on but gradually got better until last week when I had a difficult drive to work owing to heavy traffic. After that my knee became noticeably more sore (though not extreme pain) and since then it has got worse again. No muscular pain, swelling or bruising but it gets progressively painful if I try to walk and if I touch around (as opposed to upon) the kneecap it stings. I now can t really walk or drive and am off work. Apologies for the lengthy e-mail. I would appreciate your advice.",
    "I have a feeling of a lump or something in my chest.squeezing across the mid section. plus trouble catching my breath sometimes,also a swishing noise in my chest to my head. after the squeezing and swishing I get a headache and my chest feels like I have been exercising. what is happing to me?",
    "Hello I have a lump in the centre on the roof of my mouth. It has been there for almost 10 years. It becomes swollen, irritated and itchy for periods of time (days to weeks) and then settles down again, only to come back a week or so later. It feels like it is related to my allergies (eye conjunctivitis ). I did see an ENT specialist about 6 years ago. He told me it was just some dermatitis . What I really need is for someone to tell me how I can get some relief. It is very uncomfortable.",
    "Hi, may I answer your health queries right now ? Please type your query here... I went to the doctor because of skipped heart beats, only lasting a second, which causes me to cough. Just didnt feel right. At the office my blood pressure was 169/92 which is 50 pts up for me, ekg turned out ok. He did chest xray and blood work. Waiting for results and appt. for echocardiogram. Heart feels like it beats a lot stronger.",
    "Im 54 year old female, dont smoke, about 30 pounds overweight, my blood pressure typically is around, 128 over 80-something. About 18 months ago I had a chest pain that started in what felt like my stomach and spread across my chest, it was sharp and lasted a few minutes. I never had another until recently, now I get them a couple times a week. they feel like they start under my left breast and spread across both breasts and a feeling that I need to burp comes up into my throat, but I dont burp. Sometimes, it moves into my jaw. Once its gone, I feel fine. What do you think?",
    "hellow... req u pls advise best gynecologist... five yrs has been passed we are unable to obtained one child.. in starting pregnancy was ok but after one Abortion we hv got serious prob. now we hv tried more even take consultancy with many gynecologist but unable to get pregnancy... pls help me and advise if Abortion is main reason not to getting pregnant ... ???"
]

doctor_answers = [
    "Thanks for posting your query to Chat Doctor. After going through your history, I want to assure you not to get worried about it. White spots in oral cavity can occur due to many reasons which can only be told after proper visual examination. Do you have any other skin problem as sometimes these spots may be related to other systemic conditions? I would suggest you to visit a dentist and if needed a dermatologist for proper examination and treatment. Hope my answer will help you.",
    "Hi... Thank you for consulting in Chat Doctor. This is called evening colic and is quite common in this age group. This happens when the baby sucks at the breast very fast and in eagerness to Chat Doctor. Unless the air comes out like burping or flatus this discomfort will be there. Usually I don't advice any medicines for this. The best ways are proper burping and prone position with gentle back patting.",
    "I understand your concerns. From the description, diagnosis may not be possible. But your son could be suffering from depression which can cause lethargy and excessive sleep. I would suggest consulting a psychologist without delay.",
    "It seems you are suffering from seborrheic dermatitis which can lead to hair fall. Use medicated shampoos, avoid excessive oiling, and take supplements like biotin. This should help.",
    "Furosemide is a diuretic and increased urination is expected. You should not stop it without consulting your cardiologist. Regular follow-up is important.",
    "This is most likely a ganglion rather than a lipoma. The pain and heat suggest infection. Consult an orthopedic surgeon; antibiotics and imaging like MRI may be needed.",
    "Your liver enzymes are elevated, indicating liver disease. Udiliv is appropriate. Follow a low-fat diet and do not worry excessively. Levels should normalize.",
    "Lumps on the roof of the mouth are often benign but need examination. Sinus and throat symptoms may or may not be related. Please see your doctor.",
    "Blood beta-HCG is more accurate than urine tests. If blood test is negative, pregnancy is unlikely. Ultrasound can be done if doubt persists.",
    "In resistant anxiety cases, alternative therapies like NLP or specialized acupuncture may help. Lorazepam can help symptoms but long-term strategy should be discussed.",
    "HI. This may be a very small hernia in the center which is missed by all. Another possibility is costo-chon Chat Doctor. Hard-like-pipe thing is suggestive of this. Needs the investigations on this ground and a good clinical examination one can find these, if the Doctor is aware of such rare things. Can you post further information as to",
    "Hi, Welcome to Chat Doctor .com I am Chat Doctor. Mariano Into Bruno Mascaras. I have gone through your query with diligence and would like you to know that I am here to help you. Alcohol affects nerve shaving alcohol will delay the healing process please avoid alcohol till you recover from bells palsy Hope you found the answer helpful. If you need any clarification / have doubts / have additional questions / have follow-up questions, then please do not hesitate in asking again. I will be happy to answer your questions. In the future, for continuity of care, I encourage you to contact me directly in Chat Doctor at http",
    "Hi, Thank you for posting your query. I have noted your nephews symptoms and diagnosis. The fitness for flying depends on the severity of injury, CT scan findings and patients clinical condition. The fact that your nephew did not require surgery suggests that the injury is not severe. If his clinical condition is good, he would be fit to fly. It would be useful if you can upload a copy of his CT scan report here. I hope my answer helps. Please get back if you have any follow-up queries or if you require any additional information. Wishing you good health, Chat Doctor. Ly/",
    "Hi, Thank you for posting your query. I think what you mean is atherosclerosis, where the arteries supplying blood to the brain get narrowed and may result in stroke due to ischemia (lack of blood flow). The treatment includes aspirin and stain use. The common risk factors for the same include high BP, sugar, cholesterol and smoking. Controlling these risk factors would stop the disease progression and prevent the strokes. Best wishes, Chat Doctor.",
    "Hi, Based on details your father had R) focal onset seizure with secondary generalization and loss of consciousness. Need to rule out L) cerebral hemisphere structural lesion. Since he had previous history of Triple bypass surgery risk of ischemic stroke producing seizure is high. Hence, dose of anti platelets should be increased after ruling out brain bleed. He also requires anti-epileptic Chat Doctor. Consult nearby neurologist for further plan and management",
    "Thanks for your question on Chat Doctor. In my opinion, you should rule out cardiac and pulmonary causes first for your intermittent chest pain. So get done ECG to rule out heart related causes. Get done chest x-ray to rule out pleurisy (inflammation of pleura) and lung related causes. If everything is normal then mostly you have anxiety and related chest discomfort. So better to consult psychiatrist and get done counselling sessions. Try to identify stressors in your life and start working on it. Avoid stress and anxiety. Be relax and calm.",
    "Hello dear user! I have gone through your query and understood your concerns! Thank you for sharing them on Chat Doctor. We can't be sure that these symptoms you are experiencing now indicate heart failure without doing some examinations. Usually swollen ankles and feet are found in heart congestive failure, but in these cases, feet and ankles are more swollen in the evening and less in the morning. Kidney problems, diabetes, thyroid problems etc., may lead to similar symptoms. So to determine the real cause of these concerns I would recommend you to do some examinations to let us know more about your health.- Blood pressure monitoring-Blood sugar and lipids-Urine test, proteinuria- Liver enzymes, and kidney function indicators (creatinine, urea)- ECG-Heart ultrasound examination and cardiologist consultation doctor may ask for more examinations if he sees reasonable. After we get these results well be able to determine your condition and treat it accordingly. Feel free to ask us again on this website. I hope this answer was helpful to you! Please kindly rate it as helpful and write a short review about your experience with me! I would appreciate that a lot. Thank you and best regards! Chat Doctor.",
    "Hi dear, I have gone through your question and understand your concerns. You are having recurrent urinary tract infection, which is most likely due to the calculi in the renal system. You should get active treatment for this infection, as it can cause preterm labor and delivery. You can continue taking plenty of fluids and oral antibiotics till the final culture report comes. Further management should be done accordingly. Hope you found the answer helpful. Wishing you good health. Regards Chat Doctor.",
    "Hi, thanks for writing to Chat Doctor and sharing your babies health concerns with us! Well, If I were your treating Doctor for this case of the purple/reddish lump on the mid-back of baby, I would think of few possibilities",
    "Hi and thank you so much for this query. I am so sorry to hear about what you are experiencing right now. A gall bladder problem can lead to diarrhea because food is not well digested as bile from the is very important in the digestion of fats. I will not particularly think that your pancreas has been damaged as you are not presenting with signs of pancreatitis which is often a pain. For now, stay relaxed and follow up with your doctors to figure out the exact cause and propose a treatment plan to you. I hope this ad Chat Doctor. Thank you so much for using our services, and please feel free to ask for clarifications if need be. I wish you the best of health.",
    "Hi, I think you can go for few cycles of natural monitoring by ultrasound. You can track your follicles' growth by repeated ultrasound and when your follicles is more than 17 to 18 mm, take injection for rupturing the follicles. Be in contact with your husband every 2 to 3 days after your periods stop. Take progesterone for next 2 weeks. Do a urine pregnancy test at home after that. You can try like that for 3 cycles at least before going to IVF. Continue your thyroid medicine. Hope I have answered your question. Regards",
    "Hi, Welcome to Chat Doctor forum, Your tooth which was filled earlier has got infection due to any residual caries or secondary caries. Due to caries, infection has reached the pulp and periapical abscess has formed. This pimple like bubble is due to abscess formed because pus needs a way to extrude itself. Consult a dentist for radio graphical examination done. Root canal treatment has to be done in this tooth. Take care",
    "Hi, No. This doesn't seems like a case of tethered cord, as tethered cord has both motor and sensory signs and symptom and are usually progressive. This appears more like a case of cerebral palsy, tight hamstring with weak muscles is characteristic of hamstrings. Initial treatment is mainly through orthotics with adjuvant surgical procedures but to reach a diagnosis, a detailed physical examination is essential. As far as tight hamstrings is concerned, they can be lengthened by surgery. Take care. Hope I have answered your question. Let me know if I can assist you further.",
    "Hi thanks for asking question. Let me clear your doubt dear... Hepatitis A spread in community by Eco oral route, means by contaminated food or water. So if you have this disease in childhood it is not spread to your child by you. Only if person take contaminated food or water by this virus then only hepatitis can occur. You have hepatitis A in childhood. So at that time protective antibody form in your system and protect you for many years. But virus cannot activate right now as it is already 10 years!! I hope I have solved your concern. Take care. Chat Doctor.",
    "Brief Answer",
    "Hi. I can understand your concern. Chest discomfort is commonly seen in bronchitis and lung infection. Since your chest x-ray is normal, no need to worry about lung infection. Possibility of bronchitis is more in your case. So better to consult pulmonologist and get done clinical examination of respiratory system and PFT (Pulmonary Function Test). PFT is needed for the diagnosis of bronchitis. It will also tell you about severity of the disease and treatment of bronchitis is based on severity only. You may need inhaled bronchodilators and inhaled corticosteroid (ICS)Don't worry, you will be alright. Hope I have solved your query. Wish you good health. Thanks.",
    "Hi Evan, The problem you are describing can be associated with the allergy and with the sinus. If you are having some sort of sinusitis which is related with the nose sinuses infection or gallery, this can present as the problem which you are facing with. Allergic sinusitis is a chronic problem and is mostly because of some allergic substance and this can lead to a chronic condition. I would suggest you to consult some ENT surgeon regarding this problem and after proper history and examination and if required some investigation, it can be confirmed whether this is some sort of allergic problem or something different. Thanks.",
    "Hello! Welcome and thank you for asking on Chat Doctor! I understand your concern and would explain that these skipped heart beats could be related to a cardiac arrhythmia. For this reason, I would recommend performing further tests",
    "Hi There After going through your query I understand your concern. I would like to tell you that possibilities of acid Reflux/HERD more than a heart disease is there if you don't get breathless, palpitation with chest pain. It's advisable for you to avoid junk and spicy food to get relief and can use over the counter antacids also. Also get an ESG and Echocardiography done as a routine cardiac check up. Hopefully this will answer your query. Kind Regards Chat Doctor.",
    "Hello and welcome to Chat Doctor, Abortion is not the cause of failure to conceive subsequently. Inability to conceive has many reasons.First, you have identified those days in your menstrual cycle when the chances of conception are maximum i.e. during ovulation. The period of ovulation can be determined by basal body temperature and changes in the cervical mucus. If conception does not take place even after taking care of the ovulation period, you need to get some investigations done. In your case, complete examination of the reproductive tract - ultrasonography and/ or hysterosalpingography, hormonal levels -estrogens, FSH and LH levels and follicular sac. In case of your husband, semen analysis should be under-taken. These are some of the investigations which will let your gynecologist know the cause of inability to conceive and thus plan management. Thanks and take care Chat Doctor."
]

# ============================================================
# CELL 0 — JSON INITIALIZATION (QUERIES + DOCTOR ANSWERS ONLY)
# ============================================================

import json
import os
from datetime import datetime, timezone

# ----------------------------
# CONFIG
# ----------------------------
JSON_PATH = "./RAW_DATA_QLoRA.json"

# ----------------------------
# LOAD OR INIT JSON
# ----------------------------
# Start from the file on disk when it exists, so anything already stored in it
# is carried over instead of being overwritten by an empty structure.
if os.path.exists(JSON_PATH):
    with open(JSON_PATH, "r", encoding="utf-8") as f:
        results = json.load(f)
else:
    results = {}

# ----------------------------
# VALID EMPTY MODEL TEMPLATE
# ----------------------------
# Placeholder stored under "base_model" / "lora_model" for a new entry.
# Every field starts as None.
EMPTY_MODEL_BLOCK = {
    "text": None,
    "latency_sec": None,
    "prompt_tokens": None,
    "generated_tokens": None,
    "total_tokens": None,
    "timestamp_utc": None
}

# ----------------------------
# POPULATE JSON WITH DATA
# ----------------------------
# zip() stops at the shorter list, which would silently drop the unpaired tail.
# Fail loudly instead of writing an incomplete evaluation set.
if len(queries) != len(doctor_answers):
    raise ValueError(
        f"queries ({len(queries)}) and doctor_answers ({len(doctor_answers)}) "
        "must have the same length"
    )

for idx, (query, doctor_answer) in enumerate(zip(queries, doctor_answers), start=1):
    key = str(idx)

    # setdefault only fills what is missing: a brand-new entry receives all five
    # fields, while an entry loaded from disk keeps every value it already has.
    entry = results.setdefault(key, {})
    entry.setdefault("query", query)
    entry.setdefault("doctor_answer", doctor_answer)
    entry.setdefault("base_model", EMPTY_MODEL_BLOCK.copy())
    entry.setdefault("lora_model", EMPTY_MODEL_BLOCK.copy())
    entry.setdefault(
        "meta",
        {"created_at_utc": datetime.now(timezone.utc).isoformat()}
    )

# ----------------------------
# WRITE JSON
# ----------------------------
with open(JSON_PATH, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

print(f"JSON initialized successfully: {JSON_PATH}")
print(f"Total QA entries: {len(results)}")

JSON initialized successfully: ./RAW_DATA_QLoRA.json
Total QA entries: 30


In [None]:
# ============================================================
# CELL 1 — BASE MODEL INFERENCE (EXISTING JSON)
# ============================================================

import json
import time
from datetime import datetime, timezone

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# ----------------------------
# CONFIG
# ----------------------------
MODEL_ID = "Qwen/Qwen2.5-3B-Instruct"
# Same results file that CELL 0 initializes and CELL 2 reads, so base-model and
# LoRA answers for each question end up in one JSON document.
JSON_PATH = "./RAW_DATA_QLoRA.json"

DTYPE = torch.float16

# ----------------------------
# QUANTIZATION
# ----------------------------
# 4-bit NF4 weights with double quantization; DTYPE is used as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=DTYPE,
)

# ----------------------------
# TOKENIZER
# ----------------------------
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True
)
# generate() below is given pad_token_id, so make sure one is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ----------------------------
# INFERENCE FUNCTION
# ----------------------------
def run_inference(model, query: str) -> dict:
    """Answer one query with ``model`` and return the text plus basic metrics.

    The query is placed in the ``user`` turn of a two-message chat (after a fixed
    "licensed medical doctor" system prompt) and rendered with the module-level
    ``tokenizer``'s chat template. Generation runs with sampling disabled and at
    most 1024 new tokens.

    Args:
        model: Object exposing ``.device`` and ``.generate(...)``.
        query: The user question.

    Returns:
        dict with keys ``text`` (decoded completion, whitespace-stripped),
        ``latency_sec`` (wall-clock duration of ``generate``, rounded to 4
        places), ``prompt_tokens``, ``generated_tokens``, ``total_tokens`` and
        ``timestamp_utc`` (ISO 8601 string).
    """
    messages = [
        {
            "role": "system",
            "content": "You are a licensed medical doctor. Respond in a professional, neutral, and explanatory tone."
        },
        {
            "role": "user",
            "content": query
        }
    ]

    # return_dict=True makes the tokenizer return its own attention mask next to
    # the ids. Rebuilding the mask as `input_ids != pad_token_id` would blank out
    # real prompt tokens whenever the pad id also occurs inside the prompt, e.g.
    # when pad_token has been set to eos_token.
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
        padding=True
    ).to(model.device)

    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]
    prompt_tokens = input_ids.shape[1]

    start_time = time.perf_counter()

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=1024,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    latency = time.perf_counter() - start_time

    # generate() returns prompt + completion; everything past the prompt is new.
    total_tokens = outputs.shape[1]
    generated_tokens = total_tokens - prompt_tokens

    generated = outputs[:, prompt_tokens:]
    text = tokenizer.decode(generated[0], skip_special_tokens=True).strip()

    return {
        "text": text,
        "latency_sec": round(latency, 4),
        "prompt_tokens": int(prompt_tokens),
        "generated_tokens": int(generated_tokens),
        "total_tokens": int(total_tokens),
        "timestamp_utc": datetime.now(timezone.utc).isoformat()
    }

# ----------------------------
# LOAD BASE MODEL
# ----------------------------
print("Loading BASE model...")
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)
base_model.eval()

# ----------------------------
# LOAD JSON
# ----------------------------
with open(JSON_PATH, "r", encoding="utf-8") as src:
    results = json.load(src)

# ----------------------------
# BASE INFERENCE LOOP
# ----------------------------
# Visit entries in numeric key order. Only entries without a stored base-model
# text are processed; the rest are left as they are.
for key in sorted(results, key=int):
    entry = results[key]

    if entry.get("base_model", {}).get("text") is None:
        print(f"[BASE] QA #{key}")

        entry["base_model"] = run_inference(base_model, entry["query"])

        # Rewrite the whole file after each answer; a later re-run then skips
        # every entry that already has text.
        with open(JSON_PATH, "w", encoding="utf-8") as dst:
            json.dump(results, dst, ensure_ascii=False, indent=2)

print("BASE inference completed.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Loading BASE model...


config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[BASE] QA #1
[BASE] QA #2
[BASE] QA #3
[BASE] QA #4
[BASE] QA #5
[BASE] QA #6
[BASE] QA #7
[BASE] QA #8
[BASE] QA #9
[BASE] QA #10
[BASE] QA #11
[BASE] QA #12
[BASE] QA #13
[BASE] QA #14
[BASE] QA #15
[BASE] QA #16
[BASE] QA #17
[BASE] QA #18
[BASE] QA #19
[BASE] QA #20
[BASE] QA #21
[BASE] QA #22
[BASE] QA #23
[BASE] QA #24
[BASE] QA #25
[BASE] QA #26
[BASE] QA #27
[BASE] QA #28
[BASE] QA #29
[BASE] QA #30
BASE inference completed.


In [None]:
# ============================================================
# CELL 2 — LoRA MODEL INFERENCE (EXISTING JSON)
# ============================================================

import json
import time
from datetime import datetime, timezone

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# ----------------------------
# CONFIG
# ----------------------------
MODEL_ID = "Qwen/Qwen2.5-3B-Instruct"
ADAPTER_PATH = "./RAW_Qwen3B_QLoRA/adapter"
JSON_PATH = "./RAW_DATA_QLoRA.json"

DTYPE = torch.float16

# ----------------------------
# QUANTIZATION
# ----------------------------
# 4-bit NF4 weights with double quantization; DTYPE is used as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=DTYPE,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# ----------------------------
# TOKENIZER
# ----------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Fall back to the eos token when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ----------------------------
# INFERENCE FUNCTION
# ----------------------------
def run_inference(model, query: str) -> dict:
    """Answer one query with ``model`` and return the text plus basic metrics.

    The query is placed in the ``user`` turn of a two-message chat (after a fixed
    "licensed medical doctor" system prompt) and rendered with the module-level
    ``tokenizer``'s chat template. Generation runs with sampling disabled and at
    most 1024 new tokens.

    Args:
        model: Object exposing ``.device`` and ``.generate(...)``.
        query: The user question.

    Returns:
        dict with keys ``text`` (decoded completion, whitespace-stripped),
        ``latency_sec`` (wall-clock duration of ``generate``, rounded to 4
        places), ``prompt_tokens``, ``generated_tokens``, ``total_tokens`` and
        ``timestamp_utc`` (ISO 8601 string).
    """
    messages = [
        {
            "role": "system",
            "content": "You are a licensed medical doctor. Respond in a professional, neutral, and explanatory tone."
        },
        {
            "role": "user",
            "content": query
        }
    ]

    # return_dict=True makes the tokenizer return its own attention mask next to
    # the ids. Rebuilding the mask as `input_ids != pad_token_id` would blank out
    # real prompt tokens whenever the pad id also occurs inside the prompt, e.g.
    # when pad_token has been set to eos_token.
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
        padding=True
    ).to(model.device)

    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]
    prompt_tokens = input_ids.shape[1]

    start_time = time.perf_counter()

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=1024,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    latency = time.perf_counter() - start_time

    # generate() returns prompt + completion; everything past the prompt is new.
    total_tokens = outputs.shape[1]
    generated_tokens = total_tokens - prompt_tokens

    generated = outputs[:, prompt_tokens:]
    text = tokenizer.decode(generated[0], skip_special_tokens=True).strip()

    return {
        "text": text,
        "latency_sec": round(latency, 4),
        "prompt_tokens": int(prompt_tokens),
        "generated_tokens": int(generated_tokens),
        "total_tokens": int(total_tokens),
        "timestamp_utc": datetime.now(timezone.utc).isoformat()
    }

# ----------------------------
# LOAD LoRA MODEL
# ----------------------------
print("Loading LoRA model...")
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# Wrap the quantized base model with the adapter stored at ADAPTER_PATH.
lora_model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
lora_model.eval()

# ----------------------------
# LOAD JSON
# ----------------------------
with open(JSON_PATH, "r", encoding="utf-8") as src:
    results = json.load(src)

# ----------------------------
# LoRA INFERENCE LOOP
# ----------------------------
# Visit entries in numeric key order. Only entries without a stored LoRA text
# are processed; the rest are left as they are.
for key in sorted(results, key=int):
    entry = results[key]

    if entry.get("lora_model", {}).get("text") is None:
        print(f"[LoRA] QA #{key}")

        entry["lora_model"] = run_inference(lora_model, entry["query"])

        # Rewrite the whole file after each answer; a later re-run then skips
        # every entry that already has text.
        with open(JSON_PATH, "w", encoding="utf-8") as dst:
            json.dump(results, dst, ensure_ascii=False, indent=2)

print("LoRA inference completed.")

Loading LoRA model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[LoRA] QA #1
[LoRA] QA #2
[LoRA] QA #3
[LoRA] QA #4
[LoRA] QA #5
[LoRA] QA #6
[LoRA] QA #7
[LoRA] QA #8
[LoRA] QA #9
[LoRA] QA #10
[LoRA] QA #11
[LoRA] QA #12
[LoRA] QA #13
[LoRA] QA #14
[LoRA] QA #15
[LoRA] QA #16
[LoRA] QA #17
[LoRA] QA #18
[LoRA] QA #19
[LoRA] QA #20
[LoRA] QA #21
[LoRA] QA #22
[LoRA] QA #23
[LoRA] QA #24
[LoRA] QA #25
[LoRA] QA #26
[LoRA] QA #27
[LoRA] QA #28
[LoRA] QA #29
[LoRA] QA #30
LoRA inference completed.


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# ----------------------------
# CONFIG
# ----------------------------
# Hub id of the base checkpoint (same value as MODEL_ID in the inference cells).
model_id = "Qwen/Qwen2.5-3B-Instruct"
# NOTE(review): the LoRA inference cell loads its adapter from
# "./RAW_Qwen3B_QLoRA/adapter"; the "ROW" spelling here may be a typo. Confirm
# which directory actually holds the trained adapter.
adapter_path = "./ROW_Qwen3B_QLoRA/adapter"

# ----------------------------
# DATA (30 QA)
# ----------------------------
queries = [
    "Hi! Please help,I have a small, slightly bumpy white splotch on the inside of both my cheeks. It is surrounded by small prominent purple veins. Is this cancerous? Im a 23 year old non-smoking male. Should I wait a few days or see a dr immediately? Thank you for your kindness.",
    "my 1 month old son is showing signs of gas problem symptoms are -1) after feeding burping is not always proper gas will not come if we tap his back for 10-15 min 2) after some time he will show like he is swallowing some thing which has again come to his mouth from stomach, 3) some times milk will come threw his nose , 4)after sleeping 1/2 hour to 1 hour he will cry in sleep like he has some difficulty what i has to do can i try colicaid or grip water",
    "Hi, may I answer your health queries right now ? Please type your query here...My 20 year old son is almost always tired and sleeps a LOT. This started 4-5 years ago and he had a very involved medical work-up at that time, but they did not come up with any cause or what to do about it. What could cause this? Is it worth trying a medical workup again?",
    "hi Docter, I am 24 yrs old female staying in bangalore for the past 4 yrs. I am have Oily hair due to which i feel that I am facing hair loss as i have to shampoo my hair every alternate day. Can you please suggest me some measures to stop hair loss. Is it the water which is the problem??",
    "My son has been diagnosed with 3mm vsd in echo, he was 6days then, paed cardiologist had put him on furosemide drops, the baby feeds very well, now he is 17days old, but continous urination since past few days bothering me very much since he i7 unable to achieve the desired weight, should i stop the furosemide drop,",
    "Hi, my sister has a benign lipoma in her left wrist. She has had it since she was little and she s now 28. She has had surgery on it three times now, but doctors haven t been able to remove it because it s intertwined with nerves. Earlier today, it started to hurt more than usual and get really hot. She tried putting ice on it to cool it down, but it only helped a little and it quickly got hot again after the ice was taken off. Is this something we should be concerned about? Or is there something we can do for it?",
    "I tested very high SGPT/SGOT levels. SGPT level is 122 and SGOT is 149.I have been advised to take Udiliv 300mg & mecobion-od twice daily. Would like to know if it is ok to have such medication and how serious may be my present condition and effect on my health. Is Any precaution I may need to take and any possible risks, as I am extremely tense about the situation. Many thanks in advance to advise.",
    "I have a lump on the roof of my mouth ( the dentist had told me at one time, that I must have had it since I was a child) it is starting to bother me about a mouth ago. It feels like sand paper was rub on it and now all I feel is rawness, my sinuses are bothering me also, can this be connected? My ears feel stuffy and slightly burning and my throat feels like cotton is stuck in it. Please let me know what you think. I will also book an appointment with my Doctor.",
    "Hello Im Joelmy girlfriend, after having 1 week period delay, took a pregnancy test, which resulted positive. However, 2 weeks later she blood-checked in a hospital and the resulted NOT pregnant. Though her period didnt come yet, I wasnt given any clear answer from the hospital, please help! thank you",
    "I have been diagnosed with GAD since recovering from prostate cancer & possible duodenal cancer that was determined to be non malignant both within the past 2 years. I have tried 5 different SSRI s and 3 different SSNI s, none of which provided any relief. Side effects for most made my anxiety worse. I have also seen 4 psychiatrists with some minor success. CBT offering some help, the others, EMDR & Talk Therapy very little. The only relief I get is with 1MG Lorazepam as needed with no side effects. Why all the negativity re this medication.",
    "i have been suffering right upper abdominal pain for the last 6 years.CT SCAN, ENDOSCOPY, ULTRASONOGRAHY,VARIOUS BLOOD TEST, AND HEPATO BILLIARY TEST FOUND NOTHING.NOW I AM FINDING A LITTLE HARD LIKE A PIPE THING JUST BELOW RIGT RIB CAGE. PLEASE SUGGEST.",
    "i have developed belle palseythree days ago,i had shingles vaccination and flue vaccination done about two weeks ago Brain CT sacan and Mri normal,blood work normal,no diabetes,doctor say i have this condition due to schlnles live virus infection.i have no previous history of trauma. Is it okay to have one or two alcohol.",
    "Hi my nephew is in thailand and has fractured his skull in two places resulting in atleast one bleed to his brain, he is on drugs to combat the bleeding, the doctor said he is willing to sign a form for him to fly 13 hrs home to uk, is this safe to do so?? thank you",
    "recently told I had plaque thinning the arteries in my brain. can you give me good links that will tell me what this means, what caused it and how it can be fixed? I would like to know the good, bad and ugly.I have a multitude of medical conditions and would like to know if this is due to any of those conditions. thanks.",
    "hi yesterday my dad had a fit - my mum described it like this",
    "Hello I m a 23 year old female been having sharp chest pains. Started a few months ago I would get them randomly and then it would go away. Just last week it started but hasn t went away went the to the er last Friday and they said it was pleurisy never gave me any type of x ray or ct scan just diagnosed me n sent me home w a shot and prescription for naprosyn500mg. I have yet to get the medicine due to money issues at the moment but the pain hurts so bad when I breath and I just took 4 200mg ibprof. N still feel the pain",
    "Hi, I m a woman in her late thirties. I am experiencing significant (obvious) swelling in my feet, ankles and calves almost every day. It goes away overnight (while I am asleep in bed) but seems to show up again every evening. I also experience puffiness in my face. I am overweight but not obese, and I walk at least 1.5 miles every day. My Grandmother has congestive heart failure (has had it a long time) and she began getting symptoms like this in her thirties. Do you think that is my future as well?",
    "Hello Dr.I m 30 weeks pregnanthave detected with calculi of 3- 4 mm in both the kidneys.also pus cells with 150- 180/ hpf , protien, bacteria and yeast on 11.08.10on 12. 07.10 my urine report was pus cell with 25-30 hpf. Dr. gave me zocef (500) for five days after that pus cells reduced to 15-20 again Dr. gave me Taxim-o (200) for five days .but my pain didnt stopped im having severe pain in left side of the abdomen.and now after a month i just did my urine test my pus cell increased to 150-180 /hpf. Dr. suggested for Urine Culture test. but i will getmy report after days so till that report comes should i continue to take any antibiotic.please suggest me.Regardsmrs. shaikh",
    "My 4 month old baby has a purple/reddish lump on her mid back that seems to be getting larger. Her doctor said it could be a platelet issue, I cant remember the term she used, and that we would watch it closely. Do you know what the doctor is talking about and should I get another opinion??",
    "Hi! I am 5 wks. pregnant. I have a sluggish gallbladder, output of 27% , was to see a surgeon next week about having it removed until i found out i was pregnant. I am now having frequent loose stools. they almost appear oily or fatty.... greenish/brownish... not sure if its from the pregnancy or i guess my fear is that the gallbladder problem has created havoc on my pancreas and im freaking out",
    "Hi, Im 29 yrs old and married for 6 yrs and not yet conceived at all. Had been in treatment from the 8th month of my marriage and took breaks too. GG, had been there for almost 2 yrs with laproscopy and 5 unsuccessful IUIs done. And 2 IUIs at Prashant multi speciality hospitals, which of first happened to be weakly positive. My problem is there is no diagnosis of what my problem is. My husbands count n motility is normal. My laprscopic n follicular studies give clear positive results. Inspite i have not conceived. Really wondering to know why we have to do IVF. I have a perfect 28 days cycle. My BMI is 29(overweight) and from novemeber 2010 have been identified thyroid too. Could there be anything like egg doesnt release? What to do if so? or egg release but too thick for the sperm to penetrate? what could be the reason? What to do now?",
    "I have a tooth that was worked on 6 months ago.. they took my grey cavity out and filled it w a white one and also protected it w something.... they spent awhile on it. months later it was infected, so I took anti biotic... only bothered me a few times after minimal compared to b4 antibiotic... Now say 4 months I have a pimple like bubble on the side of my tooth... Is this conhhenry1978cerns to me suggestion I need a root canal??",
    "Our 9 yr old boy has some issues with his running. His main issue is very tight hamstrings, along with weak hips. He wears orthotics and has been to PT as well as working with a running coach to help his bio mechanics. Things have improved apart from the hamstrings which have refused to loosen. Our PT mentioned mild Tethered Cord as an option, do you think this could be a possibility. He is a big guy, already 5ft and plays soccer, BB and tennis. Thanks Phil.",
    "Hello. I am 24 years old. When I was in Second grade I got Hepatitis A. So I was between the age of 7-9. Is it still in my system after that many years? If I ever have children is there a possibility they will have it? Thanks for your time. Hope to hear from you soon.",
    "Thank you; I tripped over a paving slab on 3rd May and fell forward flat on the ground, hitting my chin and my left knee particularly. The knee was never swollen but was very bruised. The leg was sore to walk on but gradually got better until last week when I had a difficult drive to work owing to heavy traffic. After that my knee became noticeably more sore (though not extreme pain) and since then it has got worse again. No muscular pain, swelling or bruising but it gets progressively painful if I try to walk and if I touch around (as opposed to upon) the kneecap it stings. I now can t really walk or drive and am off work. Apologies for the lengthy e-mail. I would appreciate your advice.",
    "I have a feeling of a lump or something in my chest.squeezing across the mid section. plus trouble catching my breath sometimes,also a swishing noise in my chest to my head. after the squeezing and swishing I get a headache and my chest feels like I have been exercising. what is happing to me?",
    "Hello I have a lump in the centre on the roof of my mouth. It has been there for almost 10 years. It becomes swollen, irritated and itchy for periods of time (days to weeks) and then settles down again, only to come back a week or so later. It feels like it is related to my allergies (eye conjunctivitis ). I did see an ENT specialist about 6 years ago. He told me it was just some dermatitis . What I really need is for someone to tell me how I can get some relief. It is very uncomfortable.",
    "Hi, may I answer your health queries right now ? Please type your query here... I went to the doctor because of skipped heart beats, only lasting a second, which causes me to cough. Just didnt feel right. At the office my blood pressure was 169/92 which is 50 pts up for me, ekg turned out ok. He did chest xray and blood work. Waiting for results and appt. for echocardiogram. Heart feels like it beats a lot stronger.",
    "Im 54 year old female, dont smoke, about 30 pounds overweight, my blood pressure typically is around, 128 over 80-something. About 18 months ago I had a chest pain that started in what felt like my stomach and spread across my chest, it was sharp and lasted a few minutes. I never had another until recently, now I get them a couple times a week. they feel like they start under my left breast and spread across both breasts and a feeling that I need to burp comes up into my throat, but I dont burp. Sometimes, it moves into my jaw. Once its gone, I feel fine. What do you think?",
    "hellow... req u pls advise best gynecologist... five yrs has been passed we are unable to obtained one child.. in starting pregnancy was ok but after one Abortion we hv got serious prob. now we hv tried more even take consultancy with many gynecologist but unable to get pregnancy... pls help me and advise if Abortion is main reason not to getting pregnant ... ???"
]

doctor_answers = [
    "Thanks for posting your query to Chat Doctor. After going through your history, I want to assure you not to get worried about it. White spots in oral cavity can occur due to many reasons which can only be told after proper visual examination. Do you have any other skin problem as sometimes these spots may be related to other systemic conditions? I would suggest you to visit a dentist and if needed a dermatologist for proper examination and treatment. Hope my answer will help you.",
    "Hi... Thank you for consulting in Chat Doctor. This is called evening colic and is quite common in this age group. This happens when the baby sucks at the breast very fast and in eagerness to Chat Doctor. Unless the air comes out like burping or flatus this discomfort will be there. Usually I don't advice any medicines for this. The best ways are proper burping and prone position with gentle back patting.",
    "I understand your concerns. From the description, diagnosis may not be possible. But your son could be suffering from depression which can cause lethargy and excessive sleep. I would suggest consulting a psychologist without delay.",
    "It seems you are suffering from seborrheic dermatitis which can lead to hair fall. Use medicated shampoos, avoid excessive oiling, and take supplements like biotin. This should help.",
    "Furosemide is a diuretic and increased urination is expected. You should not stop it without consulting your cardiologist. Regular follow-up is important.",
    "This is most likely a ganglion rather than a lipoma. The pain and heat suggest infection. Consult an orthopedic surgeon; antibiotics and imaging like MRI may be needed.",
    "Your liver enzymes are elevated, indicating liver disease. Udiliv is appropriate. Follow a low-fat diet and do not worry excessively. Levels should normalize.",
    "Lumps on the roof of the mouth are often benign but need examination. Sinus and throat symptoms may or may not be related. Please see your doctor.",
    "Blood beta-HCG is more accurate than urine tests. If blood test is negative, pregnancy is unlikely. Ultrasound can be done if doubt persists.",
    "In resistant anxiety cases, alternative therapies like NLP or specialized acupuncture may help. Lorazepam can help symptoms but long-term strategy should be discussed.",
    "HI. This may be a very small hernia in the center which is missed by all. Another possibility is costo-chon Chat Doctor. Hard-like-pipe thing is suggestive of this. Needs the investigations on this ground and a good clinical examination one can find these, if the Doctor is aware of such rare things. Can you post further information as to",
    "Hi, Welcome to Chat Doctor .com I am Chat Doctor. Mariano Into Bruno Mascaras. I have gone through your query with diligence and would like you to know that I am here to help you. Alcohol affects nerve shaving alcohol will delay the healing process please avoid alcohol till you recover from bells palsy Hope you found the answer helpful. If you need any clarification / have doubts / have additional questions / have follow-up questions, then please do not hesitate in asking again. I will be happy to answer your questions. In the future, for continuity of care, I encourage you to contact me directly in Chat Doctor at http",
    "Hi, Thank you for posting your query. I have noted your nephews symptoms and diagnosis. The fitness for flying depends on the severity of injury, CT scan findings and patients clinical condition. The fact that your nephew did not require surgery suggests that the injury is not severe. If his clinical condition is good, he would be fit to fly. It would be useful if you can upload a copy of his CT scan report here. I hope my answer helps. Please get back if you have any follow-up queries or if you require any additional information. Wishing you good health, Chat Doctor. Ly/",
    "Hi, Thank you for posting your query. I think what you mean is atherosclerosis, where the arteries supplying blood to the brain get narrowed and may result in stroke due to ischemia (lack of blood flow). The treatment includes aspirin and stain use. The common risk factors for the same include high BP, sugar, cholesterol and smoking. Controlling these risk factors would stop the disease progression and prevent the strokes. Best wishes, Chat Doctor.",
    "Hi, Based on details your father had R) focal onset seizure with secondary generalization and loss of consciousness. Need to rule out L) cerebral hemisphere structural lesion. Since he had previous history of Triple bypass surgery risk of ischemic stroke producing seizure is high. Hence, dose of anti platelets should be increased after ruling out brain bleed. He also requires anti-epileptic Chat Doctor. Consult nearby neurologist for further plan and management",
    "Thanks for your question on Chat Doctor. In my opinion, you should rule out cardiac and pulmonary causes first for your intermittent chest pain. So get done ECG to rule out heart related causes. Get done chest x-ray to rule out pleurisy (inflammation of pleura) and lung related causes. If everything is normal then mostly you have anxiety and related chest discomfort. So better to consult psychiatrist and get done counselling sessions. Try to identify stressors in your life and start working on it. Avoid stress and anxiety. Be relax and calm.",
    "Hello dear user! I have gone through your query and understood your concerns! Thank you for sharing them on Chat Doctor. We can't be sure that these symptoms you are experiencing now indicate heart failure without doing some examinations. Usually swollen ankles and feet are found in heart congestive failure, but in these cases, feet and ankles are more swollen in the evening and less in the morning. Kidney problems, diabetes, thyroid problems etc., may lead to similar symptoms. So to determine the real cause of these concerns I would recommend you to do some examinations to let us know more about your health.- Blood pressure monitoring-Blood sugar and lipids-Urine test, proteinuria- Liver enzymes, and kidney function indicators (creatinine, urea)- ECG-Heart ultrasound examination and cardiologist consultation doctor may ask for more examinations if he sees reasonable. After we get these results well be able to determine your condition and treat it accordingly. Feel free to ask us again on this website. I hope this answer was helpful to you! Please kindly rate it as helpful and write a short review about your experience with me! I would appreciate that a lot. Thank you and best regards! Chat Doctor.",
    "Hi dear, I have gone through your question and understand your concerns. You are having recurrent urinary tract infection, which is most likely due to the calculi in the renal system. You should get active treatment for this infection, as it can cause preterm labor and delivery. You can continue taking plenty of fluids and oral antibiotics till the final culture report comes. Further management should be done accordingly. Hope you found the answer helpful. Wishing you good health. Regards Chat Doctor.",
    "Hi, thanks for writing to Chat Doctor and sharing your babies health concerns with us! Well, If I were your treating Doctor for this case of the purple/reddish lump on the mid-back of baby, I would think of few possibilities",
    "Hi and thank you so much for this query. I am so sorry to hear about what you are experiencing right now. A gall bladder problem can lead to diarrhea because food is not well digested as bile from the is very important in the digestion of fats. I will not particularly think that your pancreas has been damaged as you are not presenting with signs of pancreatitis which is often a pain. For now, stay relaxed and follow up with your doctors to figure out the exact cause and propose a treatment plan to you. I hope this ad Chat Doctor. Thank you so much for using our services, and please feel free to ask for clarifications if need be. I wish you the best of health.",
    "Hi, I think you can go for few cycles of natural monitoring by ultrasound. You can track your follicles' growth by repeated ultrasound and when your follicles is more than 17 to 18 mm, take injection for rupturing the follicles. Be in contact with your husband every 2 to 3 days after your periods stop. Take progesterone for next 2 weeks. Do a urine pregnancy test at home after that. You can try like that for 3 cycles at least before going to IVF. Continue your thyroid medicine. Hope I have answered your question. Regards",
    "Hi, Welcome to Chat Doctor forum, Your tooth which was filled earlier has got infection due to any residual caries or secondary caries. Due to caries, infection has reached the pulp and periapical abscess has formed. This pimple like bubble is due to abscess formed because pus needs a way to extrude itself. Consult a dentist for radio graphical examination done. Root canal treatment has to be done in this tooth. Take care",
    "Hi, No. This doesn't seems like a case of tethered cord, as tethered cord has both motor and sensory signs and symptom and are usually progressive. This appears more like a case of cerebral palsy, tight hamstring with weak muscles is characteristic of hamstrings. Initial treatment is mainly through orthotics with adjuvant surgical procedures but to reach a diagnosis, a detailed physical examination is essential. As far as tight hamstrings is concerned, they can be lengthened by surgery. Take care. Hope I have answered your question. Let me know if I can assist you further.",
    "Hi thanks for asking question. Let me clear your doubt dear... Hepatitis A spread in community by Eco oral route, means by contaminated food or water. So if you have this disease in childhood it is not spread to your child by you. Only if person take contaminated food or water by this virus then only hepatitis can occur. You have hepatitis A in childhood. So at that time protective antibody form in your system and protect you for many years. But virus cannot activate right now as it is already 10 years!! I hope I have solved your concern. Take care. Chat Doctor.",
    "Brief Answer",
    "Hi. I can understand your concern. Chest discomfort is commonly seen in bronchitis and lung infection. Since your chest x-ray is normal, no need to worry about lung infection. Possibility of bronchitis is more in your case. So better to consult pulmonologist and get done clinical examination of respiratory system and PFT (Pulmonary Function Test). PFT is needed for the diagnosis of bronchitis. It will also tell you about severity of the disease and treatment of bronchitis is based on severity only. You may need inhaled bronchodilators and inhaled corticosteroid (ICS)Don't worry, you will be alright. Hope I have solved your query. Wish you good health. Thanks.",
    "Hi Evan, The problem you are describing can be associated with the allergy and with the sinus. If you are having some sort of sinusitis which is related with the nose sinuses infection or gallery, this can present as the problem which you are facing with. Allergic sinusitis is a chronic problem and is mostly because of some allergic substance and this can lead to a chronic condition. I would suggest you to consult some ENT surgeon regarding this problem and after proper history and examination and if required some investigation, it can be confirmed whether this is some sort of allergic problem or something different. Thanks.",
    "Hello! Welcome and thank you for asking on Chat Doctor! I understand your concern and would explain that these skipped heart beats could be related to a cardiac arrhythmia. For this reason, I would recommend performing further tests",
    "Hi There After going through your query I understand your concern. I would like to tell you that possibilities of acid Reflux/HERD more than a heart disease is there if you don't get breathless, palpitation with chest pain. It's advisable for you to avoid junk and spicy food to get relief and can use over the counter antacids also. Also get an ESG and Echocardiography done as a routine cardiac check up. Hopefully this will answer your query. Kind Regards Chat Doctor.",
    "Hello and welcome to Chat Doctor, Abortion is not the cause of failure to conceive subsequently. Inability to conceive has many reasons.First, you have identified those days in your menstrual cycle when the chances of conception are maximum i.e. during ovulation. The period of ovulation can be determined by basal body temperature and changes in the cervical mucus. If conception does not take place even after taking care of the ovulation period, you need to get some investigations done. In your case, complete examination of the reproductive tract - ultrasonography and/ or hysterosalpingography, hormonal levels -estrogens, FSH and LH levels and follicular sac. In case of your husband, semen analysis should be under-taken. These are some of the investigations which will let your gynecologist know the cause of inability to conceive and thus plan management. Thanks and take care Chat Doctor."
]


# ----------------------------
# QUANTIZATION
# ----------------------------
# 4-bit NF4 weights with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# ----------------------------
# TOKENIZER
# ----------------------------
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Reuse EOS as the padding token when the checkpoint defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ----------------------------
# INFERENCE FUNCTION
# ----------------------------
def run_inference(model, query):
    """Generate a greedy (deterministic) answer to `query` with `model`.

    The query is placed after a fixed "licensed medical doctor" system prompt,
    rendered through the tokenizer's chat template, and only the newly
    generated tokens are decoded.

    Args:
        model: object exposing `.device` and `.generate(...)`; this cell calls
            it with both the base model and the LoRA-wrapped model.
        query: text of the user message.

    Returns:
        str: the generated continuation with special tokens removed and
        surrounding whitespace stripped.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a licensed medical doctor. Respond in a professional, neutral, and explanatory tone."
        },
        {
            "role": "user",
            "content": query
        }
    ]

    # return_dict=True hands back the tokenizer's own attention mask.
    # Rebuilding the mask as `input_ids != pad_token_id` hides genuine prompt
    # tokens whenever the pad token is an alias of EOS (the tokenizer setup in
    # this cell creates exactly that alias when no pad token exists).
    # A single conversation needs no padding, so no `padding` argument is passed.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            # Greedy decoding for reproducibility. Sampling knobs such as
            # temperature/top_p are ignored in this mode, so they are not passed.
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Drop the echoed prompt; keep only what the model produced.
    generated = outputs[:, prompt_len:]
    return tokenizer.decode(generated[0], skip_special_tokens=True).strip()

# ----------------------------
# LOAD MODELS
# ----------------------------
print("Loading BASE model...")
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# NOTE(review): the quantized weights are loaded a second time for the adapter.
# Wrapping a single base model and toggling the adapter off for the baseline
# pass would presumably halve memory use; confirm before changing.
print("\nLoading LoRA model...")
base_for_lora = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
lora_model = PeftModel.from_pretrained(
    base_for_lora,
    adapter_path,
)

lora_model.eval()
base_model.eval()

# ----------------------------
# RUN COMPARISON
# ----------------------------
print("Тест адаптера, обученного на исходных данных")

# Show at most 30 examples, but never index past the end of either list:
# the previous fixed `range(30)` raised IndexError when fewer pairs existed.
max_examples = 30
separator = "=" * 40

for number, (query, reference) in enumerate(
    zip(queries[:max_examples], doctor_answers[:max_examples]), start=1
):
    print("\n" + separator)
    print(f"QA #{number}")
    print(separator)
    print("\nЗАПРОС ПОЛЬЗОВАТЕЛЯ:")
    print(query)
    print("\nЭТАЛОННЫЙ ОТВЕТ ВРАЧА:")
    print(reference)
    print("\nОТВЕТ БАЗОВОЙ МОДЕЛИ:")
    print(run_inference(base_model, query))
    print("\nОТВЕТ МОДЕЛИ С АДАПТЕРОМ:")
    print(run_inference(lora_model, query))

print("\n" + separator)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading BASE model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Loading LoRA model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Тест адаптера, обученного на исходных данных

QA #1

ЗАПРОС ПОЛЬЗОВАТЕЛЯ:
Hi! Please help,I have a small, slightly bumpy white splotch on the inside of both my cheeks. It is surrounded by small prominent purple veins. Is this cancerous? Im a 23 year old non-smoking male. Should I wait a few days or see a dr immediately? Thank you for your kindness.

ЭТАЛОННЫЙ ОТВЕТ ВРАЧА:
Thanks for posting your query to Chat Doctor. After going through your history, I want to assure you not to get worried about it. White spots in oral cavity can occur due to many reasons which can only be told after proper visual examination. Do you have any other skin problem as sometimes these spots may be related to other systemic conditions? I would suggest you to visit a dentist and if needed a dermatologist for proper examination and treatment. Hope my answer will help you.

ОТВЕТ БАЗОВОЙ МОДЕЛИ:
It's important to approach any concerning skin changes with caution, but it's also crucial not to overreact. The symp

# Обучение адаптера на очищенных данных


In [None]:
from google.colab import files

# Upload a file from the local machine into the Colab session.
uploaded = files.upload()

Saving Doctor_V2_QLoRA_1000_split.csv to Doctor_V2_QLoRA_1000_split.csv


In [None]:
# Load the cleaned-data CSV and preview its first row.
df = pd.read_csv('Doctor_V2_QLoRA_1000_split.csv')
df.head(1)

Unnamed: 0.1,Unnamed: 0,instruction,input,output_v21
0,53582,You are a licensed medical doctor. Respond in ...,my friend is currently in the hospital because...,"Based on the limited history provided, a defin..."


In [None]:
# System prompt written into the `instruction` column of every row.
instruction_text = (
    "You are a licensed medical doctor. Respond in a professional, neutral, and explanatory tone."
)
# errors="ignore" keeps this cell re-runnable: after the first run the stray
# 'Unnamed: 0' column is already gone and a plain drop would raise KeyError.
df = df.drop(columns=['Unnamed: 0'], errors="ignore")
df["instruction"] = instruction_text
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   instruction  1000 non-null   object
 1   input        1000 non-null   object
 2   output_v21   1000 non-null   object
dtypes: object(3)
memory usage: 23.6+ KB


In [None]:
model_id = "Qwen/Qwen2.5-3B-Instruct"

# 4-bit NF4 quantization with double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # marked as critical in the original notes
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.padding_side = "right"

# Reuse EOS as the padding token when the checkpoint defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# Keep the model config's pad id in sync with the tokenizer before wrapping
# the quantized model for k-bit training.
model.config.pad_token_id = tokenizer.pad_token_id
model = prepare_model_for_kbit_training(model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

In [None]:
def formatting_func(example):
    """Render one dataset row as a single chat-formatted training string.

    Args:
        example: mapping with the keys "instruction", "input" and "output_v21",
            used as the system, user and assistant messages respectively.

    Returns:
        dict: {"text": <conversation rendered by the tokenizer's chat template>}.
    """
    role_to_column = (
        ("system", "instruction"),
        ("user", "input"),
        ("assistant", "output_v21"),
    )
    conversation = [
        {"role": role, "content": example[column]}
        for role, column in role_to_column
    ]
    # The assistant turn is already part of the conversation, so the text is
    # rendered untokenized and without a trailing generation prompt.
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Convert the DataFrame to a Dataset, then replace every original column with
# the single "text" field returned by formatting_func.
dataset = Dataset.from_pandas(df)
dataset = dataset.map(
    formatting_func,
    remove_columns=dataset.column_names,
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
# Rank-16 LoRA (alpha 16, dropout 0.05, no bias terms) attached to the seven
# projection modules listed in `target_modules`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

# Wrap the prepared model with the adapter and report the trainable share.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 29,933,568 || all params: 3,115,872,256 || trainable%: 0.9607


In [None]:
sft_config = SFTConfig(
    output_dir="./Clean_Qwen3B_QLoRA",

    # data handling
    dataset_text_field="text",
    max_length=1024,
    packing=False,
    remove_unused_columns=False,

    # schedule
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,

    # optimizer
    optim="adamw_torch",        # described as the "safe" optimizer in the original notes
    learning_rate=9e-5,         # deliberately set a little below average
    lr_scheduler_type="cosine",

    # precision / stability
    fp16=False,                 # flagged as important in the original notes
    bf16=False,                 # flagged as important in the original notes
    max_grad_norm=0.0,          # flagged as critical: training crashes otherwise

    # logging
    logging_steps=10,
    report_to="none",
)

trainer = SFTTrainer(model=model, args=sft_config, train_dataset=dataset)
trainer.train()

# Save ONLY the trained LoRA adapter, with the tokenizer files next to it.
adapter_path = "./Clean_Qwen3B_QLoRA/adapter"
model.eval()
model.save_pretrained(adapter_path)
tokenizer.save_pretrained(adapter_path)

Adding EOS to train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None}.


Step,Training Loss
10,3.0085
20,2.3541
30,2.2379
40,2.0818
50,2.1078
60,2.0434
70,2.0456
80,2.0346
90,2.0373
100,2.1472


Step,Training Loss
10,3.0085
20,2.3541
30,2.2379
40,2.0818
50,2.1078
60,2.0434
70,2.0456
80,2.0346
90,2.0373
100,2.1472


('./Clean_Qwen3B_QLoRA/adapter/tokenizer_config.json',
 './Clean_Qwen3B_QLoRA/adapter/special_tokens_map.json',
 './Clean_Qwen3B_QLoRA/adapter/chat_template.jinja',
 './Clean_Qwen3B_QLoRA/adapter/vocab.json',
 './Clean_Qwen3B_QLoRA/adapter/merges.txt',
 './Clean_Qwen3B_QLoRA/adapter/added_tokens.json',
 './Clean_Qwen3B_QLoRA/adapter/tokenizer.json')

# Инференс для сравнения базовой (baseline) модели и двух адаптеров QLoRA. Продолжение: адаптер, обученный на очищенных данных

In [None]:
import json
from pathlib import Path

INPUT_PATH = Path("QLoRA.json")
OUTPUT_PATH = Path("QLoRA.json")  # may be pointed at a different file if needed

# Empty placeholder stored under "Cleaned_DATA_QLoRA" until inference fills it in.
CLEANED_TEMPLATE = {
    "text": "",
    "latency_sec": None,
    "prompt_tokens": None,
    "generated_tokens": None,
    "total_tokens": None,
    "timestamp_utc": ""
}


def main():
    """Add an empty "Cleaned_DATA_QLoRA" slot to every eligible sample.

    A sample is eligible when it is a dict that has "RAW_DATA_QLoRA" but no
    "Cleaned_DATA_QLoRA" key. The JSON is read from INPUT_PATH and written to
    OUTPUT_PATH only if at least one sample was changed.

    Raises:
        FileNotFoundError: if INPUT_PATH does not exist.
    """
    if not INPUT_PATH.exists():
        raise FileNotFoundError(f"Файл не найден: {INPUT_PATH}")

    with INPUT_PATH.open("r", encoding="utf-8") as src:
        data = json.load(src)

    pending = [
        entry
        for entry in data.values()
        if isinstance(entry, dict)
        and "RAW_DATA_QLoRA" in entry
        and "Cleaned_DATA_QLoRA" not in entry
    ]
    for entry in pending:
        # Each sample gets its own copy so later writes do not share state.
        entry["Cleaned_DATA_QLoRA"] = CLEANED_TEMPLATE.copy()

    if not pending:
        print("ℹ️ Изменений не требуется — ключ уже существует.")
        return

    with OUTPUT_PATH.open("w", encoding="utf-8") as dst:
        json.dump(data, dst, ensure_ascii=False, indent=2)
    print("✅ Cleaned_DATA_QLoRA успешно добавлен.")


if __name__ == "__main__":
    main()

✅ Cleaned_DATA_QLoRA успешно добавлен.


In [None]:
import json
import time
from datetime import datetime, timezone

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# ----------------------------
# CONFIG
# ----------------------------
MODEL_ID = "Qwen/Qwen2.5-3B-Instruct"
ADAPTER_PATH = "./Clean_Qwen3B_QLoRA/adapter"  # adapter directory written by the training cell
JSON_PATH = "./QLoRA.json"                      # results file read and rewritten below
DTYPE = torch.float16

# ----------------------------
# QUANTIZATION
# ----------------------------
# 4-bit NF4 weights with double quantization; compute dtype comes from DTYPE.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=DTYPE,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# ----------------------------
# TOKENIZER
# ----------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Reuse EOS as the padding token when the checkpoint defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ----------------------------
# INFERENCE FUNCTION
# ----------------------------
def run_inference(model, query: str) -> dict:
    """Generate a greedy answer to `query` and report timing and token counts.

    The query is placed after a fixed "licensed medical doctor" system prompt
    and rendered through the tokenizer's chat template. Only the
    `model.generate` call is timed.

    Args:
        model: object exposing `.device` and `.generate(...)`.
        query: text of the user message.

    Returns:
        dict with keys:
            text: generated continuation, special tokens removed, stripped.
            latency_sec: wall-clock seconds spent in `generate`, rounded to 4 places.
            prompt_tokens: length of the tokenized prompt.
            generated_tokens: output length minus the prompt length.
            total_tokens: full output sequence length.
            timestamp_utc: ISO-8601 UTC timestamp taken after generation.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a licensed medical doctor. Respond in a professional, neutral, and explanatory tone."
        },
        {
            "role": "user",
            "content": query
        }
    ]

    # return_dict=True hands back the tokenizer's own attention mask.
    # Rebuilding the mask as `input_ids != pad_token_id` hides genuine prompt
    # tokens whenever the pad token is an alias of EOS (the tokenizer setup in
    # this cell creates exactly that alias when no pad token exists).
    # A single conversation needs no padding, so no `padding` argument is passed.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    prompt_tokens = inputs["input_ids"].shape[1]

    start_time = time.perf_counter()

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=False,  # greedy decoding keeps runs comparable
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    latency = time.perf_counter() - start_time

    total_tokens = outputs.shape[1]
    generated_tokens = total_tokens - prompt_tokens

    # Drop the echoed prompt; keep only what the model produced.
    generated = outputs[:, prompt_tokens:]
    text = tokenizer.decode(generated[0], skip_special_tokens=True).strip()

    return {
        "text": text,
        "latency_sec": round(latency, 4),
        "prompt_tokens": int(prompt_tokens),
        "generated_tokens": int(generated_tokens),
        "total_tokens": int(total_tokens),
        "timestamp_utc": datetime.now(timezone.utc).isoformat()
    }

# ----------------------------
# LOAD LoRA MODEL
# ----------------------------
print("Loading LoRA model...")
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

lora_model = PeftModel.from_pretrained(
    base_model,
    ADAPTER_PATH,
)
lora_model.eval()

# ----------------------------
# LOAD JSON
# ----------------------------
with open(JSON_PATH, "r", encoding="utf-8") as f:
    results = json.load(f)

# ----------------------------
# LoRA INFERENCE LOOP
# ----------------------------
import os  # needed only for the atomic replace in the save step below

# Keys are numeric strings; visit them in numeric rather than lexical order.
for key in sorted(results, key=int):
    entry = results[key]

    # Guard against malformed records that never received the placeholder.
    if "Cleaned_DATA_QLoRA" not in entry:
        continue

    # Skip records that already hold a generated answer, so an interrupted
    # run can be resumed.
    if entry["Cleaned_DATA_QLoRA"].get("text"):
        continue

    print(f"[Cleaned LoRA] QA #{key}")

    lora_out = run_inference(lora_model, entry["query"])

    # Write ONLY into the Cleaned_DATA_QLoRA slot.
    entry["Cleaned_DATA_QLoRA"] = lora_out

    # Incremental save. Write to a sibling temp file and swap it in with
    # os.replace, so an interruption mid-write cannot leave a truncated
    # results file and lose every answer collected so far.
    tmp_path = f"{JSON_PATH}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, JSON_PATH)

print("Cleaned LoRA inference completed.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading LoRA model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[Cleaned LoRA] QA #1
[Cleaned LoRA] QA #2
[Cleaned LoRA] QA #3
[Cleaned LoRA] QA #4
[Cleaned LoRA] QA #5
[Cleaned LoRA] QA #6
[Cleaned LoRA] QA #7
[Cleaned LoRA] QA #8
[Cleaned LoRA] QA #9
[Cleaned LoRA] QA #10
[Cleaned LoRA] QA #11
[Cleaned LoRA] QA #12
[Cleaned LoRA] QA #13
[Cleaned LoRA] QA #14
[Cleaned LoRA] QA #15
[Cleaned LoRA] QA #16
[Cleaned LoRA] QA #17
[Cleaned LoRA] QA #18
[Cleaned LoRA] QA #19
[Cleaned LoRA] QA #20
[Cleaned LoRA] QA #21
[Cleaned LoRA] QA #22
[Cleaned LoRA] QA #23
[Cleaned LoRA] QA #24
[Cleaned LoRA] QA #25
[Cleaned LoRA] QA #26
[Cleaned LoRA] QA #27
[Cleaned LoRA] QA #28
[Cleaned LoRA] QA #29
[Cleaned LoRA] QA #30
Cleaned LoRA inference completed.
