In [54]:
!pip install -U transformers accelerate bitsandbytes




In [2]:
## Load Model & Tokenizer with Quantization
## Use 4-bit quantization to manage GPU memory:

In [56]:

from huggingface_hub import login
login(token="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")



In [57]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

model_id = "bigscience/bloom-1b7"

# 8-bit quantization config
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,  # explicit instead of llm_int8_threshold
    llm_int8_threshold=0.0,
    llm_int8_enable_fp32_cpu_offload=True
)

# Load model in 8-bit
model_8bit = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=quantization_config
)

#tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

print(f"Model '{model_id}' loaded successfully with 8-bit quantization.")

# Inference example
prompt = "Hello, how are you?"

def inference(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model_8bit.device)
    outputs = model_8bit.generate(**inputs,max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)



print(inference(prompt))


Model 'bigscience/bloom-1b7' loaded successfully with 8-bit quantization.
Hello, how are you? I am so happy to see you here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so glad you are here. I am so


In [58]:
#df
import pandas as pd
df = pd.read_json("hf://datasets/FreedomIntelligence/medical-o1-reasoning-SFT/medical_o1_sft.json")


In [59]:
df.head()

Unnamed: 0,Question,Complex_CoT,Response
0,Given the symptoms of sudden weakness in the l...,"Okay, let's see what's going on here. We've go...",The specific cardiac abnormality most likely t...
1,A 33-year-old woman is brought to the emergenc...,"Okay, let's figure out what's going on here. A...","In this scenario, the most likely anatomical s..."
2,A 61-year-old woman with a long history of inv...,"Okay, let's think about this step by step. The...",Cystometry in this case of stress urinary inco...
3,A 45-year-old man with a history of alcohol us...,"Alright, let’s break this down. We have a 45-y...",Considering the clinical presentation of sudde...
4,A 45-year-old man presents with symptoms inclu...,"Okay, so here's a 45-year-old guy who's experi...",Based on the clinical findings presented—wide-...


In [73]:
from datasets import Dataset

df["text"] = df.apply(lambda x: f"### Question:\n{x['Question']}\n\n### Reasoning:\n{x['Complex_CoT']}\n\n### Answer:\n{x['Response']}", axis=1)

dataset = Dataset.from_pandas(df[["text"]])


def tokenize_function(examples):
    tokenized_inputs = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
    # Add labels for causal language modeling
    tokenized_inputs["labels"] = tokenized_inputs["input_ids"].copy()
    return tokenized_inputs

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Remove the original text column after tokenization
tokenized_dataset = tokenized_dataset.remove_columns(["text"])

Map:   0%|          | 0/19704 [00:00<?, ? examples/s]

In [70]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
model = prepare_model_for_kbit_training(model_8bit)

lora_config = LoraConfig(
    r=1,
    lora_alpha=32,
    target_modules=["query_key_value"],  # for BLOOM
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)





In [75]:
# 4️⃣ Training arguments
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./bloom-lora-medical-fast",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    num_train_epochs=0.1,  # less than 1 epoch for speed
    learning_rate=5e-4,
    fp16=True,
    logging_steps=1,
    save_strategy="no",  # no checkpoint saving to speed up
    optim="adamw_torch",
    gradient_checkpointing=True,
    remove_unused_columns=False
)

In [76]:
# 5️⃣ Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

# 6️⃣ Train
trainer.train()

  trainer = Trainer(


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mgauravjangid341542[0m ([33mgauravjangid341542-regex-software[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss
1,3.3803
2,3.6085
3,3.1344
4,2.6319
5,2.4484
6,2.5738
7,2.2337
8,2.7268
9,2.4265
10,2.4209


Step,Training Loss
1,3.3803
2,3.6085
3,3.1344
4,2.6319
5,2.4484
6,2.5738
7,2.2337
8,2.7268
9,2.4265
10,2.4209


TrainOutput(global_step=986, training_loss=2.938458023641946, metrics={'train_runtime': 1876.6566, 'train_samples_per_second': 1.05, 'train_steps_per_second': 0.525, 'total_flos': 7322911220170752.0, 'train_loss': 2.938458023641946, 'epoch': 0.1000812017864393})

In [77]:
trainer.save_model("./bloom-lora-medical")  # Saves model
tokenizer.save_pretrained("./bloom-lora-medical")  # Saves tokenizer


('./bloom-lora-medical/tokenizer_config.json',
 './bloom-lora-medical/special_tokens_map.json',
 './bloom-lora-medical/tokenizer.json')

In [78]:

model_path = "./bloom-lora-medical"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

In [81]:
from transformers import pipeline
# Create inference pipeline
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    temperature=0.7,
    top_p=0.9
)

# Example query
prompt = "Explain the symptoms and treatment of diabetes in simple terms."
output = text_gen(prompt, num_return_sequences=1)

print("💡 Model Output:\n", output[0]['generated_text'])


Device set to use cuda:0


💡 Model Output:
 Explain the symptoms and treatment of diabetes in simple terms. How can the patient be helped by this advice?

Okay, let's think about this. Let's start with the symptoms. Well, diabetes can be a serious issue if it comes in the form of diabetes, and that is definitely a serious issue. This is definitely a serious issue, especially when it comes to blood levels, which can be the key to a good treatment.

Okay, let's talk about treatment. Let's think about what can help the patient in this case. Yes, the treatment for diabetes is a big part of the treatment. The first step is to look for a treatment that will help to lower blood levels. This is a first step, but it's not the only step.

Let's start with a treatment to lower blood levels. There are many treatments for diabetes. We can choose a treatment based on the level of blood levels. If you have diabetes, you might need to see a specialist. Some specialists are specialists in diabetes treatment, but they're not the 

In [83]:
output = text_gen("i have an headache ", num_return_sequences=1)

print("💡 Model Output:\n", output[0]['generated_text'])

💡 Model Output:
 i have an headche ive taken all meds to this condition for years but they are still failing to help even though we have an international medical review program ive been told i should follow up with specialists because this could be a very serious disease but if you don’t get a good diagnosis then it might just go away if you don’t try to treat it by the time it gets serious
Okay, so this is a really serious disease, so we should really get a good diagnosis before we start treatment. So let's go through the steps we need to follow to make sure we get the best diagnosis before we start treatment. Let's check out the first step, first we need to get a good diagnosis. If we don’t get a good diagnosis before we start treatment, the disease may just go away. So let’s check out the first step, first we need to get a good diagnosis.

Let's try to find out which specific test we need to check first. Let's check out the blood tests. Hb, blood hemoglobin, hemoglobin concentration

In [95]:
!git init
!git add .
!git commit -m "first commit"

!git push -u origin main


Reinitialized existing Git repository in /content/.git/
[main c53a986] first commit
 2 files changed, 8 insertions(+)
fatal: could not read Username for 'https://github.com': No such device or address


In [90]:
! git config --global user.email "gauravjangid341542@gmail,com"
! git config --global user.name "gauravjangid26"

In [91]:
!git remote remove origin


In [93]:
! git remote add origin https://github.com/Gauravjangid26/fine-tuning-llm.git
