In [None]:
!pip install -q -U transformers accelerate bitsandbytes peft datasets sentencepiece huggingface_hub
import os
os.environ["WANDB_DISABLED"] = "true"

import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments
)
from datasets import load_dataset
from peft import get_peft_model, LoraConfig
from huggingface_hub import login
# --- Authentication ----------------------------------------------------------
# Read the Hugging Face token from the environment rather than hard-coding it.
# The previous placeholder string was truthy, so login() was always attempted
# with an invalid token. With no HF_TOKEN set, login is skipped and any
# credentials already cached on the machine are used.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
if HF_TOKEN:
    login(token=HF_TOKEN)

# --- Tokenizer ---------------------------------------------------------------
model_id = "google/gemma-3-270m-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Padding to a fixed length needs a pad token; fall back to EOS if none exists.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# --- 4-bit quantized base model ----------------------------------------------
# NF4 weights with float16 compute; device_map="auto" lets accelerate place
# the layers on the available device(s).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

# --- Training data -----------------------------------------------------------
# English config of the dataset; later code reads its 'Question',
# 'Complex_CoT' and 'Response' columns.
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train")
print(f"Loaded dataset with {len(dataset)} examples")
def tokenize_function(examples, max_length=512):
    """Turn a batch of rows into aligned causal-LM training features.

    Each example becomes ONE token sequence::

        <Question>\\nReasoning: <Complex_CoT>\\nAnswer: <Response><eos>

    The previous version tokenized the prompt and the response into two
    independent, separately padded sequences. For a decoder-only model the
    labels must be position-aligned with ``input_ids``, so that layout trained
    the model on meaningless targets (and on pad tokens). It also relied on the
    deprecated ``as_target_tokenizer`` context manager.

    Args:
        examples: Batch dict with list-valued 'Question', 'Complex_CoT' and
            'Response' columns (what ``Dataset.map(..., batched=True)`` passes).
        max_length: Fixed length every returned sequence is truncated/padded to.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels', each a list of
        ``max_length``-long int lists. Labels are -100 (ignored by the loss)
        on the prompt and on padding, so only the response is supervised.
    """
    ignore_index = -100
    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = eos_id if eos_id is not None else 0

    contexts = [
        q + "\nReasoning: " + cot
        for q, cot in zip(examples['Question'], examples['Complex_CoT'])
    ]
    # Leading space so "...\nAnswer:" + " <response>" tokenizes the same way
    # as it will when the prompt is tokenized on its own at inference time.
    answers = [" " + response for response in examples['Response']]

    # The context keeps the tokenizer's default special tokens (e.g. BOS);
    # marker and answer are continuations and must not get their own.
    context_batch = tokenizer(contexts)["input_ids"]
    answer_batch = tokenizer(answers, add_special_tokens=False)["input_ids"]
    # Same "\nAnswer:" marker the evaluate_model cells of this notebook prompt with.
    marker_ids = tokenizer("\nAnswer:", add_special_tokens=False)["input_ids"]

    # Reserve up to half of the window for the answer so that a long
    # chain-of-thought can never truncate away every supervised token.
    answer_budget = max(1, max_length // 2)

    model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
    for context_ids, answer_ids in zip(context_batch, answer_batch):
        answer_ids = list(answer_ids)
        if eos_id is not None:
            answer_ids.append(eos_id)  # teach the model where the answer ends
        answer_ids = answer_ids[:answer_budget]

        # Truncate the context (not the marker) to whatever room is left.
        context_budget = max(0, max_length - len(answer_ids) - len(marker_ids))
        prompt_ids = list(context_ids[:context_budget]) + list(marker_ids)

        input_ids = (prompt_ids + answer_ids)[:max_length]
        labels = ([ignore_index] * len(prompt_ids) + answer_ids)[:max_length]

        pad_len = max_length - len(input_ids)
        model_inputs["input_ids"].append(input_ids + [pad_id] * pad_len)
        model_inputs["attention_mask"].append([1] * len(input_ids) + [0] * pad_len)
        model_inputs["labels"].append(labels + [ignore_index] * pad_len)
    return model_inputs
# Tokenize the whole dataset; batched=True hands tokenize_function a dict of
# column lists per call.
tokenized_dataset = dataset.map(tokenize_function, batched=True)
print("Dataset tokenized!")

# LoRA adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Intermediate checkpoints are written to output_dir; the final adapter is
# saved separately to "./gemma--lora" below.
training_args = TrainingArguments(
    output_dir="./gemma-uncensored-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,  # effective batch size of 4 per device
    learning_rate=3e-4,
    num_train_epochs=3,
    warmup_steps=100,
    lr_scheduler_type="cosine",
    optim="paged_adamw_32bit",
    fp16=True,
    logging_steps=50,
    save_steps=1500,
    save_strategy="steps",
    logging_dir="./logs",
    report_to="none",
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its
    # replacement and is what the Qwen training cell in this notebook uses.
    processing_class=tokenizer,
)
trainer.train()

# `model` is the PEFT-wrapped model, so this stores the adapter weights.
model.save_pretrained("./gemma--lora")
tokenizer.save_pretrained("./gemma--lora")
print("Fine-tuning complete! Model saved to ./gemma--lora")


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch, os

base_model_id = "google/gemma-3-270m-it"
lora_dir = "./gemma--lora"

# Collect any "checkpoint-<step>" entries inside the adapter directory.
# NOTE(review): the training cell writes its checkpoints to
# "./gemma-uncensored-lora", so this list is presumably always empty and the
# adapter directory itself is used - confirm that is intended.
checkpoints = []
for entry in os.listdir(lora_dir):
    if entry.startswith("checkpoint-"):
        checkpoints.append(os.path.join(lora_dir, entry))

# Highest step number wins; with no checkpoints, use the adapter directory.
if checkpoints:
    checkpoints_by_step = sorted(checkpoints, key=lambda path: int(path.split("-")[-1]))
    latest_checkpoint = checkpoints_by_step[-1]
else:
    latest_checkpoint = lora_dir

# Load the base model in float32 on CPU, then attach the adapter to it.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float32,
    device_map={"": "cpu"},
)
lora_model = PeftModel.from_pretrained(
    base_model,
    latest_checkpoint,
    device_map={"": "cpu"},
)

# Fold the LoRA weights into the base weights and save a standalone model.
merged_model = lora_model.merge_and_unload()
merged_model.eval()
merged_output_dir = "./gemma-merged-final"
merged_model.save_pretrained(merged_output_dir)
tokenizer.save_pretrained(merged_output_dir)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Same relative path the merge cell saved to ("./gemma-merged-final"); the
# former absolute "/content/..." path only resolved when the working
# directory happened to be /content.
merged_dir = "./gemma-merged-final"
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(merged_dir)
if tokenizer.pad_token is None:
    # Reuse EOS as padding, as the training cell does. Adding a brand-new
    # "<|pad|>" token would produce an id outside the model's embedding
    # matrix unless the embeddings were resized as well.
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    merged_dir,
    device_map="auto" if device=="cuda" else {"": "cpu"},
    torch_dtype=torch.float32
)
model.eval()
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto" if device=="cuda" else None
)

prompt = "Question: How can I diagnose a patient with chest pain?\nReasoning:"

# Greedy decoding on CPU; nucleus sampling on GPU. Sampling-only options are
# added only when sampling is enabled so that no None values reach generate().
generation_kwargs = {
    "max_new_tokens": 400,
    "do_sample": device == "cuda",
}
if tokenizer.pad_token_id is not None:
    generation_kwargs["pad_token_id"] = tokenizer.pad_token_id
if device == "cuda":
    generation_kwargs.update(temperature=0.7, top_p=0.9, repetition_penalty=1.1)

outputs = pipe(prompt, **generation_kwargs)

# The banner emoji had been stored as mojibake; restored to the intended glyph.
print("\n🧠 Model Output:\n")
print(outputs[0]["generated_text"])


In [None]:
!pip install -q transformers datasets evaluate

In [None]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from datasets import load_dataset
import evaluate
# Same relative path the merge cell saved to ("./gemma-merged-final").
merged_dir = "./gemma-merged-final"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(merged_dir)
if tokenizer.pad_token is None:
    # Reuse EOS for padding; a newly added pad token would need the model's
    # embeddings to be resized.
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    merged_dir,
    device_map="auto" if device=="cuda" else {"": "cpu"},
    torch_dtype=torch.float32
)
model.eval()

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto" if device=="cuda" else None
)

# NOTE(review): these examples are drawn from the same "train" split the
# Gemma adapter was fine-tuned on, so the scores are not a held-out estimate.
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train")
dataset = dataset.shuffle(seed=42)
test_dataset = dataset.select(range(50))  # only 50 examples for fast evaluation

# Materialise the columns once as plain lists instead of re-reading the
# column on every loop iteration.
questions = list(test_dataset["Question"])
references = list(test_dataset["Response"])
prompts = [q + "\nReasoning:" for q in questions]

generated_answers = []
for prompt in prompts:
    output = pipe(
        prompt,
        max_new_tokens=150,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
        # Let the pipeline drop the prompt. Stripping it with str.replace()
        # silently fails whenever decoding does not reproduce the prompt
        # text exactly, leaving the prompt inside the prediction.
        return_full_text=False,
    )
    generated_answers.append(output[0]["generated_text"].strip())

rouge = evaluate.load("rouge")
results = rouge.compute(predictions=generated_answers, references=references)
print("ROUGE Scores:\n")
for key, value in results.items():
    print(f"{key}: {value:.4f}")
print("\nSample outputs:\n")
for i in range(min(5, len(prompts))):
    print(f"Question:\n{questions[i]}")
    print(f"Reference Answer:\n{references[i]}")
    print(f"Generated Answer:\n{generated_answers[i]}")
    print("-"*80)


In [None]:
!pip install -q -U transformers accelerate datasets sentencepiece huggingface_hub

import os, torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    TrainerCallback
)
from datasets import load_dataset
from huggingface_hub import login

os.environ["WANDB_DISABLED"] = "true"

# Take the Hugging Face token from the environment instead of editing it
# into the notebook; with no token set, login is skipped as before.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
if HF_TOKEN:
    login(token=HF_TOKEN)

model_id = "Qwen/Qwen2.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Fixed-length padding needs a pad token; fall back to EOS if none is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Only query bf16 support when CUDA is actually present: on a CPU-only
# machine some torch versions raise from is_bf16_supported() instead of
# returning False.
bf16_ok = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
dtype = torch.bfloat16 if bf16_ok else torch.float32
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=dtype, device_map="auto")
# Trade extra compute for lower activation memory during training.
model.gradient_checkpointing_enable()

# First 2000 training rows only, to keep the run short.
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train[:2000]")

def tokenize_function(examples, max_length=512):
    """Turn a batch of rows into aligned causal-LM training features.

    Each example becomes ONE token sequence::

        <Question>\\nReasoning: <Complex_CoT>\\nAnswer: <Response><eos>

    The previous version tokenized the prompt and the response into two
    independent, separately padded sequences. For a decoder-only model the
    labels must be position-aligned with ``input_ids``, so that layout trained
    the model on meaningless targets (and on pad tokens).

    Args:
        examples: Batch dict with list-valued 'Question', 'Complex_CoT' and
            'Response' columns (what ``Dataset.map(..., batched=True)`` passes).
        max_length: Fixed length every returned sequence is truncated/padded to.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels', each a list of
        ``max_length``-long int lists. Labels are -100 (ignored by the loss)
        on the prompt and on padding, so only the response is supervised.
    """
    ignore_index = -100
    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = eos_id if eos_id is not None else 0

    contexts = [q + "\nReasoning: " + cot for q, cot in zip(examples['Question'], examples['Complex_CoT'])]
    # Leading space so "...\nAnswer:" + " <response>" tokenizes the same way
    # as it will when the prompt is tokenized on its own at inference time.
    answers = [" " + response for response in examples['Response']]

    # The context keeps the tokenizer's default special tokens; marker and
    # answer are continuations and must not get their own.
    context_batch = tokenizer(contexts)["input_ids"]
    answer_batch = tokenizer(answers, add_special_tokens=False)["input_ids"]
    # Same "\nAnswer:" marker the evaluate_model cells of this notebook prompt with.
    marker_ids = tokenizer("\nAnswer:", add_special_tokens=False)["input_ids"]

    # Reserve up to half of the window for the answer so that a long
    # chain-of-thought can never truncate away every supervised token.
    answer_budget = max(1, max_length // 2)

    model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
    for context_ids, answer_ids in zip(context_batch, answer_batch):
        answer_ids = list(answer_ids)
        if eos_id is not None:
            answer_ids.append(eos_id)  # teach the model where the answer ends
        answer_ids = answer_ids[:answer_budget]

        # Truncate the context (not the marker) to whatever room is left.
        context_budget = max(0, max_length - len(answer_ids) - len(marker_ids))
        prompt_ids = list(context_ids[:context_budget]) + list(marker_ids)

        input_ids = (prompt_ids + answer_ids)[:max_length]
        labels = ([ignore_index] * len(prompt_ids) + answer_ids)[:max_length]

        pad_len = max_length - len(input_ids)
        model_inputs["input_ids"].append(input_ids + [pad_id] * pad_len)
        model_inputs["attention_mask"].append([1] * len(input_ids) + [0] * pad_len)
        model_inputs["labels"].append(labels + [ignore_index] * pad_len)
    return model_inputs

# Apply tokenize_function to the whole dataset; batched=True makes map() pass
# a dict of column lists (one batch) per call instead of a single row.
tokenized_dataset = dataset.map(tokenize_function, batched=True)
class StopOnLowLossCallback(TrainerCallback):
    """Ask the Trainer to stop as soon as a logged loss falls below a threshold.

    Args:
        threshold: Loss value under which training is stopped (default 1.0).
    """

    def __init__(self, threshold=1.0):
        self.threshold = threshold

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Inspect a log record and set the stop flag when the loss is low enough.

        Log records without a "loss" entry are ignored. ``control`` is always
        returned, modified or not.
        """
        if not logs or "loss" not in logs:
            return control

        current_loss = logs["loss"]
        if current_loss < self.threshold:
            print(f"\nStopping early: loss {current_loss:.4f} < {self.threshold}\n")
            control.should_training_stop = True
        return control
# Directory that receives both the intermediate checkpoints and the final model.
qwen_output_dir = "./qwen-finetuned-fast"

training_args = TrainingArguments(
    # Output, checkpointing and logging
    output_dir=qwen_output_dir,
    save_strategy="steps",
    save_steps=500,
    logging_dir="./logs",
    logging_steps=25,
    report_to="none",
    # Batch geometry and run length
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    num_train_epochs=1,
    # Optimizer and schedule
    optim="adamw_torch",
    learning_rate=3e-4,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    # Mixed precision: bf16 when the GPU reports support, never fp16
    bf16=torch.cuda.is_bf16_supported(),
    fp16=False,
)

# Stops the run once a logged training loss drops below 1.0.
early_stop = StopOnLowLossCallback(threshold=1.0)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer,
    callbacks=[early_stop],
)
trainer.train()

# Persist the model first, then the tokenizer, into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(qwen_output_dir)
print("Training complete and model saved!")


In [None]:
!pip install -q -U transformers datasets sentencepiece sentence-transformers accelerate

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Load the model and tokenizer from the directory the Qwen training cell
# saved to.
model_path = "./qwen-finetuned-fast"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
model.eval()

# Evaluate on the last 500 rows of the train split; the training cell used
# train[:2000]. (Presumably disjoint - holds only if the split has at least
# 2500 rows; TODO confirm.)
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train[-500:]")
# Sentence-embedding model used by evaluate_model to score similarity.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def evaluate_model(model, tokenizer, dataset, num_samples=30):
    """Score generated answers against references by embedding similarity.

    For each of the first ``num_samples`` rows, the model is prompted with
    ``Question + "\\nReasoning: " + Complex_CoT + "\\nAnswer:"`` and the text it
    generates is compared with the reference ``Response`` using cosine
    similarity of sentence embeddings (module-level ``embedder``).

    Fixes over the previous version:
      * Only the newly generated tokens are decoded. ``generate`` returns
        prompt + continuation, so the score used to be computed on a string
        that contained the question and the whole chain-of-thought.
      * ``num_samples`` is clamped to ``len(dataset)`` (was an IndexError).
      * An empty evaluation returns NaN instead of dividing by zero.

    Args:
        model: Causal LM exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching ``model``.
        dataset: Indexable collection of rows with 'Question', 'Complex_CoT'
            and 'Response' keys.
        num_samples: Maximum number of rows to evaluate.

    Returns:
        float: mean cosine similarity over the evaluated rows (NaN if none).
    """
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        print("\nNo samples to evaluate.")
        return float("nan")

    total_sim = 0.0
    for i in range(num_samples):
        row = dataset[i]  # fetch the row once instead of once per column
        q = row["Question"]
        cot = row["Complex_CoT"]
        expected = row["Response"]
        prompt = q + "\nReasoning: " + cot + "\nAnswer:"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=200)

        # Drop the echoed prompt: everything before prompt_len is input.
        prompt_len = inputs["input_ids"].shape[1]
        generated = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

        emb_gen = embedder.encode(generated, convert_to_tensor=True)
        emb_exp = embedder.encode(expected, convert_to_tensor=True)
        sim = util.cos_sim(emb_gen, emb_exp).item()
        total_sim += sim
        print(f"\n[{i+1}/{num_samples}]")
        print(f"Question: {q}")
        print(f"Expected: {expected[:200]}...")
        print(f"Generated: {generated[:200]}...")
        print(f"Similarity: {sim:.4f}")
    avg_sim = total_sim / num_samples
    print(f"\nAverage Semantic Similarity: {avg_sim:.4f}")
    return avg_sim

# Mean embedding similarity over the first 30 rows of the evaluation slice.
avg_score = evaluate_model(model, tokenizer, dataset, num_samples=30)

# A few free-form questions for a qualitative look at the model's output.
prompts = [
    "How can I diagnose a patient with chest pain?",
    "List steps in treating bacterial pneumonia.",
    "How do I assess dehydration in a child?",
]

for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=200)
    # generate() returns prompt + continuation; decode only the continuation
    # so the "Answer" does not start with a copy of the question.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    print(f"\nQuestion: {prompt}")
    print(f"Answer:\n{tokenizer.decode(new_tokens, skip_special_tokens=True).strip()}\n")


In [None]:
!pip install -q -U transformers datasets sentencepiece sentence-transformers accelerate

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Load the model and tokenizer from the directory the Qwen training cell
# saved to.
model_path = "./qwen-finetuned-fast"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
model.eval()

# Evaluate on the last 200 rows of the train split; the training cell used
# train[:2000]. (Presumably disjoint - holds only if the split has at least
# 2200 rows; TODO confirm.)
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train[-200:]")
# Sentence-embedding model used by evaluate_model to score similarity.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def evaluate_model(model, tokenizer, dataset, num_samples=20):
    """Score generated answers against references by embedding similarity.

    For each of the first ``num_samples`` rows, the model is prompted with
    ``Question + "\\nReasoning: " + Complex_CoT + "\\nAnswer:"`` and the text it
    generates is compared with the reference ``Response`` using cosine
    similarity of sentence embeddings (module-level ``embedder``).

    Fixes over the previous version:
      * Only the newly generated tokens are decoded. ``generate`` returns
        prompt + continuation, so the score used to be computed on a string
        that contained the question and the whole chain-of-thought.
      * ``num_samples`` is clamped to ``len(dataset)`` (was an IndexError).
      * An empty evaluation returns NaN instead of dividing by zero.
      * The printed metric is labelled "Similarity": it is a cosine
        similarity shown as a percentage, not a classification accuracy.

    Args:
        model: Causal LM exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching ``model``.
        dataset: Indexable collection of rows with 'Question', 'Complex_CoT'
            and 'Response' keys.
        num_samples: Maximum number of rows to evaluate.

    Returns:
        float: mean cosine similarity over the evaluated rows (NaN if none).
    """
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        print("\nNo samples to evaluate.")
        return float("nan")

    total_sim = 0.0
    for i in range(num_samples):
        row = dataset[i]  # fetch the row once instead of once per column
        q = row["Question"]
        cot = row["Complex_CoT"]
        expected = row["Response"]
        prompt = q + "\nReasoning: " + cot + "\nAnswer:"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=200)

        # Drop the echoed prompt: everything before prompt_len is input.
        prompt_len = inputs["input_ids"].shape[1]
        generated = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

        emb_gen = embedder.encode(generated, convert_to_tensor=True)
        emb_exp = embedder.encode(expected, convert_to_tensor=True)
        sim = util.cos_sim(emb_gen, emb_exp).item()
        total_sim += sim
        print(f"[{i+1}/{num_samples}] Similarity: {sim*100:.2f}%")
    avg_sim = total_sim / num_samples
    print(f"\nAverage Similarity: {avg_sim*100:.2f}%")
    return avg_sim

# Score the first 20 rows of the slice loaded earlier in this cell. The
# return value is not assigned; as the cell's last expression the notebook
# displays it in addition to the printed summary.
evaluate_model(model, tokenizer, dataset, num_samples=20)


In [None]:
!pip install -q -U transformers datasets sentencepiece sentence-transformers accelerate

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Load the model and tokenizer from the directory the Qwen training cell
# saved to.
model_path = "./qwen-finetuned-fast"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
model.eval()

# The full train split serves as the pool of chain-of-thought texts that
# find_relevant_cot searches.
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train")
# Sentence-embedding model used to compare a question with the stored CoTs.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def find_relevant_cot(question, dataset, embedder, top_k=1):
    """Return the stored chain-of-thought most similar to ``question``.

    The question is embedded and compared, by cosine similarity, with the
    embedding of every row's 'Complex_CoT' text.

    Args:
        question: Query text.
        dataset: Iterable of rows exposing a 'Complex_CoT' entry.
        embedder: Object whose ``encode(..., convert_to_tensor=True)`` returns
            embeddings for a string or a list of strings.
        top_k: Passed to ``topk``; only the single best match is returned
            regardless of its value.

    Returns:
        The 'Complex_CoT' string with the highest similarity score.

    Note:
        All CoT texts are re-encoded on every call, so the cost of one call
        grows with the size of ``dataset``.
    """
    candidate_cots = []
    for row in dataset:
        candidate_cots.append(row["Complex_CoT"])

    query_vec = embedder.encode(question, convert_to_tensor=True)
    candidate_vecs = embedder.encode(candidate_cots, convert_to_tensor=True)

    # cos_sim yields one row per query; there is a single query here.
    scores = util.cos_sim(query_vec, candidate_vecs)[0]
    best = scores.topk(top_k).indices[0]
    return candidate_cots[best.item()]

def generate_answer(question, model, tokenizer, dataset, embedder, max_tokens=10000):
    """Generate an answer to ``question`` using a retrieved chain-of-thought.

    The most similar stored CoT (via ``find_relevant_cot``) is inserted into a
    ``"<question>\\nReasoning: <cot>\\nAnswer:"`` prompt, and the model samples a
    continuation.

    Fixes over the previous version:
      * Only the continuation is returned. The decoded string used to start
        with the full prompt (question and retrieved reasoning).
      * ``max_tokens`` is capped so prompt + continuation stays within the
        context length the model config reports, when it reports one.
      * ``pad_token_id`` falls back to EOS when the tokenizer has no pad token.
      * ``early_stopping=False`` was dropped: it only affects beam search and
        equals the default.

    Args:
        question: Question text.
        model: Causal LM exposing ``device``, ``config`` and ``generate``.
        tokenizer: Tokenizer matching ``model``.
        dataset: Rows searched for a relevant 'Complex_CoT'.
        embedder: Sentence-embedding model used for the retrieval.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        str: the generated answer text, without the prompt.
    """
    reasoning = find_relevant_cot(question, dataset, embedder)
    prompt = f"{question}\nReasoning: {reasoning}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=False).to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    # Keep prompt + new tokens inside the model's context window, if known.
    context_limit = getattr(getattr(model, "config", None), "max_position_embeddings", None)
    if context_limit is not None:
        max_tokens = max(1, min(max_tokens, context_limit - prompt_len))

    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.05,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_id,
        )

    # output[0] is prompt + continuation; slice the prompt tokens off.
    answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
    return answer

# Example usage
# The new-token budget is kept to a size the model can actually serve: the
# former 100000 is far beyond any plausible context window (TODO confirm
# against the model config) and lets a run that never emits EOS go on
# almost indefinitely.
question = "How can I identify and manage early signs of sepsis in adults?"
answer = generate_answer(question, model, tokenizer, dataset, embedder, max_tokens=1024)
print("\nGenerated Answer:\n")
print(answer)


In [None]:
!pip install -q huggingface_hub transformers

from huggingface_hub import login, HfApi
from transformers import AutoTokenizer, AutoModelForCausalLM

import os
from getpass import getpass

# Never store the token in the notebook. Take it from the environment and,
# failing that, prompt for it without echoing. The previous code passed an
# empty hard-coded string straight to login().
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face write token: ")
login(token=HF_TOKEN)

local_model_path = "./qwen-finetuned-fast"
repo_name = "qwen-medical-reasoning"
username = "Ghost2513"
full_repo_name = f"{username}/{repo_name}"

# Reload the artefacts written by the Qwen training cell.
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
model = AutoModelForCausalLM.from_pretrained(local_model_path)

# Create the (public) target repository; exist_ok makes re-runs harmless.
api = HfApi()
api.create_repo(repo_id=full_repo_name, private=False, exist_ok=True)

# `use_auth_token` is deprecated in favour of `token`.
model.push_to_hub(full_repo_name, token=HF_TOKEN, commit_message="Initial upload of fine-tuned model")
tokenizer.push_to_hub(full_repo_name, token=HF_TOKEN, commit_message="Initial upload of tokenizer")

