In [None]:
!pip install nbformat

In [None]:
import nbformat

notebook_path = "Finetuned_mistral.ipynb"

# Parse the notebook as nbformat v4.
with open(notebook_path, "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

# Remove the problematic widgets metadata, and only touch the file on disk
# (and report success) when something was actually removed.
if "widgets" in nb["metadata"]:
    del nb["metadata"]["widgets"]

    # Save back the cleaned notebook
    with open(notebook_path, "w", encoding="utf-8") as f:
        nbformat.write(nb, f)

    print("✅ Cleaned metadata.widgets from notebook!")
else:
    print("No metadata.widgets found in notebook; nothing to clean.")

In [None]:
import json

# Toy (symptom description, diagnosis) pairs used as the fine-tuning corpus.
symptom_diagnosis_pairs = [
    (
        "Patient reports fever, cough, and sore throat. What is the most likely disease?",
        "Flu",
    ),
    (
        "Patient reports chest pain and shortness of breath.",
        "Pneumonia",
    ),
    (
        "Patient reports joint pain and morning stiffness.",
        "Rheumatoid Arthritis",
    ),
]

# Each record keeps the key order "instruction" then "output".
data = [
    {"instruction": symptoms, "output": diagnosis}
    for symptoms, diagnosis in symptom_diagnosis_pairs
]

# Serialize as JSON Lines: one JSON object per line.
with open("symptoms_dataset.jsonl", "w") as jsonl_file:
    jsonl_file.writelines(json.dumps(record) + "\n" for record in data)


In [None]:
# Unsloth stays first, ahead of transformers, as in the original import order.
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments


In [None]:
# Load model
# Options for the base checkpoint: the pre-quantized Mistral-7B repo, loaded
# in 4-bit with a 2048-token maximum sequence length.
base_model_options = dict(
    model_name="unsloth/mistral-7b-bnb-4bit",
    max_seq_length=2048,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

In [None]:

# Apply LoRA
# Adapter hyper-parameters: rank 64, alpha 16, dropout 0.1, attached to the
# modules named q_proj and v_proj.
lora_options = dict(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)
model = FastLanguageModel.get_peft_model(model, **lora_options)

In [None]:
# Load dataset
# The JSON loader exposes the single file as the "train" split.
raw_dataset = load_dataset("json", data_files="symptoms_dataset.jsonl")["train"]

# Hold out 10% for evaluation. The seed is fixed so the train/test assignment
# is identical on every run; `raw_dataset` is kept separate so re-running this
# cell always splits the original data rather than an already-split object.
dataset = raw_dataset.train_test_split(test_size=0.1, seed=42)

In [None]:


# Preprocessing function
def tokenize(example):
    """Render one record as an instruction/response prompt and tokenize it.

    Args:
        example: Mapping with "instruction" and "output" entries.

    Returns:
        The result of calling the notebook-level ``tokenizer`` on the prompt,
        truncated and padded to 512 tokens.
    """
    # Close the response with the tokenizer's EOS token so training examples
    # carry an explicit end-of-answer marker; without it the fine-tuned model
    # has no signal for when to stop generating. Falls back to no suffix if
    # the tokenizer defines no EOS token.
    eos_token = tokenizer.eos_token or ""
    prompt = (
        f"### Instruction:\n{example['instruction']}\n\n"
        f"### Response:\n{example['output']}{eos_token}"
    )
    return tokenizer(prompt, truncation=True, padding="max_length", max_length=512)




In [None]:


# Tokenize dataset
# Columns handed to the model; everything else is hidden by set_format.
model_input_columns = ["input_ids", "attention_mask"]

# Run `tokenize` over the dataset, then expose the selected columns as
# torch tensors (set_format changes the object in place).
tokenized_dataset = dataset.map(tokenize)
tokenized_dataset.set_format(type="torch", columns=model_input_columns)

In [None]:
# Training args
# Choose the mixed-precision mode from what the GPU supports instead of
# hard-coding fp16: bf16 where available, fp16 otherwise (the original
# behaviour). NOTE(review): Unsloth is expected to load the model in bfloat16
# on bf16-capable GPUs and to reject fp16 training there — confirm against the
# installed Unsloth version.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="output",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    logging_steps=10,
    save_strategy="epoch",  # write a checkpoint at the end of every epoch
    fp16=not use_bf16,
    bf16=use_bf16,
)

In [None]:
# Trainer
# The tokenized dataset only exposes input_ids and attention_mask, so without
# labels the model would not return a loss and training would fail. The
# causal-LM collator (mlm=False) derives labels from input_ids for each batch
# and excludes padding positions from the loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Fine-tune, then write the final model to the same directory that is
# archived for download later in the notebook.
trainer.train()
trainer.save_model("output/")

In [None]:
from google.colab import files
import shutil

# Zip the contents of the "output" directory into
# mistral_symptom_predictor.zip, then hand the archive to Colab's file
# download helper.
shutil.make_archive(
    base_name="mistral_symptom_predictor",
    format="zip",
    root_dir="output",
)
files.download("mistral_symptom_predictor.zip")
