In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Upload the dataset file

In [None]:
from google.colab import files
import pandas as pd
import numpy as np
import io

uploaded = files.upload()  # This opens an upload dialog


# **Convert it to a dataframe**

In [None]:
# Get the uploaded file name
json_filename = list(uploaded.keys())[0]

# Read JSONL file (JSON Lines format)
df = pd.read_json(json_filename, lines=True)

# Display the DataFrame
df.head()

Unnamed: 0,instruction,input,output
0,Answer the following question about prenatal c...,Why are prenatal visits necessary?,"To evaluate health risks for mother and baby, ..."
1,Answer the following question about prenatal c...,How long is the waiting period at each visit?,That depends on how busy your clinic is; if ve...
2,Answer the following question about prenatal c...,Is there a charge?,There is $10 fee for the pregnancy test.
3,Answer the following question about prenatal c...,What should I bring for my visit?,"You will need a valid passport, driver’s licen..."
4,Answer the following question about prenatal c...,Are the results confidential?,Yes.


# **Check for duplicates**

In [None]:
df[df.duplicated()]


Unnamed: 0,instruction,input,output
117,Answer the following question about prenatal c...,What are the benefits of prenatal care for wom...,Prenatal care can help women with a history of...
118,Answer the following question about prenatal c...,Can I continue to take my regular medications ...,It's essential to consult your healthcare prov...
119,Answer the following question about prenatal c...,What are the risks of untreated anemia during ...,Untreated anemia during pregnancy can increase...
120,Answer the following question about prenatal c...,How can I manage back pain during pregnancy if...,"To manage back pain during pregnancy, try prac..."
212,Answer the following question about postnatal ...,How can new mothers prioritize their physical ...,New mothers can prioritize their physical heal...
215,Answer the following question about postnatal ...,What are the signs of postpartum depression in...,Signs of postpartum depression in new fathers ...
216,Answer the following question about postnatal ...,How can new mothers balance their work and fam...,New mothers can balance their work and family ...
218,Answer the following question about postnatal ...,How can new mothers manage their finances afte...,New mothers can manage their finances by creat...
219,Answer the following question about postnatal ...,What are the signs of postpartum trauma?,Signs of postpartum trauma include feelings of...
220,Answer the following question about postnatal ...,How can new mothers prioritize their mental he...,New mothers can prioritize their mental health...


In [None]:
# Count occurrences of each stage
stage_counts = df["instruction"].value_counts()

# Display the count of each category
print(stage_counts)

instruction
Answer the following question about birth care.            120
Answer the following question about prenatal care.         117
Answer the following question about preconception care.    110
Answer the following question about postnatal care.        100
Name: count, dtype: int64




# **Drop Duplicates**

In [None]:
df = df.drop_duplicates()


# **Confirm if Duplicates were dropped**

In [None]:
# Assuming df is your DataFrame
df_cleaned = df.drop_duplicates()

# Save the cleaned dataset to a CSV file
df_cleaned.to_csv("cleaned_dataset.csv", index=False)

# If using Google Colab, download the file
from google.colab import files
files.download("cleaned_dataset.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **To prevent biases, we used undersampling to have 100 for each catergory**




In [None]:
# Count occurrences of each stage
stage_counts = df["instruction"].value_counts()
print("Stage Counts Before Balancing:\n", stage_counts)

# Identify the minimum count for balancing
min_count = stage_counts.min()  # Targeting the smallest category count

# Perform undersampling and update df
df = df.groupby("instruction", group_keys=False).apply(lambda x: x.sample(min_count, random_state=42))

# Save the balanced dataset
df.to_csv("balanced_dataset.csv", index=False)

# Display new counts
print("Stage Counts After Balancing:\n", df["instruction"].value_counts())
print("Undersampling complete. Balanced dataset saved as 'balanced_dataset.csv'.")

Stage Counts Before Balancing:
 instruction
Answer the following question about birth care.            120
Answer the following question about prenatal care.         117
Answer the following question about preconception care.    110
Answer the following question about postnatal care.        100
Name: count, dtype: int64
Stage Counts After Balancing:
 instruction
Answer the following question about birth care.            100
Answer the following question about postnatal care.        100
Answer the following question about preconception care.    100
Answer the following question about prenatal care.         100
Name: count, dtype: int64
Undersampling complete. Balanced dataset saved as 'balanced_dataset.csv'.


  df = df.groupby("instruction", group_keys=False).apply(lambda x: x.sample(min_count, random_state=42))


# **Check missing Values**

In [None]:
# Check for missing values
missing_values = df.isnull().sum()
print("\nMissing Values:\n", missing_values[missing_values > 0])




Missing Values:
 Series([], dtype: int64)


In [None]:
# Standardize text formatting for the 'instruction' column (optional)
df["instruction"] = df["instruction"].str.strip().str.lower()
# Count occurrences of each category
stage_counts = df["instruction"].value_counts()
print("\nCategory Counts Before Balancing:\n", stage_counts)


Category Counts Before Balancing:
 instruction
answer the following question about birth care.            100
answer the following question about postnatal care.        100
answer the following question about preconception care.    100
answer the following question about prenatal care.         100
Name: count, dtype: int64


# **Install differnet dependancies**

In [None]:
!pip install fsspec==2024.12.0



Collecting fsspec==2024.12.0
  Using cached fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Using cached fsspec-2024.12.0-py3-none-any.whl (183 kB)
Installing collected packages: fsspec
Successfully installed fsspec-2024.12.0


In [None]:
!pip install datasets gcsfs bigframes



Collecting datasets
  Using cached datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting gcsfs
  Using cached gcsfs-2025.3.0-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting bigframes
  Downloading bigframes-1.41.0-py2.py3-none-any.whl.metadata (5.4 kB)
INFO: pip is looking at multiple versions of gcsfs to determine which version is compatible with other requirements. This could take a while.
Collecting gcsfs
  Downloading gcsfs-2025.2.0-py2.py3-none-any.whl.metadata (1.9 kB)
  Downloading gcsfs-2024.12.0-py2.py3-none-any.whl.metadata (1.6 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<7.0.0,>=3.20.2 (from google-cloud-bigquery-connection>=1.12.0->bigframes)
  Downloading protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets>=7.7.1->bigframes)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Using cached datasets-3.4.1-py3-none-any.whl (487 kB)
Downloading gcsfs

In [None]:
!pip install --upgrade --force-reinstall gcsfs


Collecting gcsfs
  Using cached gcsfs-2025.3.0-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from gcsfs)
  Using cached aiohttp-3.11.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting decorator>4.1.2 (from gcsfs)
  Using cached decorator-5.2.1-py3-none-any.whl.metadata (3.9 kB)
Collecting fsspec==2025.3.0 (from gcsfs)
  Using cached fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting google-auth>=1.2 (from gcsfs)
  Using cached google_auth-2.38.0-py2.py3-none-any.whl.metadata (4.8 kB)
Collecting google-auth-oauthlib (from gcsfs)
  Using cached google_auth_oauthlib-1.2.1-py2.py3-none-any.whl.metadata (2.7 kB)
Collecting google-cloud-storage (from gcsfs)
  Using cached google_cloud_storage-3.1.0-py2.py3-none-any.whl.metadata (12 kB)
Collecting requests (from gcsfs)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs)
  U

In [None]:
# Install necessary libraries in Google Colab
!pip install datasets transformers torch


Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Using cached fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Using cached fsspec-2024.12.0-py3-none-any.whl (183 kB)
Installing collected packages: fsspec
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.0
    Uninstalling fsspec-2025.3.0:
      Successfully uninstalled fsspec-2025.3.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2025.3.0 requires fsspec==2025.3.0, but you have fsspec 2024.12.0 which is incompatible.[0m[31m
[0mSuccessfully installed fsspec-2024.12.0


# **Connecting to hugging face ( repository used to access different pretrained models) **

In [None]:
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: fineG

In [None]:
pip install wandb

Collecting protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 (from wandb)
  Using cached protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Using cached protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl (319 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.30.1
    Uninstalling protobuf-6.30.1:
      Successfully uninstalled protobuf-6.30.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-cloud-aiplatform 1.84.0 requires google-cloud-storage<3.0.0dev,>=1.32.0, but you have google-cloud-storage 3.1.0 which is incompatible.[0m[31m
[0mSuccessfully installed protobuf-5.29.4


In [None]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msalome-chemiat[0m ([33msalome-chemiat-strathmore-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
import torch
print(torch.cuda.is_available())  # Should return True


True


In [None]:
!pip install --upgrade bitsandbytes
!pip install --upgrade accelerate transformers

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.3
Collecting transformers
  Downloading transformers-4.50.0-py3-none-any.whl.metadata (39 kB)
Downloading transformers-4.50.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m91.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.49.0
    Uninstalling transformers-4.49.0:
      Successfully uninstalled transformers-4.49.0
Successfully installed transformers-4.50.0


In [None]:
!pip uninstall bitsandbytes
!pip install bitsandbytes


Found existing installation: bitsandbytes 0.45.3
Uninstalling bitsandbytes-0.45.3:
  Would remove:
    /usr/local/lib/python3.11/dist-packages/bitsandbytes-0.45.3.dist-info/*
    /usr/local/lib/python3.11/dist-packages/bitsandbytes/*
Proceed (Y/n)? Y
  Successfully uninstalled bitsandbytes-0.45.3
Collecting bitsandbytes
  Using cached bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Using cached bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.3


# **Start Training our Model**

In [None]:
import torch
import pandas as pd
from datasets import Dataset, DatasetDict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
from sklearn.model_selection import train_test_split

# Load and preprocess dataset
def extract_stage(text):
    """
    Classifies text into different care stages based on the content.
    """
    text = text.lower()
    if "prenatal c" in text:
        return "prenatal_care"
    elif "preconception c" in text:
        return "preconception_care"
    elif "birth c" in text:
        return "birth_care"
    elif "postnatal c" in text:
        return "postnatal_care"
    return "unknown"

df["stage"] = df["instruction"].apply(extract_stage)

# Stratified split
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["stage"], random_state=42)

# Convert to Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

# Load model & tokenizer
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Ensure consistent padding

# Quantization config for 4-bit precision
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)

# Tokenization function
def tokenize_function(examples):
    """
    Tokenizes and prepares the dataset with input-output pairs for training.
    """
    input_texts = examples["input"]
    output_texts = examples["output"]

    combined_texts = [
        f"Question: {input_text} Answer: {output_text} {tokenizer.eos_token}"
        for input_text, output_text in zip(input_texts, output_texts)
    ]

    # Tokenization process
    tokenized_inputs = tokenizer(
        combined_texts,
        truncation=True,
        padding="max_length",
        max_length=128,
        return_tensors="pt"
    )

    # Mask labels (ignore input tokens)
    labels = tokenized_inputs["input_ids"].clone()
    input_lengths = [len(tokenizer.encode(f"Question: {input_text}")) for input_text in input_texts]

    for i in range(len(labels)):
        labels[i, :input_lengths[i]] = -100  # Ignore input tokens

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# Tokenize dataset
tokenized_datasets = DatasetDict({
    "train": train_dataset.map(tokenize_function, batched=True),
    "test": test_dataset.map(tokenize_function, batched=True)
})

# Define LoRA configuration
lora_config = LoraConfig(
    r=16,  # Increased rank for better learning
    lora_alpha=32,
    lora_dropout=0.05,  # Reduced dropout to retain more information
    bias="none",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],  # Targeting attention layers for LoRA
)

# Wrap model with LoRA
model = get_peft_model(model, lora_config)

# Unfreeze LoRA layers for training
for name, param in model.named_parameters():
    param.requires_grad = "lora" in name or "adapter" in name

# Training parameters
bf16_supported = torch.cuda.get_device_capability(0)[0] >= 8
training_args = TrainingArguments(
    output_dir="models/llama_finetuned",
    per_device_train_batch_size=4,  # Increased batch size
    gradient_accumulation_steps=2,  # Adjusted for better efficiency
    num_train_epochs=7,  # Increased epochs to allow for better model convergence
    save_total_limit=3,
    save_strategy="epoch",
    evaluation_strategy="epoch",
    fp16=not bf16_supported,
    bf16=bf16_supported,
    optim="adamw_bnb_8bit",
    logging_steps=10,
    logging_dir="./logs",
)

# Custom Trainer with loss function
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """
        Custom loss function to handle CrossEntropyLoss with ignored tokens.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()

        loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-100)
        loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return (loss, outputs) if return_outputs else loss

# Free memory
torch.cuda.empty_cache()

# Train model
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

model.train()
trainer.train()

# Save model
model.save_pretrained("models/llama_finetuned")
tokenizer.save_pretrained("models/llama_finetuned")


Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,26.7226
20,8.642
30,2.8339
40,2.7814
50,2.5919
60,2.2509
70,1.9843
80,2.0387
90,1.9867
100,1.8956


('models/llama_finetuned/tokenizer_config.json',
 'models/llama_finetuned/special_tokens_map.json',
 'models/llama_finetuned/tokenizer.json')

In [None]:
!pip install evaluate


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [None]:
!pip install -q rouge_score

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone


In [None]:
!pip install -q nltk


Test Our model Accuracy using Rougue and BlUE
# **Rougue -**
Best for: Text Summarization & Sentence Similarity
How it Works:

Measures recall (how much of the reference text appears in the generated text).
ROUGE-1, ROUGE-2: Count unigram & bigram matches.
ROUGE-L: Uses longest common subsequence (LCS) to check fluency.

Which ROUGE to Use?

    For keyword-based accuracy → Use ROUGE-1
    For phrase similarity → Use ROUGE-2
    For fluency & coherence (natural sentence structure) → Use ROUGE-L ✅

In [None]:
import torch
from evaluate import load  # ✅ Corrected import
from transformers import AutoModelForCausalLM, AutoTokenizer

#Reload the fine-tuned model and tokenizer
model_name = "models/llama_finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")

# Move model to evaluation mode
model.eval()

#Define a lightweight test function
def generate_response(prompt):
    with torch.no_grad():  # Disable gradients to save memory
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to("cuda")

        with torch.cuda.amp.autocast():  # Mixed precision
            output = model.generate(**inputs, max_length=64, num_beams=5)

    return tokenizer.decode(output[0], skip_special_tokens=True)

#Process test set in memory-efficient batches
batch_size = 4  # Reduced batch size to minimize memory usage
predictions = []
references = []

for i in range(0, len(test_dataset), batch_size):
    batch = test_dataset.select(range(i, min(i + batch_size, len(test_dataset))))

    prompts = batch["input"]
    expected_outputs = batch["output"]

    generated_outputs = [generate_response(prompt) for prompt in prompts]

    predictions.extend(generated_outputs)
    references.extend(expected_outputs)

    # Clear memory after each batch
    del batch, prompts, expected_outputs, generated_outputs
    torch.cuda.empty_cache()

# Compute accuracy (or other metrics)
metric = load("rouge")
results = metric.compute(predictions=predictions, references=references, rouge_types=["rouge1", "rouge2", "rougeL"])


print("Test Results:", results)

# Free GPU memory after testing
torch.cuda.empty_cache()


  with torch.cuda.amp.autocast():  # Mixed precision
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation

Test Results: {'rouge1': np.float64(0.2401862413889669), 'rouge2': np.float64(0.1056270829743812), 'rougeL': np.float64(0.20429852927885678)}


In [None]:
import torch
from evaluate import load
from transformers import AutoModelForCausalLM, AutoTokenizer
import pandas as pd

# Load model and tokenizer
model_name = "models/llama_finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
model.eval()

# Define response generation function
def generate_response(prompt):
    with torch.no_grad():
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to("cuda")
        with torch.cuda.amp.autocast():
            output = model.generate(**inputs, max_length=64, num_beams=5)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Process test set per category
batch_size = 4
metric = load("rouge")
categories = ["preconception care", "prenatal care", "birth care", "postnatal care"]
category_results = {}

for category in categories:
    df_category = test_dataset[test_dataset["instruction"].str.contains(category, case=False)]
    predictions, references = [], []

    for i in range(0, len(df_category), batch_size):
        batch = df_category.iloc[i : i + batch_size]
        prompts = batch["input"].tolist()
        expected_outputs = batch["output"].tolist()
        generated_outputs = [generate_response(prompt) for prompt in prompts]

        predictions.extend(generated_outputs)
        references.extend(expected_outputs)

        torch.cuda.empty_cache()

    # Compute ROUGE scores for each category
    results = metric.compute(predictions=predictions, references=references, rouge_types=["rouge1", "rouge2", "rougeL"])
    category_results[category] = results

# Print category-wise ROUGE scores
for category, scores in category_results.items():
    print(f"\nCategory: {category}")
    print(scores)

# Free GPU memory
torch.cuda.empty_cache()


AttributeError: 'list' object has no attribute 'str'

## **Testing using BlUE**  

Compares n-grams (word sequences) in the generated text with the reference text.
Measures precision (how many generated words match the reference).
Uses a brevity penalty to discourage very short outputs.

In [None]:
import torch
from evaluate import load  # Import evaluation metric
from transformers import AutoModelForCausalLM, AutoTokenizer

# Reload the fine-tuned model and tokenizer
model_name = "models/llama_finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")

# Move model to evaluation mode
model.eval()

# Define a lightweight test function
def generate_response(prompt):
    with torch.no_grad():  # Disable gradients to save memory
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to("cuda")

        with torch.cuda.amp.autocast():  # Mixed precision
            output = model.generate(**inputs, max_length=64, num_beams=5)


    return tokenizer.decode(output[0], skip_special_tokens=True)

# Process test set in memory-efficient batches
batch_size = 4  # Reduced batch size to minimize memory usage
predictions = []
references = []

for i in range(0, len(test_dataset), batch_size):
    batch = test_dataset.select(range(i, min(i + batch_size, len(test_dataset))))

    prompts = batch["input"]
    expected_outputs = batch["output"]

    generated_outputs = [generate_response(prompt) for prompt in prompts]

    predictions.extend(generated_outputs)
    references.extend([[ref] for ref in expected_outputs])  # BLEU requires list of references

    # Clear memory after each batch
    del batch, prompts, expected_outputs, generated_outputs
    torch.cuda.empty_cache()

# Compute BLEU score
metric = load("bleu")  # Using BLEU instead of ROUGE
results = metric.compute(predictions=predictions, references=references)

print("BLEU Score:", results)

# Free GPU memory after testing
torch.cuda.empty_cache()


In [None]:
from rouge_score import rouge_scorer

def rerank_summaries(candidate_summaries, reference_summary):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

    best_score = 0
    best_summary = ""

    for summary in candidate_summaries:
        scores = scorer.score(reference_summary, summary)
        rouge1_f1 = scores['rouge1'].fmeasure  # Using ROUGE-1 F1 score for ranking

        if rouge1_f1 > best_score:
            best_score = rouge1_f1
            best_summary = summary

    return best_summary

# Example usage
candidates = ["Summary 1", "Summary 2", "Summary 3"]
reference = "Reference summary for comparison"
best_output = rerank_summaries(candidates, reference)
print("Best summary:", best_output)


Best summary: Summary 1


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned model and tokenizer from the saved directory
model = AutoModelForCausalLM.from_pretrained("models/llama_finetuned") # Changed to AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("models/llama_finetuned")

In [None]:
def generate_response(prompt, max_length=100, temperature=0.7, top_p=0.9):
    # Format the prompt to match training data
    formatted_prompt = f"{prompt} {tokenizer.eos_token}"

    # Tokenize input and move to the same device as the model
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=1.2  # Reduce repetition
        )

    # Decode output
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

In [None]:
# Example 1
input_text = "What are common danger signs in pregnancy?"
response = generate_response(input_text)
print("Generated Response:", response)

# Example 2
input_text = "How can I take care of my newborn?"
response = generate_response(input_text)
print("Generated Response:", response)


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated Response: What are common danger signs in pregnancy? Does your doctor tell you when it’s time to worry about a symptom or issue during pregnancy?
It is very important for pregnant women and their partners to be aware of the symptoms that can indicate an underlying health condition. While some problems may not need immediate attention, others might require prompt treatment.
Unusual bleeding
You have vaginal spotting (light red blood mixed with mucus) between one menstrual
Generated Response: How can I take care of my newborn? Question: How can I take care of a baby?
Answer : There are many things you need to do when caring for your new baby. Here's some advice.
First, it is important that the person who will be taking care of your child should have had experience with babies and children before starting work on this project or helping out in any other way. The best place to look would probably be at local nurseries, as well as


In [None]:
def generate_response(prompt, max_length=100, temperature=0.7, top_p=0.9):
    # Format the prompt to match training data
    formatted_prompt = f"{prompt} {tokenizer.eos_token}"

    # Tokenize input
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=1.2  # Reduce repetition
        )

    # Decode output
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response


In [None]:
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/47 [00:00<?, ? examples/s]

Downloading the Fine-Tuned Model

In [None]:
import shutil
shutil.make_archive("models/llama_finetuned", 'zip', "models/llama_finetuned")
