In [2]:

import os
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
import re
import json

In [2]:
dataset = load_dataset("json", data_files={"train": "train_unsloth2.jsonl", "validation": "val_unsloth2.jsonl", "test": "test_unsloth2.jsonl"})

In [None]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "teknium/OpenHermes-2.5-Mistral-7B",
    max_seq_length = 1024,
    dtype = None,
    load_in_4bit = True,
)


In [8]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


In [6]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "o_proj","gate_proj","k_proj", "v_proj","up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, 
    bias = "none",   
    use_gradient_checkpointing = True,
    random_state = 3411,
    max_seq_length = 1024,
    use_rslora = False,  
    loftq_config = None, 
)

In [None]:
def formatting_prompts_func(examples):

    instruction = 'Perform Diagnosis'
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for inputx, output in zip(inputs, outputs):
        text = alpaca_prompt.format(instruction, inputx, output) + tokenizer.eos_token
        texts.append(text)
    return { "text" : texts, }

dataset = dataset.map(formatting_prompts_func, batched = True,)

In [9]:
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset['train'],
    #eval_dataset = dataset['validation'],
    dataset_text_field = "text",
    max_seq_length = 1024,
    tokenizer = tokenizer,
    args = TrainingArguments(
        per_device_train_batch_size = 16,
        #gradient_accumulation_steps = 4,
        warmup_steps = 100,
        num_train_epochs = 1,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        output_dir = "mistral_outputs_v3",
        lr_scheduler_type = "linear",
        optim = "adamw_8bit",
        seed = 3407,
        learning_rate = 1e-5
    ),
)
trainer.train(resume_from_checkpoint=True)

In [10]:
model.save_pretrained("mistral_lora_model_v3")
model.save_pretrained_merged("mistral_merged_model_v3", tokenizer, save_method = "merged_16bit",)

In [4]:
def generate_text(model,tokenizer, text, max_length):
    FastLanguageModel.for_inference(model)
    # Generate predictions
    instruction = "Perform Diagnosis with 1-10 diseases in the format- Differential Diagnosis is:\s*(.*?),?\s*and the most likely is **X**"
    instruction2 = """Perform diagnosis and return output as a JSON ,following format:
        {
            'differential_daignosis' // List[str]
            'most_likely' //str
        }
        Don't return any explanation
        """
    inputs = tokenizer(
    [
        
        alpaca_prompt.format(
            'Perform Daignosis',
            text, # input
            "", # output - leave this blank for generation!
        )
    ], return_tensors = "pt").to("cuda")

    outputs = model.generate(**inputs, max_new_tokens = max_length, use_cache = True)
    return tokenizer.batch_decode(outputs)
    
def load_unsloth_model_and_tokenizer(model_path):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_path,
        max_seq_length = 1024,
        dtype = None,
        load_in_4bit = True,
        local_files_only = True,
    )
    return model, tokenizer

def extract_diagnosis2(input_string):
    diff_diag_pattern = r"'differential_diagnosis': \[(.*?)\]"
    most_likely_pattern = r"'most_likely': '(.*?)'"
    text = input_string
    # Extract differential_diagnosis
    diff_diag_match = re.search(diff_diag_pattern, text)
    if diff_diag_match:
        diff_diag_str = diff_diag_match.group(1)
        # Split the string into a list of items
        differential_diagnosis = [item.strip().strip("'") for item in diff_diag_str.split(",")]
    else:
        differential_diagnosis = []

    # Extract most_likely
    most_likely_match = re.search(most_likely_pattern, text)
    most_likely = most_likely_match.group(1) if most_likely_match else None
    print(differential_diagnosis, most_likely)
    return most_likely, differential_diagnosis
def extract_diagnosis(input_string):
    match = re.search(r'\*\*(.*?)\*\*', input_string)
    most_likely = match.group(1) if match else '' # Return the matched disease
    
    differential_match = re.search(r'Differential Diagnosis is:\s*(.*?),?\s*and the most likely is', input_string)
    differential_diseases = []
    if differential_match:
        differential_diseases = [d.strip() for d in differential_match.group(1).split(',')]
    
    return most_likely, differential_diseases

def write_diagnosis_to_file(most_likely, differential, filename):
    data = {
        "most_likely_disease": most_likely,
        "differential_diseases": differential
        }
    with open(filename, 'a') as f:
        f.write(json.dumps(data) + '\n')
        
def generate_predictions(test_file, model, tokenizer, output_file, max_length=256):
    """
    Generate predictions for test samples and save to file.
    """
    with open(test_file, 'r') as f:
        content = f.read().strip()
        json_objects = content.split('}')[:-1]
        test_data = [json.loads(f'{obj}}}') for i, obj in enumerate(json_objects)]
    n = 0
    with open(output_file, 'w') as f:
        for item in test_data:
            print(f'item {n}')
            n+=1
            if n<2000 :
                continue
            human_input = item["input"] 
            prediction = generate_text(model, tokenizer, human_input, max_length)[0]
            print(prediction)
            most_likely, differential = extract_diagnosis(prediction)
            write_diagnosis_to_file(most_likely, differential, output_file)
            break

In [5]:
test_file = "test_unsloth2.jsonl"  
model_output_file = "model_predictions2.jsonl"  
model_path = 'mistral_lora_model'
model, tokenizer = load_unsloth_model_and_tokenizer(model_path)

==((====))==  Unsloth 2024.11.7: Fast Mistral patching. Transformers = 4.46.2.
   \\   /|    GPU: NVIDIA H100 80GB HBM3. Max memory: 79.097 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1. CUDA = 9.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.11.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [12]:
model.save_pretrained_gguf("v1_gguf", tokenizer, quantization_method = "q4_k_m")

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 1654.79 out of 2015.05 RAM for saving.


100%|██████████| 32/32 [00:00<00:00, 103.61it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Done.
==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp will take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits will take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: [0] Installing llama.cpp. This will take 3 minutes...


RuntimeError: Unsloth: The file 'llama.cpp/llama-quantize' or 'llama.cpp/quantize' does not exist.
But we expect this file to exist! Maybe the llama.cpp developers changed the name?

In [None]:
# Generate predictions from test file
generate_predictions(test_file, model, tokenizer, model_output_file)
print(f"Predictions saved to {model_output_file}")

In [11]:
# Generate predictions through manual prompts
prompt = 'Is your brain bleeding? Yes'
generate_text(model=model,tokenizer=tokenizer, text=prompt, max_length=256)


['<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPerform Daignosis\n\n### Input:\nIs your brain bleeding? Yes\n\n### Response:\nDifferential Diagnosis is: Anemia and the most likely is Anemia<|im_end|>']

In [52]:
from rouge_score import rouge_scorer

In [73]:
#Functions for evaluation

def calculate_metrics(pred_set, truth_set):
    tp = len(pred_set & truth_set)  # True Positives: intersection of prediction and truth
    fp = len(pred_set - truth_set)  # False Positives: in prediction but not in truth
    fn = len(truth_set - pred_set)  # False Negatives: in truth but not in prediction

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

    return precision, recall, f1

def load_jsonl(filename):
    data = []
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                # Parse each line as a JSON object and append it to the list
                data.append(json.loads(line.strip()))
            except json.JSONDecodeError as e:
                print(f"Error decoding line: {line.strip()} - {e}")
    return data




def evaluation_system(predicted_data, ground_truth_data):
    
    correct_most_likely = 0
    precision_list = []
    recall_list = []
    f1_list =[]
    rouge_scores=[]
    total_cases = len(ground_truth_data)
    for pred, truth in zip(predicted_data, ground_truth_data):
        # Evaluate "Most Likely Disease"
        if pred["most_likely_disease"] == truth["most_likely_disease"]:
            correct_most_likely += 1
        
        # Evaluate "Differential Diagnosis"
        pred_set = set(pred["differential_diseases"])
        truth_set = set(truth["differential_diseases"])
        
        precision, recall, f1 = calculate_metrics(pred_set, truth_set)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)
        # ROUGE score
        
        predicted =     " ".join(pred["differential_diseases"]),
        ground_truth =     " ".join(truth["differential_diseases"]),
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        scores = scorer.score(ground_truth[0], predicted[0])
        rouge_scores.append(scores['rougeL'].fmeasure)
    
    accuracy = correct_most_likely / total_cases
    avg_precision = sum(precision_list) / total_cases
    avg_recall = sum(recall_list) / total_cases
    avg_f1 = sum(f1_list) / total_cases
    avg_rouge = sum(rouge_scores) / total_cases
    
    return {
        "accuracy_for_most_likely": accuracy,
        "precision_differential": avg_precision,
        "recall_differential": avg_recall,
        "f1_differential": avg_f1,
        "rouge_differential": avg_rouge,
    }



In [74]:
predicted_data = load_jsonl('model_predictions.jsonl')
ground_truth_data = load_jsonl('test_evaluation.jsonl')
evaluation_system(predicted_data,ground_truth_data)

{'accuracy_most_likely': 0.9958991494532199,
 'precision_differential': 0.9725353996770867,
 'recall_differential': 0.9751223975634911,
 'f1_differential': 0.9702945402271386,
 'rouge_differential': 0.9448142731543822}