<a href="https://colab.research.google.com/github/JyothiSupriya/MARITIME-SAFETY-CLASSIFICATION-USING-LLMs/blob/main/Multi_LLM_Binary_Evaluator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

# Set OLLAMA_HOST for this process; the `ollama` subprocesses started later in
# the notebook are launched without an explicit environment, so they inherit it.
os.environ["OLLAMA_HOST"] = "127.0.0.1:11434"

In [None]:
import subprocess
import re
import pandas as pd
from tqdm import tqdm  # For progress bar (install with pip install tqdm)

# Some reasoning models wrap their deliberation in <think>...</think> before the
# final answer (presumably the DeepSeek variants this notebook targets -- confirm
# against real output). That text is dropped so words inside it cannot be
# mistaken for the verdict; the substitution is a no-op when no block is present.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.IGNORECASE | re.DOTALL)

# Whole-word match only: a plain substring test would find "no" inside
# "not"/"know"/"unknown" and "yes" inside "eyes".
_YES_NO_RE = re.compile(r"\b(yes|no)\b", re.IGNORECASE)


def parse_yes_no(response_text):
    """
    Extract a binary verdict from a model response.

    The first standalone word "yes" or "no" (case-insensitive) in the text
    decides the result, so "No, the eyes were fine" yields 'No' and
    "I do not know" yields 'Could not parse'.

    Args:
        response_text: Raw text returned by the model. Non-string values
            (e.g. None) are treated as unparseable instead of raising.

    Returns:
        'Yes', 'No', or 'Could not parse' if neither word is found.
    """
    if not isinstance(response_text, str):
        return "Could not parse"

    answer_text = _THINK_BLOCK_RE.sub(" ", response_text)
    match = _YES_NO_RE.search(answer_text)
    if match is None:
        return "Could not parse"

    return "Yes" if match.group(1).lower() == "yes" else "No"

def evaluate_row_pair(record, question, *, model_name="llama3.3", timeout=None):
    """
    Ask an Ollama-served model a Yes/No question about a single record.

    The prompt is piped to `ollama run <model_name>` on stdin and the model's
    stdout is reduced to a verdict with `parse_yes_no`.

    Args:
        record: Text of the record to evaluate.
        question: Yes/No question to ask about the record.
        model_name: Ollama model tag to run. Defaults to "llama3.3", the
            model this function previously hard-coded. The original author
            noted these model labels for the experiment: GEMMA7B, LLAMA3.2,
            MISTRAL7B, DEEPSEEK14B, PHI4, QWEN32B, GEMMA27B, DEEPSEEK32B,
            LLAMA3.3 -- pass the matching Ollama tag for each.
        timeout: Optional limit in seconds for one model call. The default
            None waits indefinitely, as before.

    Returns:
        The value returned by `parse_yes_no` on success, or a string starting
        with "Error: " when the command cannot be run, times out, or exits
        with a non-zero status. Failures are returned rather than raised so
        one bad row does not abort a long batch.
    """
    prompt = f"""
You are a safety expert. Evaluate the following record and answer the question with only one word: "Yes" or "No".

Record: {record}
Question: {question}

Reply with ONLY "Yes" or "No" (and no other text).
    """.strip()

    try:
        process = subprocess.run(
            ["ollama", "run", model_name],
            input=prompt,
            text=True,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
        )
    except Exception as e:
        # Deliberate row-level boundary: a missing `ollama` binary or a
        # timeout is recorded in the result instead of stopping the run.
        return f"Error: {str(e)}"

    if process.returncode != 0:
        return f"Error: {process.stderr.strip()}"

    # Extract raw text and parse
    return parse_yes_no(process.stdout.strip())

def process_excel_data(input_file):
    """
    Evaluate every record-question pair found in an Excel workbook.

    Rows with a missing value in either the "Record" or the "Question" column
    are dropped; each remaining row is passed to `evaluate_row_pair`.

    Args:
        input_file: Path of the Excel file, which must contain "Record" and
            "Question" columns.

    Returns:
        A DataFrame with "Record", "Question" and "Response" columns, one row
        per evaluated pair.
    """
    frame = pd.read_excel(input_file)
    # Keep only rows where both halves of the pair are present
    frame = frame.dropna(subset=["Record", "Question"])
    pair_count = len(frame)

    print(f"Processing {pair_count} row pairs...")

    # tqdm wraps the row iterator to show progress; total is passed
    # explicitly because the iterator has no length of its own
    evaluated = [
        {
            "Record": pair["Record"],
            "Question": pair["Question"],
            "Response": evaluate_row_pair(pair["Record"], pair["Question"]),
        }
        for _, pair in tqdm(frame.iterrows(), total=pair_count)
    ]

    return pd.DataFrame(evaluated)

# Script entry point: evaluate every pair in the input workbook, write the
# results to a new workbook, and print a short preview.
if __name__ == "__main__":
    # Configuration
    input_file = "/content/Models_response.xlsx"  # Input file with Record and Question columns
    output_file = "llama3.3_3.xlsx"  # Output file, written relative to the current working directory

    # Process data: one model call per row, so this is the long-running step
    results_df = process_excel_data(input_file)

    # Save results without the DataFrame index column
    results_df.to_excel(output_file, index=False)
    print(f"\nResults saved to {output_file}")
    print("Sample output:")
    # Preview the first rows of the result table
    print(results_df.head())

Processing 5000 row pairs...


100%|██████████| 5000/5000 [1:48:28<00:00,  1.30s/it]



Results saved to llama3.3_3.xlsx
Sample output:
                                              Record  \
0  While carrying out fire drill, the fire hoses ...   
1  While carrying out fire drill, the fire hoses ...   
2  While carrying out fire drill, the fire hoses ...   
3  While carrying out fire drill, the fire hoses ...   
4  While carrying out fire drill, the fire hoses ...   

                                            Question Response  
0  Is this record related to faulty equipment con...      Yes  
1  Is this record related to unsafe conditions on...       No  
2  Is this record related to communication failur...       No  
3  Is this record related to poor visibility crea...       No  
4  Is this record related to usage of mobile/cell...       No  
