In [None]:
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage
from langchain_core.messages import SystemMessage
import os
import time
import pandas as pd
import base64
from joblib import Parallel, delayed
from pathlib import Path
import random


In [None]:
# --- Run configuration -------------------------------------------------------
# Root folder holding one sub-folder per input modality.
InputFileFolder = 'MultiModalInputs'
# Name of the model to query through ChatOllama.
LLMName = 'mistral-small3.1:24b'

# Already installed LLMs that support multimodal inputs:
#   llava:latest
#   llama4:16x17b
#   gemma3:12b
#   qwen2.5vl:7b
#   mistral-small3.1:24b
#   bakllava:7b
llm = ChatOllama(model=LLMName)
# Round-trip call to the model before the long run starts; the reply is
# discarded because this is not the last expression of the cell.
llm.invoke('Hello, please show me your LLM model version.')

# GROK
# NOTE(review): stray marker -- presumably a placeholder for a Grok backend;
# confirm or remove.

# Create the per-model output folder. ':' in the model name is replaced by '_'
# (presumably because ':' is not a valid file-name character on every
# platform -- TODO confirm).
safe_LLMName = LLMName.replace(':', '_')
path = Path(f"MultiModalOutputs/{safe_LLMName}")
path.mkdir(parents=True, exist_ok=True)


In [None]:
import json

# Read the filtered QA set from disk, then keep its top-level keys as a list
# so that entries can be addressed by integer position.
with open("Final_QA_JSON_filtered_0723.json", mode="r", encoding="utf-8") as qa_file:
    QASet = json.load(qa_file)
EventNames = [event_name for event_name in QASet.keys()]

In [None]:
def write_message(Q_current):
    """Build the chat messages for one QA item, ready to submit to the LLM.

    Parameters
    ----------
    Q_current : dict
        One question record, e.g. ``QASet[EventNames[23]]['qa'][123]``.
        Keys read here: ``'modalities'`` (iterable of modality names),
        ``'context'`` (its last three entries are unpacked as storm, year
        and lead time), ``'question'``, ``'prompt'`` and ``'answer'``.

    Returns
    -------
    tuple
        ``(messages, true_answer)`` where ``messages`` is
        ``[SystemMessage, HumanMessage]`` and ``true_answer`` is
        ``Q_current['answer']``. The human message carries one content part
        per modality (in the order listed) followed by the question text.

    Raises
    ------
    ValueError
        If a modality is not one of the four names handled below.
    """
    image_modalities = ('Graphic_Uncertainty_cone', 'Graphic_Wind')
    text_modalities = ('text_advisory', 'Table_wind')

    storm, year, leadtime = Q_current['context'][-3:]
    # Every modality shares the same base file name; only the sub-folder
    # (named after the modality) and the extension differ.
    filename = f"{storm}_{year}_{leadtime}h"

    human_content = []
    for modality in Q_current['modalities']:
        if modality in image_modalities:
            image_path = os.path.join(InputFileFolder, modality, f"{filename}.PNG")
            with open(image_path, "rb") as img_file:
                image_b64 = base64.b64encode(img_file.read()).decode("utf-8")
            # The files are PNGs, so label the data URL as image/png
            # (it was previously mislabelled as image/gif).
            human_content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_b64}"}
            })
        elif modality in text_modalities:
            text_path = os.path.join(InputFileFolder, modality, f"{filename}.txt")
            with open(text_path, "r", encoding="utf-8") as text_file:
                advisory_text = text_file.read()
            human_content.append({
                "type": "text",
                "text": advisory_text
            })
        else:
            raise ValueError('Unknown Modality Input!')

    # The question goes last, after all supporting material.
    human_content.append({
        "type": "text",
        "text": Q_current['question']
    })

    messages = [
        SystemMessage(content=Q_current['prompt']),
        HumanMessage(content=human_content)
    ]

    return messages, Q_current['answer']

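Run the LLM over each event: `run_LLM` answers every question of a single event and writes the results to one CSV file per event.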


In [None]:
def run_LLM(EventIdx):
    """Ask the LLM every question of one event and save the answers to CSV.

    The questions are visited in shuffled order, but each result is stored
    under the question's original index so that the CSV rows follow the
    original question order.

    Parameters
    ----------
    EventIdx : int
        Position of the event in ``EventNames``.

    Returns
    -------
    None
        The results are written to
        ``MultiModalOutputs/<safe_LLMName>/EvtID<EventIdx>_<EventName>.csv``
        with columns ``response`` and ``ground_truth``.
    """
    event_name = EventNames[EventIdx]
    print(f"EventIdx = {EventIdx}, EventName = {event_name}， LLM = {LLMName}")

    # Pair each question with its original index BEFORE shuffling, so the
    # original order can be restored afterwards.
    indexed_questions = list(enumerate(QASet[event_name]['qa']))
    random.shuffle(indexed_questions)

    results_by_index = {}
    for original_idx, question in indexed_questions:
        t1 = time.time()
        messages, true_answer = write_message(question)
        response = llm.invoke(messages)
        t2 = time.time()

        # Key by the original index, not by the shuffled position.
        results_by_index[original_idx] = {
            "response": response.content.strip(),
            "ground_truth": true_answer
        }

        print(f"Evtid = {EventIdx}, Q_idx = {original_idx},  elasped time = {t2-t1:.4f}")

    # Restore the original question order once, after all questions are done.
    results_thisevent = [results_by_index[idx] for idx in sorted(results_by_index)]

    # Explicit columns keep the CSV header even when the event has no questions.
    df = pd.DataFrame(results_thisevent, columns=["response", "ground_truth"])
    df.to_csv(f"MultiModalOutputs/{safe_LLMName}/EvtID{EventIdx}_{event_name}.csv", index=False)

In [None]:
# Build one deferred run_LLM call per event index, then hand them all to
# joblib's loky backend with n_jobs=-1.
event_tasks = (delayed(run_LLM)(evntidx) for evntidx in range(len(EventNames)))
Parallel(n_jobs=-1, backend="loky")(event_tasks)

KeyboardInterrupt: 