In [None]:
%%bash
# Fetch the MuSiQue repository into the current working directory.
# Skip the clone when the checkout already exists: `git clone` aborts with
# "destination path already exists", which would make this cell fail on re-run.
if [ ! -d musique ]; then
    git clone https://github.com/StonyBrookNLP/musique.git
fi

In [None]:
%%bash
# Run the repository's own download script from inside the checkout
# (the script is invoked by relative name, so the `cd` must come first).
cd /kaggle/working/musique
bash download_data.sh

In [None]:
import pandas as pd

# Path of the test-split file inside the cloned repository's data folder.
test_json='/kaggle/working/musique/data/musique_ans_v1.0_test.jsonl'

# lines=True: the file is JSON Lines, i.e. one JSON record per line.
test_data=pd.read_json(test_json, lines=True)
# Display (rows, columns) as the cell output.
test_data.shape

In [None]:
# Preview the first rows of the test split.
test_data.head()

In [None]:
# Show the first two entries of the 'paragraphs' field of the first test record.
test_data.iloc[0]['paragraphs'][:2]

# Making Multiple Reasoning Paths

In [None]:
# Load the dev-split JSON Lines file; `ans` is the working DataFrame for the rest of the notebook.
ans=pd.read_json('/kaggle/working/musique/data/musique_ans_v1.0_dev.jsonl', lines=True)
# Preview the first five records.
ans.head(5)

In [7]:
# Keep only the first 1200 dev records; every later cell works on this subset.
ans=ans.head(1200)

In [None]:
# Take the first record as a worked example and print its question
# together with the question decomposition stored alongside it.
first_ans=ans.iloc[0]
print(first_ans['question'])
print(first_ans['question_decomposition'])

In [None]:
# Print the full 'paragraphs' field of the example record.
print(first_ans['paragraphs'])

In [None]:
# Step 1: Collect the supporting paragraph text for every step of a question decomposition
def extract_relevant_paragraphs(decomposition, paragraphs):
    """Return the text of the paragraphs that support each decomposition step.

    Args:
        decomposition: iterable of dicts, each carrying a
            'paragraph_support_idx' key.
        paragraphs: iterable of dicts, each carrying 'idx' and
            'paragraph_text' keys.

    Returns:
        A list of 'paragraph_text' values, one per decomposition step whose
        'paragraph_support_idx' equals the 'idx' of some paragraph, in
        decomposition order. Steps without a matching paragraph are skipped.
    """
    def _find_paragraph(wanted_idx):
        # First paragraph whose idx matches, or None when there is no match.
        for candidate in paragraphs:
            if candidate['idx'] == wanted_idx:
                return candidate
        return None

    # Lazy generator: each step is looked up right before its text is read.
    hits = (_find_paragraph(step['paragraph_support_idx']) for step in decomposition)
    return [hit['paragraph_text'] for hit in hits if hit]

# Extract the relevant context for the question decomposition
# (single-record example, kept for reference):
# context = extract_relevant_paragraphs(first_ans['question_decomposition'], first_ans['paragraphs'])
# context

# Add an 'evidence' column holding the supporting paragraph texts of each row.
# .assign() returns a new DataFrame, so no column is written into the slice
# produced by ans.head(...) (which would raise SettingWithCopyWarning).
ans = ans.assign(
    evidence=ans.apply(
        lambda row: extract_relevant_paragraphs(row['question_decomposition'], row['paragraphs']),
        axis=1,
    )
)

# Drop rows without evidence. extract_relevant_paragraphs always returns a
# list, so dropna() alone never removes anything; rows whose evidence list is
# empty are filtered out explicitly as well.
ans = ans.dropna(subset=['evidence'])
ans = ans[ans['evidence'].map(len) > 0]
ans.shape

In [None]:
from typing import Optional, List, Dict
from pydantic import BaseModel

class Entity(BaseModel):
    """Typed view of one row of the `ans` DataFrame.

    Instances are created with `Entity.model_validate` from the records of
    `ans`, so the field names mirror the DataFrame columns.
    """
    # Record identifier column.
    id: str
    # Candidate paragraphs; the evidence extraction reads the 'idx' and
    # 'paragraph_text' keys of each dict.
    paragraphs: List[Dict]
    # The question text.
    question: str
    # Decomposition steps; the evidence extraction reads the
    # 'paragraph_support_idx' key of each dict.
    question_decomposition: List[Dict]
    # Reference answer; copied into ReasoningPath.ground_truth.
    answer: str
    # Alternative reference answers (presumably accepted spellings; not used in this notebook).
    answer_aliases: List[str]
    # Answerability flag as provided by the dataset (not used in this notebook).
    answerable: bool
    # Supporting paragraph texts produced by extract_relevant_paragraphs.
    evidence: List[str]
    

class ReasoningPath(BaseModel):
    """One model response for one question at one sampling temperature."""
    # Question text taken from the Entity.
    question: str
    # The Entity's evidence paragraphs joined with newlines.
    evidence: str
    # Raw response returned by the inference call.
    reason: str
    # Answer extracted from the response by get_answer ("None" when no \boxed{...} is found).
    answer: str
    # Reference answer taken from the Entity.
    ground_truth: str
    # Sampling temperature the response was generated with.
    temperature: float
    # Token count returned by the inference call.
    tokens: int
    # Always initialised to 0.0 in this notebook; presumably filled in by a later scoring step.
    score: float

# Validate every record of `ans` against the Entity schema and report how many were built.
entities = list(map(Entity.model_validate, ans.to_dict(orient='records')))
print(len(entities))

In [None]:
# Inspect the evidence paragraphs attached to the first entity.
entities[0].evidence

In [None]:
import re
from sot.sot import SoT
from typing import Tuple, Union
from doraemon.logger_util import get_logger
from doraemon.inference_factory import InferenceFactory

# Shared logger for the cells below; the logfile argument names
# "musique_dataset_builder.log" (presumably where records are written — helper not visible here).
logger=get_logger(name=__name__, logfile="musique_dataset_builder.log")

def get_answer(raw_answer: str)-> str:
    """Extract the content of the first ``\\boxed{...}`` in a model response.

    The closing brace is found by counting brace depth, so nested braces
    (e.g. ``\\boxed{\\text{Paris}}``) and multi-line contents are returned
    whole instead of being cut at the first ``}``.

    Args:
        raw_answer: the raw response text.

    Returns:
        The text between the braces of the first ``\\boxed{...}``, or the
        string "None" when the response contains no boxed answer.
    """
    marker = "\\boxed{"
    start = raw_answer.find(marker)
    if start == -1:
        return "None"

    body_start = start + len(marker)
    depth = 1  # the opening brace of \boxed{ itself
    for pos in range(body_start, len(raw_answer)):
        char = raw_answer[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return raw_answer[body_start:pos]

    # Braces never balanced: fall back to the previous "up to the first }"
    # behaviour so malformed responses give the same result as before.
    legacy = re.search(r"\\boxed\{(.*?)\}", raw_answer)
    return legacy.group(1) if legacy else "None"
    

def process_entity(args)-> Optional[Tuple[ReasoningPath, int]]:
    """Generate one reasoning path for a single (entity, paradigm, temperature) task.

    The three inputs arrive packed in one tuple so the function can be handed
    directly to ``executor.map``, which passes a single argument per task.

    Args:
        args: ``(et, paradigm, temperature)``. ``et`` is an ``Entity``;
            ``paradigm`` is forwarded to ``SoT.get_initialized_prompt``;
            ``temperature`` is forwarded to ``InferenceFactory.inference``.

    Returns:
        ``(reasoning_path, tokens)`` on success, where ``tokens`` is the
        integer token count stored on the reasoning path. ``None`` when
        building the prompt, running inference or validating the result
        raised; the error is logged and not re-raised, so one failing task
        does not abort the whole run.
    """
    et,paradigm,temperature=args

    try:
        # The prompt body is the supporting paragraphs followed by the question.
        evidence_str = "\n".join(et.evidence)
        prompt=SoT.get_initialized_prompt(paradigm=paradigm, question=f"Context:{evidence_str}\nQuestion:{et.question}")

        r_s, tokens=InferenceFactory.inference(logger=logger, messages=prompt, temperature=temperature)
        logger.info(r_s)
        result=ReasoningPath(
            question=str(et.question), 
            evidence=evidence_str,
            reason=str(r_s), 
            answer=get_answer(r_s),
            ground_truth=str(et.answer),
            temperature=float(temperature), 
            tokens=int(tokens), 
            score=0.0)  # score is a placeholder at generation time
        # Return the validated int so the value matches the annotated return type.
        return result, result.tokens
    except Exception as e:
        logger.error(f"Error processing question {et.question} at temperature {temperature} with exception {e}")
        return None


# Classify the first question only; the resulting `paradigm` is the value
# later paired with every entity when the task list is built.
paradigm = SoT.classify_question(entities[0].question)
logger.info(paradigm)
# Sanity check: the string form of the classification must be "conceptual_chaining".
assert "conceptual_chaining"==str(paradigm)

In [None]:
# Nine evenly spaced sampling temperatures: 0.0, 0.25, 0.5, ..., 2.0
temperatures = [0.25 * step for step in range(9)]

# One task per (entity, temperature) pair, entity-major, all sharing `paradigm`.
tasks = [
    (entity, paradigm, temperature)
    for entity in entities
    for temperature in temperatures
]
tasks[0]

In [None]:
from tqdm import tqdm
import concurrent.futures

# Run every task through process_entity in a pool with a single worker
# process (max_workers=1), so tasks are executed one at a time.
with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
    # executor.map yields results in task order; tqdm wraps the iterator for a
    # progress bar and list() drains it. Failed tasks appear as None entries.
    results=list(tqdm(executor.map(process_entity, tasks), total=len(tasks)))

In [None]:
from typing import List

# Keep the ReasoningPath of every task that succeeded; failed tasks are None.
# A comprehension is used instead of a top-level loop with
# `reason_path, _ = ...`: assigning to `_` in a notebook cell shadows
# IPython's last-output variable, and the loop variables would leak as globals.
multipleReasonPaths: List[ReasoningPath] = [
    outcome[0] for outcome in results if outcome is not None
]

len(multipleReasonPaths)

In [None]:
# Log the first reasoning path and its extracted answer as a spot check
# (raises IndexError if no task succeeded).
logger.info(multipleReasonPaths[0])
logger.info(multipleReasonPaths[0].answer)

In [18]:
import pickle
from datetime import datetime
from datasets import Dataset

def convert_rp_to_ds(multipleReasonPaths: List[ReasoningPath]) -> Dataset:
    """Build a ``Dataset`` with one row per reasoning path.

    Each ``ReasoningPath`` is converted to a plain dict with ``model_dump()``
    and the resulting list is passed to ``Dataset.from_list``.
    """
    rows = []
    for path in multipleReasonPaths:
        rows.append(path.model_dump())
    return Dataset.from_list(rows)

def to_pkl(ds: Dataset, filename: str):
    """Pickle ``ds`` into ``filename``; 'wb' mode overwrites any existing file."""
    with open(filename, mode='wb') as sink:
        pickle.Pickler(sink).dump(ds)

# Turn the collected reasoning paths into a Dataset and persist it as a pickle
# file in the current working directory.
ds:Dataset=convert_rp_to_ds(multipleReasonPaths)

# Plain string literal: the previous f-string had no placeholders.
to_pkl(ds, filename="musique-sots-dataset.pkl")

In [None]:
# Convert to pandas for inspection. Nine rows cover one question at every
# temperature, provided none of that question's tasks failed.
df = ds.to_pandas()
df.head(9)

In [20]:
# Remove the cloned repository (including the downloaded data) from the working directory.
!rm -rf /kaggle/working/musique

In [None]:
import matplotlib.pyplot as plt

# Histogram of the token count of every generated reasoning path
fig, ax = plt.subplots()
df['tokens'].hist(ax=ax)
ax.set_title("Tokens per Example")
ax.set_xlabel("tokens")
ax.set_ylabel("count")
plt.show()