Download repository and dependencies for the abductive commonsense reasoning tasks.

In [None]:
# Clone the abductive-commonsense-reasoning repository, change into the checkout,
# install the packages listed in its requirements.txt, and run its get-data.sh script.
# NOTE(review): this cell is not idempotent. `git clone` and `%cd` are relative to the
# current working directory, so every re-run nests another checkout inside the previous
# one (the recorded output shows a three-level-deep path). The data-loading cell
# hard-codes an absolute path; confirm it matches where get-data.sh actually wrote files.
!git clone https://github.com/allenai/abductive-commonsense-reasoning.git
%cd abductive-commonsense-reasoning
!pip install -r requirements.txt
!sh get-data.sh

Cloning into 'abductive-commonsense-reasoning'...
remote: Enumerating objects: 261, done.
remote: Counting objects: 100% (45/45), done.
remote: Compressing objects: 100% (45/45), done.
remote: Total 261 (delta 3), reused 0 (delta 0), pack-reused 216 (from 1)
Receiving objects: 100% (261/261), 6.11 MiB | 21.01 MiB/s, done.
Resolving deltas: 100% (142/142), done.
/content/abductive-commonsense-reasoning/abductive-commonsense-reasoning/abductive-commonsense-reasoning
Obtaining pytorch-transformers from git+https://github.com/csbhagav/pytorch-transformers@generative-finetuning#egg=pytorch-transformers (from -r requirements.txt (line 20))
  Cloning https://github.com/csbhagav/pytorch-transformers (to revision generative-finetuning) to ./src/pytorch-transformers
  Running command git clone --filter=blob:none --quiet https://github.com/csbhagav/pytorch-transformers /content/abductive-commonsense-reasoning/abductive-commonsense-reasoning/abductive-commonsense-reasoning/src/pytorch-

In [None]:
# Install the oyama helper (straight from GitHub) and the ollama Python client.
# NOTE(review): neither install is version-pinned, so results may drift between runs.
!pip install -q git+https://github.com/HawkClaws/oyama.git ollama
from oyama import oyama
import ollama

# Model tag handed to oyama.run.
model_path = "deepseek-r1:8b"
# The value returned by oyama.run is kept as `model_name` and later passed as the
# `model` argument of ollama.chat. The recorded output shows this call probing for the
# ollama CLI and running the install script when it is missing — presumably it also
# pulls and serves the model; verify against the oyama source.
model_name = oyama.run(model_path)

  Preparing metadata (setup.py) ... done
command:=ollama --version
Output: /bin/sh: 1: ollama: not found
/bin/sh: 1: ollama: not found
command:=curl -fsSL https://ollama.com/install.sh | sh
Output: >>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama

In [None]:
import pandas as pd

# Directory holding the ANLI jsonl / label files. Kept as one constant so the
# absolute path is written once instead of once per file.
ANLI_DIR = "/content/abductive-commonsense-reasoning/abductive-commonsense-reasoning/data/anli"


def load_anli_split(split, data_dir=ANLI_DIR):
    """Load one ANLI split together with its labels.

    Args:
        split: Split name used as the file stem, e.g. ``"dev"`` or ``"test"``.
        data_dir: Directory containing ``<split>.jsonl`` and ``<split>-labels.lst``.

    Returns:
        A ``(examples, labels)`` tuple. ``examples`` is the DataFrame read from the
        jsonl file with an added ``"label"`` column; ``labels`` is the single-column
        DataFrame read from the label file.

    Raises:
        ValueError: If the label file and the jsonl file have different row counts.
            Labels are attached by positional index, so a mismatch would otherwise
            silently produce NaN or dropped labels.
    """
    examples = pd.read_json(f"{data_dir}/{split}.jsonl", lines=True)  # each line is one JSON record
    labels = pd.read_csv(f"{data_dir}/{split}-labels.lst", header=None, names=["label"])
    if len(labels) != len(examples):
        raise ValueError(
            f"{split}: {len(examples)} examples but {len(labels)} labels; files are out of sync"
        )
    examples["label"] = labels["label"]
    return examples, labels


# 1. Load dev set
df_dev, labels_dev = load_anli_split("dev")

# 2. Load test set
df_test, labels_test = load_anli_split("test")



def make_prompt(row):
    """Render the two-observation / two-hypothesis question for one example.

    Args:
        row: Mapping-like object indexable by ``'obs1'``, ``'obs2'``, ``'hyp1'``
            and ``'hyp2'`` (a DataFrame row or a plain dict both work).

    Returns:
        The prompt as a single newline-separated string, ending with the
        instruction to answer with only ``1`` or ``2``.
    """
    # Empty entries become the blank lines that separate the three sections.
    prompt_lines = [
        "Given the following two observations, predict what occurred inbetween the observations:",
        f"obs 1. {row['obs1']}",
        f"obs 2. {row['obs2']}",
        "",
        "Which hypothesis is most likely to have occurred between the observations?",
        f"hyp 1. {row['hyp1']}",
        f"hyp 2. {row['hyp2']}",
        "",
        "Please respond with ONLY the integer of the hypothesis (1 or 2).",
    ]
    return "\n".join(prompt_lines)

# Build one prompt per test example by applying make_prompt row-wise to df_test.
df_test["prompt"] = df_test.apply(make_prompt, axis=1)

# df_test["prompt"] now holds the full prompt text for each row; preview the first few.
print(df_test["prompt"].head())


0    Given the following two observations, can you ...
1    Given the following two observations, can you ...
2    Given the following two observations, can you ...
3    Given the following two observations, can you ...
4    Given the following two observations, can you ...
Name: prompt, dtype: object


In [None]:
import ollama
import pandas as pd
import concurrent.futures

def query_model(prompt_text):
    """Send one user prompt to the Ollama chat endpoint and return the reply text.

    Uses the notebook-level ``model_name`` as the model to query.

    Args:
        prompt_text: Full prompt, sent as a single ``user`` message.

    Returns:
        The assistant's reply with surrounding whitespace stripped, or ``""``
        when the reply has no content.
    """
    response = ollama.chat(
        model=model_name,
        messages=[{"role": "user", "content": prompt_text}],
        stream=False,
    )
    # Chat replies carry their text under message.content. The top-level
    # "response" key belongs to ollama.generate, so reading it here always
    # produced an empty string.
    message = response["message"]
    return (message["content"] or "").strip()

def run_inference_parallel(df, max_workers=4):
    """Query the model for every row's prompt using a pool of worker threads.

    Args:
        df: DataFrame with a ``"prompt"`` column. It is modified in place: a
            ``"prediction"`` column is created (or reset) and then filled.
        max_workers: Size of the thread pool.

    Returns:
        The same ``df`` object, with ``"prediction"`` holding each row's
        ``query_model`` result, or ``None`` for rows whose query raised.
    """
    df["prediction"] = None
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each submitted job back to the index label of the row it came from.
        row_of = {
            executor.submit(query_model, record["prompt"]): label
            for label, record in df.iterrows()
        }

        # Collect in completion order; the index label puts each answer in its row.
        for finished in concurrent.futures.as_completed(row_of):
            idx = row_of[finished]
            try:
                df.at[idx, "prediction"] = finished.result()
            except Exception as e:
                # Best effort: report the failing row and leave its prediction empty.
                print(f"Error on row {idx}: {e}")
                df.at[idx, "prediction"] = None
    return df

# Run inference over the whole test set with 8 worker threads.
# run_inference_parallel writes into and returns the DataFrame it is given, so
# anli_test_results and df_test refer to the same object.
anli_test_results = run_inference_parallel(df_test, max_workers=8)
print(anli_test_results.head())