# **ProofWriter Analysis**

### Dataset downloading

In [1]:
import os
# Directory (relative to this notebook) that holds the unpacked ProofWriter data.
DATASET_PATH = "../dataset"

# Fetch and unpack the dataset only when DATASET_PATH does not exist yet, so
# re-running this cell does nothing once the data is in place.
if not os.path.isdir(DATASET_PATH):
    # Download into the parent directory; -nc skips the transfer if the file is already there.
    !wget -nc -P ../ https://allenai.org/data/proofwriter
    # Extract the downloaded file (treated as a zip archive) into the parent directory.
    !unzip -d ../ ../proofwriter
    # Rename the extracted folder to DATASET_PATH ($DATASET_PATH is filled in by IPython from the Python variable).
    !mv ../proofwriter-dataset-V2020.12.3 $DATASET_PATH
    # Remove the downloaded archive now that it has been unpacked.
    !rm ../proofwriter

### Model loading

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
# Seed torch's RNG before the model is created.
torch.random.manual_seed(0)

model_name = "microsoft/Phi-4-mini-instruct"

# Half-precision weights with automatic device placement. Eager attention and
# output_attentions=True are requested at load time; the attention-inspection
# cell further down reads `outputs.attentions` from a forward pass.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    output_attentions=True,
    attn_implementation="eager",
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Text-generation pipeline wrapping the model and tokenizer loaded above,
# processing one prompt per batch.
pipe = pipeline(
    "text-generation",
    device_map="auto",
    batch_size=1,
    tokenizer=tokenizer,
    model=model,
)

# Greedy decoding (no sampling), at most 10 new tokens, and only the generated
# continuation is returned rather than prompt + continuation.
generation_args = dict(
    max_new_tokens=10,
    return_full_text=False,
    do_sample=False,
)


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.70it/s]
Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0


### Dataset loading

In [4]:
# NOTE(review): the wildcard import hides where names come from. This notebook
# appears to rely on it for ProofWriterDataset and build_one_shot_prompt;
# consider importing them explicitly once data_loading's exports are confirmed.
from data_loading import *

# Derive the dataset directory from DATASET_PATH (set in the download cell)
# instead of repeating the "../dataset" literal, so both cells always agree.
# The trailing "" keeps the trailing path separator of the original
# "../dataset/OWA/" value, in case the loader concatenates directory and file
# name directly — TODO confirm against data_loading.
# The first positional argument is passed as None; its meaning is defined in
# data_loading and is not visible from this notebook.
dataset = ProofWriterDataset(
    None,
    "depth-3/meta-train.jsonl",
    dataset_path=os.path.join(DATASET_PATH, "OWA", ""),
)

### Execution

In [5]:
# Index of the dataset element that is passed to build_one_shot_prompt as the
# example for every prompt; the loop below skips this index when building prompts.
example_index = 14
example_elem = dataset[example_index]

def bool_to_bin(b):
    """Map the string "True" to 1 and the string "False" to 0.

    Args:
        b: A gold label or a model answer. Must be exactly "True" or "False"
            (case-sensitive, no surrounding whitespace or punctuation).

    Returns:
        int: 1 for "True", 0 for "False".

    Raises:
        ValueError: If ``b`` is anything else. The check is an explicit raise
            rather than an ``assert`` because assertions are removed when
            Python runs with ``-O``, which would silently turn every
            unexpected value into 0.
    """
    if b == "True":
        return 1
    if b == "False":
        return 0
    raise ValueError(f"expected 'True' or 'False', got {b!r}")

# Number of leading dataset entries (indices 0 .. MAX_SCANNED_EXAMPLES - 1)
# considered for evaluation. Kept small for quick iteration; raise it to
# evaluate more of the dataset.
MAX_SCANNED_EXAMPLES = 3

y_true = []   # gold labels as 0/1, aligned index-by-index with `prompts`
y_pred = []   # predictions; filled by the generation cell below
prompts = []  # one-shot prompts, one per evaluated element

for i, e in enumerate(dataset):
    # Stop once the scan window is exhausted.
    if i >= MAX_SCANNED_EXAMPLES:
        break
    # Do not evaluate on the element that is used as the in-prompt example.
    if i == example_index:
        continue

    y_true.append(bool_to_bin(e["label"]))
    prompt = build_one_shot_prompt(e, example_elem)
    prompts.append(prompt)


In [6]:
# Start from an empty list so that re-running this cell does not append a
# second copy of the predictions.
y_pred = []

for i, res in enumerate(pipe(prompts, **generation_args)):
    print(res)
    try:
        # The first whitespace-separated token of the generation is the answer.
        generated_text = res[0]["generated_text"]
        y_pred.append(bool_to_bin(generated_text.split()[0]))
    except Exception as err:
        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt / SystemExit still stop the cell.
        # An answer that cannot be parsed is scored as incorrect (the opposite
        # of the gold label). This keeps y_pred the same length as y_true and
        # aligned with it, so the metrics cell can run.
        print(f"{i}: could not parse model answer ({err!r}); counting it as incorrect")
        y_pred.append(1 - y_true[i])

OutOfMemoryError: CUDA out of memory. Tried to allocate 18.00 MiB. GPU 0 has a total capacity of 15.56 GiB of which 40.31 MiB is free. Process 2404701 has 2.26 GiB memory in use. Including non-PyTorch memory, this process has 12.25 GiB memory in use. Of the allocated memory 11.90 GiB is allocated by PyTorch, and 60.96 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the parsed predictions against the gold labels.
accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
precision = precision_score(y_true=y_true, y_pred=y_pred)
recall = recall_score(y_true=y_true, y_pred=y_pred)
f1 = f1_score(y_true=y_true, y_pred=y_pred)

# One metric per line, rounded to two decimals.
print(
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1-score: {f1:.2f}",
    sep="\n",
)

ValueError: Found input variables with inconsistent numbers of samples: [3, 0]

In [8]:
# Put the model in evaluation mode before the forward pass.
model.eval()

# Tokenise all prompts as one padded batch and move it to the device reported
# by the model, rather than assuming a CUDA device is available.
inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)

with torch.no_grad():
    # Ask for the attention maps explicitly so this cell does not depend on
    # how the model happened to be configured when it was loaded.
    outputs = model(**inputs, output_attentions=True)

# One tensor per layer. In the recorded run below: 32 layers, each of shape
# [3, 24, 538, 538] for the 3 prompts in the batch.
attentions = outputs.attentions
print(f"Numero di layer: {len(attentions)}")
print(f"Shape di ogni attention map: {attentions[0].shape}")

Numero di layer: 32
Shape di ogni attention map: torch.Size([3, 24, 538, 538])


In [10]:
import matplotlib.pyplot as plt

# Which attention map to display. `attentions` is indexed as
# attentions[layer][batch element][head]; the title is derived from the same
# values so it cannot drift from what is actually plotted.
LAYER_IDX = 0
SAMPLE_IDX = 0
HEAD_IDX = 0

# The model is loaded in float16; convert to a float32 NumPy array on the CPU
# so plotting does not depend on how matplotlib handles half-precision tensors.
attn_map = attentions[LAYER_IDX][SAMPLE_IDX][HEAD_IDX].detach().float().cpu().numpy()

fig, ax = plt.subplots()
im = ax.imshow(attn_map, cmap="viridis")
ax.set_title(f"Attention Layer {LAYER_IDX} - Head {HEAD_IDX}")
# Axis meaning assumes the usual (query, key) layout of the last two dimensions — TODO confirm.
ax.set_xlabel("Key position")
ax.set_ylabel("Query position")
fig.colorbar(im, ax=ax)
plt.show()


AttributeError: 'Tensor' object has no attribute 'gpu'