<a href="https://colab.research.google.com/github/ShouryaBatra/psbs-research-project/blob/main/notebooks/full_pipeline_eval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

# Store token in env variable (more secure than plain text)
os.environ['GITHUB_TOKEN'] = "yourGithubToken"

# Use it to clone
!git clone https://$GITHUB_TOKEN@github.com/ShouryaBatra/psbs-research-project.git


In [None]:
# `!cd <dir>` runs inside a throwaway subshell, so it never changed the
# notebook's working directory. Every later cell addresses files as
# `psbs-research-project/...`, i.e. relative to the directory that CONTAINS
# the clone, so the working directory is deliberately left where it is.
# This cell only verifies that the clone step produced the repository.
import os

if not os.path.isdir("psbs-research-project"):
    raise FileNotFoundError(
        "psbs-research-project/ was not found in the current directory; "
        "re-run the clone cell and check the GitHub token."
    )

In [None]:
!pip install -r psbs-research-project/leak_eval/requirements.txt

In [None]:
# gpt env variable

import os
from getpass import getpass

# Prompt for the OpenAI key rather than hardcoding it, so the secret is not
# stored in the notebook. A key already present in the environment is kept.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Create the `results` directory in the current working directory.
# `-p` makes this a no-op when the directory already exists, so the cell is
# safe to re-run. The evaluation cell writes its output under results/.

!mkdir -p results

In [None]:
# Check the GPU: print the nvidia-smi report for this runtime.
# NOTE(review): presumably the evaluation below expects a GPU runtime -- confirm.

!nvidia-smi

In [None]:
# Clear any cached downloads: delete the Qwen2.5-1.5B entry under
# ~/.cache/huggingface/hub. `rm -rf` stays silent when the path does not
# exist, so this cell is safe to re-run.
!rm -rf ~/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B

In [None]:
# install model

import os

# Silence the hub's symlink warning. This flag has nothing to do with caching;
# it only suppresses a warning message. It is set BEFORE huggingface_hub is
# imported. NOTE(review): the library appears to read this flag once at import
# time, so setting it afterwards would have no effect -- confirm.
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

from huggingface_hub import snapshot_download

# Download fresh copy into ./qwen2.5-1.5b (the path later passed as --model).
# `resume_download` is intentionally not passed: it is a deprecated argument,
# and `force_download=True` already guarantees every file is fetched again.
print("Downloading Qwen2.5-1.5B model...")
snapshot_download(
    repo_id="Qwen/Qwen2.5-1.5B",
    local_dir="qwen2.5-1.5b",
    local_files_only=False,  # Download from internet
    force_download=True      # Force fresh download
)
print("Download completed!")

In [None]:
# List the downloaded model directory, then its *.safetensors files.
# This only shows that the files are present (names and sizes); it does not
# validate their contents.
!ls -la qwen2.5-1.5b/
!ls -la qwen2.5-1.5b/*.safetensors

In [None]:
# Copy the repository's `prompts` folder into the current working directory.
# NOTE(review): presumably eval_cp.py looks prompts up relative to the
# working directory -- confirm against the script.

!cp -r psbs-research-project/prompts .

In [None]:
# Fetch the airgapagent-r benchmark files from the Hugging Face Hub.

import os
from huggingface_hub import snapshot_download

# Make sure the destination folder is there before anything is written to it.
os.makedirs("psbs-research-project/leak_eval/datasets", exist_ok=True)

# Files matching these globs are left out of the download (optional filter).
skipped_globs = ["*.arrow", "*.lock"]

# Pull the dataset repository straight into the leak_eval datasets folder.
snapshot_download(
    repo_id="parameterlab/leaky_thoughts",
    repo_type="dataset",
    local_dir="./psbs-research-project/leak_eval/datasets",
    ignore_patterns=skipped_globs,
)

In [None]:
# Create the airgapagent sample dataset of 15.
# Runs create_sample_dataset.py on airgapagent-r-small.json and writes
# airgapagent-r-sample-15.json next to it, passing --sample_size 15 and a
# fixed --seed 42.
# NOTE(review): presumably the seed makes the sample reproducible -- confirm
# in create_sample_dataset.py.
!python psbs-research-project/leak_eval/scripts/create_sample_dataset.py \
--input_file psbs-research-project/leak_eval/datasets/airgapagent-r-small.json \
--output_file psbs-research-project/leak_eval/datasets/airgapagent-r-sample-15.json \
--sample_size 15 \
--seed 42

In [None]:
# Create the airgapagent sample dataset of 100.
# Same script, input file and --seed 42 as the 15-item sample; only
# --sample_size and the output file name differ.
!python psbs-research-project/leak_eval/scripts/create_sample_dataset.py \
--input_file psbs-research-project/leak_eval/datasets/airgapagent-r-small.json \
--output_file psbs-research-project/leak_eval/datasets/airgapagent-r-sample-100.json \
--sample_size 100 \
--seed 42

In [None]:
# Copy further files from leak_eval/ into the current working directory:
# the appropriateness matrix CSV and two helper modules.
# NOTE(review): presumably eval_cp.py loads these relative to the working
# directory -- confirm against the script.

!cp psbs-research-project/leak_eval/approp_matrix.csv .
!cp psbs-research-project/leak_eval/cp_eval_utils.py .
!cp psbs-research-project/leak_eval/generate_utils.py .

In [None]:
# Run the evaluation on airgapagent-r-sample-15 (15 prompts) with the locally
# downloaded qwen2.5-1.5b model; results go to results/tiny_test_cot.json.
# --input_file can be pointed at airgapagent-r-sample-100 instead, but that is
# discouraged here because it costs a lot of memory.
# NOTE(review): --gpt_eval presumably uses the OPENAI_API_KEY set earlier -- confirm in eval_cp.py.

!python psbs-research-project/leak_eval/eval_cp.py \
--model qwen2.5-1.5b \
--input_file psbs-research-project/leak_eval/datasets/airgapagent-r-sample-15.json \
--output_file results/tiny_test_cot.json \
--gpt_eval \
--prompt_type cot_explicit_unk \
--max_tokens 200 \
--temperature 0.1

In [None]:
# get summary block from new results and print it

import json
import os


def extract_summary(results):
    """Return the value stored under the 'summary' key of loaded results.

    Args:
        results: The object decoded from the results JSON file.

    Returns:
        The 'summary' value, or None when `results` is not a dict or has no
        'summary' key.
    """
    if isinstance(results, dict):
        return results.get('summary')
    return None


file_path = 'results/tiny_test_cot.json'

if not os.path.exists(file_path):
    # Give an actionable message instead of a bare traceback when the
    # evaluation cell has not produced its output yet.
    print(f"Results file {file_path} does not exist; run the evaluation cell first")
else:
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    summary_block = extract_summary(data)

    # Compare against None rather than relying on truthiness, so a summary
    # that is present but empty is still printed instead of reported missing.
    if summary_block is not None:
        print(json.dumps(summary_block, indent=2))
    else:
        print(f"Could not find 'summary' block in {file_path}")