# BT-BERT Workflow

Bu notebook BT-BERT veri hazırlığı, eğitim, değerlendirme ve açıklanabilirlik adımlarını uçtan uca çalıştırmak için başlangıç noktasıdır.

# 1. Hazırlık

In [1]:
from pathlib import Path
import json
import subprocess
import sys

# Project root is the parent of the directory the notebook is launched from,
# resolved to an absolute path so it stays valid if the working directory changes.
PROJECT_ROOT = Path('..').resolve()

# Configuration file shared by the scripts invoked further down.
CONFIG_PATH = PROJECT_ROOT / 'config.yaml'

# Expose the project root on the import path (added once, even if the cell is re-run).
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# NOTE: adjust environment variables (e.g., CUDA) here if needed.


# 2. Data Labelling

In [2]:
# Labelling is not run locally: this cell only verifies that the prepared
# CSV files (expected to be copied over from the cluster) are in place.
from pathlib import Path

data_dir = Path('../data')
required = [
    data_dir / filename
    for filename in ('labels.csv', 'train.csv', 'val.csv', 'test.csv')
]
missing = [p for p in required if not p.exists()]

if not missing:
    print('Data files present; skipping labelling step.')
else:
    # Tell the user how to obtain the files and list exactly which are absent.
    print('Data files missing. Copy from cluster or run data_prep manually:')
    print('  python ../src/data_prep.py --config ../config.yaml')
    print('Missing:', list(map(str, missing)))


# 3. Inspect Label Summary

In [4]:
import pandas as pd
import json

# Show the label summary JSON as a transposed table, if the file has been produced.
summary_path = Path("../data/label_summary.json")

if not summary_path.exists():
    print("Run the data labelling step first.")
else:
    summary = json.loads(summary_path.read_text(encoding="utf-8"))
    # Transpose so the top-level JSON keys become rows of the rendered table.
    display(pd.DataFrame(summary).T)

Run the data labelling step first.


# 4. Training

In [5]:
# Training is not run from this notebook; it only checks for *.pt checkpoints
# (expected to be copied over from the cluster).
from pathlib import Path

ckpt_dir = Path('../outputs/checkpoints')

# A missing directory is treated the same as an empty one.
ckpts = []
if ckpt_dir.exists():
    ckpts = sorted(ckpt_dir.glob('*.pt'))

if not ckpts:
    print('No checkpoints found. Copy from cluster or run a short local test:')
    print('  python ../src/train.py --config ../config.yaml --max_steps 10')
else:
    print(f'Found {len(ckpts)} checkpoints; skipping training.')


# 5. Evaluation

In [6]:
from pathlib import Path
import json
import subprocess
import sys

# Evaluate the last checkpoint on the test split and print the resulting metrics.
# Requires CONFIG_PATH from the setup cell (section 1).
checkpoint_dir = Path("../outputs/checkpoints")

# Checkpoints are ordered by path name; the last entry is the one evaluated.
# NOTE(review): name order equals training order only if checkpoint file names
# sort that way (e.g. zero-padded step/epoch numbers) - confirm the naming scheme.
checkpoints = sorted(checkpoint_dir.glob("*.pt"))
print("Found checkpoints:")
for ckpt in checkpoints:
    print(ckpt.name)

if checkpoints:
    selected_checkpoint = checkpoints[-1]
    metrics_output = Path("../outputs/eval_metrics.json")
    cmd = [
        # Run the script with the interpreter backing this kernel, so it sees the
        # same environment and packages; a bare "python" is resolved via PATH and
        # may be a different installation. Fall back to "python" only when the
        # interpreter path is unavailable.
        sys.executable or "python",
        "../src/evaluate.py",
        "--config", str(CONFIG_PATH),
        "--checkpoint", str(selected_checkpoint),
        "--split", "test",
        "--output", str(metrics_output),
    ]
    print('Running:', ' '.join(cmd))
    # check=True: a non-zero exit raises CalledProcessError instead of letting
    # the cell continue and read a missing or stale metrics file.
    subprocess.run(cmd, check=True)
    print(json.loads(metrics_output.read_text(encoding='utf-8')))
else:
    print('Train the model first to generate checkpoints.')

Found checkpoints:
Train the model first to generate checkpoints.


# 6. Explainability

In [None]:
from pathlib import Path
import json
import subprocess
import sys

# Run the explanation script for the last checkpoint on the validation split
# and report how many explanations were written.
# Requires CONFIG_PATH from the setup cell (section 1).
explain_output = Path("../outputs/attention_reports/explanations.json")

# Discover checkpoints in this cell instead of reusing the list left behind by
# the evaluation cell: the cell then works when run on its own and never acts
# on a stale list.
checkpoint_dir = Path("../outputs/checkpoints")
checkpoints = sorted(checkpoint_dir.glob("*.pt"))

if checkpoints:
    selected_checkpoint = checkpoints[-1]
    # Create the report directory only when a run will actually write into it.
    explain_output.parent.mkdir(parents=True, exist_ok=True)
    cmd = [
        # Use the kernel's own interpreter rather than whatever "python" is on PATH;
        # fall back to "python" only when the interpreter path is unavailable.
        sys.executable or "python",
        "../src/explain.py",
        "--config", str(CONFIG_PATH),
        "--checkpoint", str(selected_checkpoint),
        "--split", "val",
        # NOTE(review): presumably the number of examples to explain - confirm in explain.py.
        "--sample", "50",
        "--output", str(explain_output),
    ]
    print('Running:', ' '.join(cmd))
    # check=True: stop here if the script fails rather than reading stale output.
    subprocess.run(cmd, check=True)
    explanations = json.loads(explain_output.read_text(encoding='utf-8'))
    print(f"{len(explanations)} explanations written to {explain_output}")
else:
    print('No checkpoints available. Train the model first.')

# 7. Inspect Outputs

In [1]:
import pandas as pd
from pathlib import Path
import subprocess
import sys

# Preview the exported predictions CSV. If it does not exist yet, try to create
# it by running the prediction script on the test split with the last checkpoint.
# Requires CONFIG_PATH from the setup cell (section 1); run that cell first,
# even on a fresh kernel.
output_path = Path('../outputs/bt_bert_outputs.csv')
if not output_path.exists():
    print('Creating prediction output using latest checkpoint...')
    checkpoint_dir = Path('../outputs/checkpoints')
    # Ordered by path name; the last entry is treated as the latest checkpoint.
    checkpoints = sorted(checkpoint_dir.glob('*.pt'))
    if checkpoints:
        selected_checkpoint = checkpoints[-1]
        cmd = [
            # Use the kernel's own interpreter rather than whatever "python" is
            # on PATH; fall back to "python" only when the path is unavailable.
            sys.executable or 'python',
            '../src/predict.py',
            '--config',
            str(CONFIG_PATH),
            '--checkpoint',
            str(selected_checkpoint),
            '--split',
            'test',
            '--output',
            str(output_path),
        ]
        print('Running:', ' '.join(cmd))
        # check=True: a failing script raises instead of silently continuing.
        subprocess.run(cmd, check=True)
    else:
        print('No checkpoints available. Train the model first.')

# Re-check existence: the file may have just been created by the step above.
if output_path.exists():
    df = pd.read_csv(output_path)
    display(df.head())
else:
    print('No exported recommendation outputs yet.')

Creating prediction output using latest checkpoint...
No checkpoints available. Train the model first.
No exported recommendation outputs yet.


---
This notebook provides a scaffold for the BT-BERT workflow. Customize each code block as needed.