# Crypto quantile forecasting (BTC/ETH) — Colab runner

Цей ноутбук запускає пайплайн з папки `code/`:
1) завантаження цін (CoinGecko),
2) побудова датасету,
3) тренування квантильних моделей LightGBM,
4) оцінювання (pinball loss, coverage) + графік.

## Як користуватись
1. У Colab: **File → Upload notebook** (цей файл).
2. Завантаж у сесію Colab zip цього проєкту через комірку 0 (або вручну розмісти папку `code/` за шляхом `project/code/` і пропусти комірку 0).
3. Запусти комірки зверху вниз.


In [None]:
# === 0) Upload project zip (interactive) ===
# Skip this cell when the project is already present in Colab's filesystem.

import glob
import os
import zipfile

from google.colab import files

# Interactive upload prompt: pick a single .zip of your Prism project.
uploaded = files.upload()

# Keep only the uploaded entries whose name ends in .zip (case-insensitive);
# the rest of the cell needs exactly one archive to work with.
zips = [fname for fname in uploaded if fname.lower().endswith('.zip')]
assert len(zips) == 1, f"Upload exactly one zip, got: {zips}"

zip_path = zips[0]
# Some zip archives created on Windows can have non-UTF8 filenames.
# Python's ZipFile may decode them incorrectly, producing garbled names.
# We only need the `code/` folder, so we extract with a best-effort filename fix.
import shutil

def _safe_name(name: str) -> str:
    # Try to re-decode CP437->UTF8 or CP437->CP866/CP1251 when possible.
    # If it fails, keep the original name.
    for enc in ('utf-8', 'cp866', 'cp1251'):
        try:
            return name.encode('cp437').decode(enc)
        except Exception:
            pass
    return name

# Extract only the `code/` folder from the uploaded archive into `project/`.
with zipfile.ZipFile(zip_path, 'r') as z:
    for m in z.infolist():
        fixed = _safe_name(m.filename)
        # Normalize path separators
        fixed = fixed.replace('\\\\', '/').replace('\\', '/')
        # Only extract the code/ directory (and ignore other garbled-named files)
        if '/code/' in ('/' + fixed) or fixed.endswith('/code') or fixed.startswith('code/'):
            # Extract under the repaired, '/'-separated name rather than the
            # name stored in the archive.
            m.filename = fixed
            z.extract(m, 'project')

# If the zip had a nested root folder (project/<name>/code/...), the code folder
# is not at project/code yet, so locate it through its config.yaml marker.
code_candidates = glob.glob('project/**/code/config.yaml', recursive=True)
assert len(code_candidates) >= 1, 'After extraction, could not find code/config.yaml (code folder missing).'

# Make a canonical copy to project/code if needed.
cfg_path = code_candidates[0]
# config.yaml sits directly inside code/, so its parent directory IS the code
# folder and the project root is exactly one level above that. (Climbing two
# levels pointed src_code at the wrong directory: a flat zip had its freshly
# extracted project/code deleted, and a nested zip was never copied.)
src_code = os.path.abspath(os.path.dirname(cfg_path))
detected_root = os.path.dirname(src_code)
dst_code = os.path.abspath('project/code')
if src_code != dst_code:
    # Replace any stale canonical copy left over from a previous run.
    if os.path.exists(dst_code):
        shutil.rmtree(dst_code)
    shutil.copytree(src_code, dst_code)
    print('Copied code/ to canonical path: project/code')

print('Code files:', os.listdir('project/code'))

print('Extracted to: project/')
print('Top-level:', os.listdir('project')[:20])


In [None]:
# === 1) Install dependencies ===
import sys, os, glob

# Zip may extract into a nested folder, so locate requirements.txt dynamically
req_candidates = glob.glob('project/**/code/requirements.txt', recursive=True)
assert len(req_candidates) >= 1, 'Could not find code/requirements.txt after extracting zip.'
req_path = req_candidates[0]
root = os.path.abspath(os.path.join(os.path.dirname(req_path), '..', '..'))
print('Detected project root:', root)
print('Using requirements:', req_path)

%pip -q install -r {req_path}

print('Python:', sys.version)


In [None]:
# === 2) Run pipeline ===
import os
import glob

print('Sanity check: code files under project/**/code:', glob.glob('project/**/code/config.yaml', recursive=True)[:5])

# Some zips extract into a nested folder (e.g., project/<name>/code/...).
# Auto-detect the folder that contains code/config.yaml.
candidates = glob.glob('project/**/code/config.yaml', recursive=True)
assert len(candidates) >= 1, 'Could not find code/config.yaml after extracting zip.'
config_path = candidates[0]
root = os.path.abspath(os.path.join(os.path.dirname(config_path), '..', '..'))
print('Detected project root:', root)

!python {root}/code/01_fetch_prices.py --config {root}/code/config.yaml
!python {root}/code/02_make_dataset.py --config {root}/code/config.yaml
!python {root}/code/03_train_quantile_gbm.py --config {root}/code/config.yaml
!python {root}/code/04_evaluate.py --config {root}/code/config.yaml

print('Reports found:', glob.glob('project/**/reports/*', recursive=True)[:30])
print('Data found:', glob.glob('project/**/data/*', recursive=True)[:30])
print('Models found:', glob.glob('project/**/models/*', recursive=True)[:30])


In [None]:
# === 3) Show outputs ===
import glob
import os

import pandas as pd

# Find the evaluation table anywhere under project/; on failure the assertion
# message lists the report files that do exist, to help diagnose the miss.
eval_candidates = glob.glob('project/**/reports/eval_4h.csv', recursive=True)
assert eval_candidates, f'Could not find eval_4h.csv. Found reports: {glob.glob("project/**/reports/*", recursive=True)[:20]}'

eval_path = eval_candidates[0]
print('Using:', eval_path)

df = pd.read_csv(eval_path)
# A bare trailing expression makes the notebook render the DataFrame.
df


In [None]:
# Plot preview
import glob

from IPython.display import Image, display

# Find the prediction-vs-truth plot anywhere under project/ and show the first
# match inline.
png_candidates = glob.glob('project/**/reports/pred_vs_true_4h.png', recursive=True)
assert png_candidates, 'Could not find pred_vs_true_4h.png'
display(Image(png_candidates[0]))
