In [None]:
# Remove credential variables from the notebook session.
# WARNING: Do NOT print these variables after this cell!
from IPython.display import clear_output

# Drop every credential-bearing name that an earlier cell may have bound.
# globals().pop(name, None) is a no-op for names that were never defined, so
# this cell is safe to re-run and safe on a partially executed notebook.
for _secret_name in (
    "username",
    "password",
    "username_env",
    "password_env",
    "username_keyring",
    "password_keyring",
):
    globals().pop(_secret_name, None)
del _secret_name  # do not leave the loop variable behind in the namespace

# NOTE(review): this only unbinds the notebook variables. Values read from the
# process environment (os.getenv in the .env cell) are still in os.environ.

# clear_output() clears the output area of the cell that calls it, not the
# outputs of other cells. Other cells in this notebook print the first
# character of the username, so the messages below must not claim that the
# whole notebook has been scrubbed.
clear_output()

print("✓ Credential variables removed from the notebook namespace")
print("✓ Output of this cell cleared")
print("\nClear the outputs of all other cells before saving and committing this notebook.")

## Section 9: Clean Up Secrets

**ALWAYS run this before saving/sharing your notebook:**

In [None]:
import pandas as pd

# Preview the merged CSV if it exists; otherwise list the CSV files found in
# dest_dir so the reader can see what is available.
merged_path = Path(output_file)
if merged_path.exists():
    df = pd.read_csv(merged_path)
    print(f"✓ File shape: {df.shape} (rows, columns)")
    print(f"✓ Columns: {list(df.columns)}")
    print("\nFirst 5 rows:")
    display(df.head())
else:
    print(f"✗ File not found: {output_file}")
    # Report the configured directory instead of a hard-coded "data/raw/",
    # which would be wrong as soon as dest_dir is changed.
    print(f"   Available files in {dest_dir}:")
    raw_dir = Path(dest_dir)
    if raw_dir.exists():
        # sorted() gives a stable listing; glob order is filesystem-dependent.
        csv_files = sorted(raw_dir.glob("*.csv"))
        for f in csv_files:
            print(f"   - {f.name}")
        if not csv_files:
            # Make an empty directory explicit instead of printing nothing.
            print("   (no CSV files found)")
    else:
        print(f"   Directory {dest_dir} does not exist")

## Section 8: Verify Merged Output

Check the merged CSV file:

In [None]:
# Browser-driven download.
# Take this route when the CSV is only reachable by clicking a download button
# on the page (which is probably what Realtime Trains requires).

output_file_browser = "data/merged_realtime_trains_browser.csv"

try:
    # Expected to launch a Chrome browser, log in, click the download button
    # and merge the downloaded files.
    result = _collect_csvs_with_browser(
        start_date,
        end_date,
        url_template,
        output_file_browser,
        dest_dir=dest_dir,
        username=username,
        password=password,
    )
    print(f"\n✓ Browser download complete: {result}")
except Exception as exc:
    # Best-effort demo cell: report the failure and hint at the usual cause.
    print(f"✗ Browser download failed: {exc}")
    print("Make sure you customized the selectors in data_collection.py!")

## Section 7: Browser-Based Download (Playwright)

**FOR BUTTON-CLICK DOWNLOADS**

Before running, customize the selectors in `data_collection.py`:
- See `download_csv_with_browser()` comments for `[CUSTOMIZE]` sections
- Use browser DevTools (F12) to find your button and login selectors

In [None]:
# Direct HTTP download.
# Take this route when the CSV can be fetched from its URL with no button click.

try:
    result = collect_csvs(
        start_date,
        end_date,
        url_template,
        output_file,
        dest_dir=dest_dir,
        # Credentials are passed only when a username is set.
        auth=None if not username else (username, password),
    )
    print(f"\n✓ Download complete: {result}")
except Exception as exc:
    # Best-effort demo cell: report the failure instead of raising.
    print(f"✗ Download failed: {exc}")

In [None]:
# Resolve credentials with precedence: env > keyring > interactive prompt.
username = username_env if username_env else username_keyring
password = password_env if password_env else password_keyring

# If either half is still missing, ask for both interactively.
if not (username and password):
    import getpass
    username = input("Username: ")
    password = getpass.getpass("Password: ")

# Run configuration: date range, URL template and output locations.
start_date, end_date = date(2024, 11, 8), date(2024, 11, 10)
url_template = "https://www.realtimetrains.co.uk/search/detailed/gb-nr:RDG/{yyyy}-{mm}-{dd}/0000-2359?stp=WVC&show=all&order=wtt"
output_file = "data/merged_realtime_trains.csv"
dest_dir = "data/raw"

# Echo the settings. Only the first character of the username is shown and the
# password is never printed.
print(
    "Configuration:",
    f"  Start date: {start_date}",
    f"  End date: {end_date}",
    f"  Output: {output_file}",
    sep="\n",
)
if username:
    print(f"  Username: {username[0]}***")
else:
    print("  Username: None")

## Section 6: HTTP-Based Download (Direct Requests)

For sites where you can get a direct URL (no button click needed):

In [None]:
# Make src/modules importable so the data_collection module can be loaded.
# The path is resolved against the current working directory, so the notebook
# has to be started from the directory that contains src/.
modules_dir = str(Path("src/modules").resolve())

# Guard the insert so that re-running this cell does not stack duplicate
# entries at the front of sys.path.
if modules_dir not in sys.path:
    sys.path.insert(0, modules_dir)

from data_collection import (
    download_csv_for_date,
    download_csv_with_browser,
    collect_csvs,
    _collect_csvs_with_browser,
    daterange
)

print("✓ Data collection module loaded")

## Section 5: Load Data Collection Module

Import the downloader functions from your script:

In [None]:
# FIRST TIME ONLY: uncomment the block below to save credentials in the
# system keyring, run the cell once, then comment it out again.
# import getpass
# username = input("Enter username: ")
# password = getpass.getpass("Enter password: ")
# keyring.set_password("realtime-trains", "username", username)
# keyring.set_password("realtime-trains", "password", password)
# print("✓ Credentials stored in system keyring")

# Look up both entries stored under the "realtime-trains" service.
username_keyring = keyring.get_password("realtime-trains", "username")
password_keyring = keyring.get_password("realtime-trains", "password")

# Report the result; only the first character of the username is shown.
if not username_keyring:
    print("✗ No credentials in keyring yet. Uncomment above to store.")
else:
    print(f"✓ Username in keyring (starts with '{username_keyring[0]}'...)")

## Section 4: OS Secret Store - Keyring

**Recommended for secure local storage** (uses Windows Credential Manager, macOS Keychain, or Linux Secret Service)

Store credentials securely once:
```python
keyring.set_password("realtime-trains", "username", "your_username")
keyring.set_password("realtime-trains", "password", "your_password")
```

Then retrieve them in any cell (the password is never printed; only the first character of the username is shown):

In [None]:
# Read the .env file; override=False is passed so that variables already set
# in the environment are not replaced.
load_dotenv(override=False)

username_env = os.environ.get("REALTIME_TRAINS_USERNAME")
password_env = os.environ.get("REALTIME_TRAINS_PASSWORD")

# Confirm what was found without revealing the values: only the first
# character of the username is shown, and nothing of the password.
if not username_env:
    print("✗ No REALTIME_TRAINS_USERNAME in .env")
else:
    print(f"✓ Username loaded from .env (starts with '{username_env[0]}'...)")

print(
    "✓ Password loaded from .env"
    if password_env
    else "✗ No REALTIME_TRAINS_PASSWORD in .env"
)

## Section 3: Local Secret Management - .env File

**RECOMMENDED for local development**

Create a `.env` file in your project root with your credentials:
```
REALTIME_TRAINS_USERNAME=your_username_here
REALTIME_TRAINS_PASSWORD=your_password_here
```

**IMPORTANT**: Add `.env` to `.gitignore` to prevent committing secrets:
```
echo ".env" >> .gitignore
```

In [None]:
# Import core modules
from datetime import date, timedelta, datetime
import os
import sys
from pathlib import Path

# Import secret managers
import keyring
from dotenv import load_dotenv

# Optional cloud secret-manager clients. Each HAS_* flag records whether the
# corresponding package could be imported (install them as needed).
try:
    import boto3
except ImportError:
    HAS_AWS = False
else:
    HAS_AWS = True

try:
    from azure.identity import DefaultAzureCredential
    from azure.keyvault.secrets import SecretClient
except ImportError:
    HAS_AZURE = False
else:
    HAS_AZURE = True

try:
    from google.cloud import secretmanager
except ImportError:
    HAS_GCP = False
else:
    HAS_GCP = True

# Summarise which integrations are available.
print(
    "✓ Core imports loaded",
    f"AWS support: {HAS_AWS}",
    f"Azure support: {HAS_AZURE}",
    f"GCP support: {HAS_GCP}",
    sep="\n",
)

In [None]:
import importlib
import importlib.util
import subprocess
import sys

# Packages this notebook needs: HTTP client, browser automation, and the
# local secret helpers (.env loader and keyring).
packages = [
    "requests",
    "playwright",
    "python-dotenv",
    "keyring",
]

# pip distribution names whose importable module is NOT simply the name with
# "-" replaced by "_". Without this mapping "python-dotenv" would be probed as
# "python_dotenv", never be found, and be reinstalled on every run.
IMPORT_NAMES = {"python-dotenv": "dotenv"}

for pkg in packages:
    module_name = IMPORT_NAMES.get(pkg, pkg.replace("-", "_"))
    # find_spec() checks that the module can be located without importing it.
    if importlib.util.find_spec(module_name) is not None:
        print(f"✓ {pkg} already installed")
    else:
        print(f"Installing {pkg}...")
        # sys.executable makes pip target the interpreter the kernel runs on.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pkg])
        # Let the import system notice the files pip has just written.
        importlib.invalidate_caches()
        print(f"✓ {pkg} installed")

# For playwright, install the browser binaries.
# Run it as a module of the kernel's interpreter instead of relying on a
# "playwright" executable being on PATH. "-q" is not a documented option of
# "playwright install", so output is silenced by redirecting it instead.
try:
    subprocess.check_call(
        [sys.executable, "-m", "playwright", "install"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    print("✓ Playwright browsers installed")
except (subprocess.CalledProcessError, OSError):
    # Best effort only. Catch just the subprocess failures so that
    # KeyboardInterrupt is no longer swallowed by a bare except.
    print("Note: Run 'playwright install' separately if needed")

## Section 2: Install & Import Dependencies

Run this cell once to install the required packages (HTTP client, browser automation, and local secret managers):

In [None]:
# Section 1: Kernel & Python Environment
# Show which interpreter the kernel is running and where relative paths resolve.
import os
import sys

print(
    f"Python executable: {sys.executable}",
    f"Python version: {sys.version}",
    f"Current working directory: {os.getcwd()}",
    sep="\n",
)

# CSV Download & Merge with Secret Management

This notebook demonstrates how to:
1. Run the Realtime Trains CSV downloader in a Jupyter notebook
2. Securely manage credentials using multiple secret managers
3. Support both HTTP requests and browser automation

## Quick Start
- See **Section 1** for kernel setup
- See **Section 2** for dependencies
- See **Section 3** for local secret setup (`.env` file)
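- See **Section 4** for the OS keyring secret store
- See **Sections 5-7** for loading the downloader module and running HTTP or browser downloads
- See **Sections 8-9** for verifying the merged output and cleaning up secrets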