# Run Models Notebook

This notebook demonstrates executing code and uploading results to GitHub from Google Cloud Run.

In [None]:
# New version that tests report creation. Note: the cell below carries the 'parameters' tag, which papermill needs in order to inject parameters.

In [None]:
# Parameters (for papermill injection)
# The assignments below are the fallback values; keep them as plain assignments.
folder = None  # recorded in the parameter log/report; not otherwise read by the cells shown here
features = {}  # recorded in the parameter log/report; not otherwise read by the cells shown here
targets = {}  # recorded in the parameter log/report; not otherwise read by the cells shown here
models = None  # recorded in the parameter log/report; not otherwise read by the cells shown here
steps = []  # names of optional steps to run; later cells check for 'debug', 'preview', 'log_summary'

In [None]:
import os
import sys
import base64
import requests
import json
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
from fpdf import FPDF
from google.cloud import secretmanager

In [None]:
# === SETUP ===
# Announce the start on stderr, then derive every per-run location from a
# single timestamp so the remote folder and the local folder always match.
print("Starting notebook execution...", file=sys.stderr)

# Repository that receives the uploaded artifacts.
TARGET_REPO = "modelearth/reports"

# Second-resolution stamp, e.g. 20240131-235959.
timestamp = f"{datetime.now():%Y%m%d-%H%M%S}"

# Remote destination inside TARGET_REPO and the matching local scratch folder.
run_folder = "reports/run-" + timestamp
local_output_dir = os.path.join("output", timestamp)
os.makedirs(local_output_dir, exist_ok=True)

In [None]:

# === PARAMETER CAPTURE ===
# Snapshot every injectable parameter so a run can be understood from its own
# outputs. `parameters_used` is also read later when the markdown report is built.
parameters_used = {
    "folder": folder,
    "features": features,
    "targets": targets,
    "models": models,
    # `steps` decides which optional cells execute, so it is part of the record too.
    "steps": steps,
}
print("=== DEBUG: Parameters Received ===", file=sys.stderr)
# default=str: a value json cannot encode natively is logged as its string form
# instead of aborting the notebook on a purely diagnostic print.
print(json.dumps(parameters_used, indent=2, default=str), file=sys.stderr)

# Persist the same snapshot next to the other run artifacts (best effort:
# a write failure is reported on stderr but does not stop the run).
param_log_path = os.path.join(local_output_dir, "params.json")
try:
    with open(param_log_path, "w") as f:
        json.dump(parameters_used, f, indent=2, default=str)
    print("Saved parameter log:", param_log_path)
except Exception as e:
    print(f"[ERROR] Failed to write parameter log: {e}", file=sys.stderr)

In [None]:
# === AUTHENTICATION ===
def get_github_token():
    """Return a GitHub token, or None when no source provides one.

    Sources are tried in order:
      1. Google Cloud Secret Manager: the latest version of the
         ``github-token`` secret in the project named by GOOGLE_CLOUD_PROJECT.
      2. The GITHUB_TOKEN environment variable. When FLASK_ENV is not "prod",
         a ``.env`` file is loaded first if python-dotenv can be imported.

    Failures in either source are logged to stderr and never raised.
    Surrounding whitespace (such as a trailing newline stored with the secret)
    is stripped, and a blank value is treated as missing.
    """
    # 1. Try Google Cloud Secret Manager first
    try:
        project_id = os.environ.get("GOOGLE_CLOUD_PROJECT")
        if not project_id:
            raise ValueError("GOOGLE_CLOUD_PROJECT not set")

        client = secretmanager.SecretManagerServiceClient()
        name = f"projects/{project_id}/secrets/github-token/versions/latest"
        response = client.access_secret_version(request={"name": name})
        token = response.payload.data.decode("UTF-8").strip()
        if token:
            print("[AUTH] GitHub token retrieved from Secret Manager.", file=sys.stderr)
            return token
        print("[WARN] Secret Manager token fetch failed: secret is empty", file=sys.stderr)
    except Exception as e:
        print(f"[WARN] Secret Manager token fetch failed: {e}", file=sys.stderr)

    # 2. Fallback to environment variable (e.g., from .env)
    # Loading .env is optional and isolated in its own try block: a missing
    # python-dotenv package must not prevent reading GITHUB_TOKEN below.
    if os.environ.get("FLASK_ENV") != "prod":
        try:
            from dotenv import load_dotenv
            load_dotenv()
            print("[DEBUG] .env loaded", file=sys.stderr)
        except Exception as e:
            print(f"[WARN] Failed to load token from .env fallback: {e}", file=sys.stderr)

    token = (os.environ.get("GITHUB_TOKEN") or "").strip()
    if token:
        print("[AUTH] GitHub token loaded from environment.", file=sys.stderr)
        return token

    print("[ERROR] GitHub token not found anywhere!", file=sys.stderr)
    return None

# Assign for global use
# Resolved once here; the upload cells test this value and skip their work
# when it is None (no token source succeeded).
GITHUB_TOKEN = get_github_token()


In [None]:
# === SAMPLE FILE GENERATION (USE THESE AS REFERENCE) ===
# Each numbered snippet writes one small artifact into `local_output_dir`.

# 1. Plot (PNG)
plot_path = os.path.join(local_output_dir, "test-plot.png")
x_values = [1, 2, 3]
plt.plot(x_values, [x * x for x in x_values])
plt.title("Test Plot")
plt.savefig(plot_path)
print("Saved PNG:", plot_path)

# 2. HTML
html_path = os.path.join(local_output_dir, "test.html")
html_markup = "<h1>This is a test HTML output</h1>"
with open(html_path, "w") as html_file:
    html_file.write(html_markup)
print("Saved HTML:", html_path)

# 3. CSV (manual)
csv_path = os.path.join(local_output_dir, "test.csv")
csv_text = "a,b,c\n1,2,3\n4,5,6"
with open(csv_path, "w") as csv_file:
    csv_file.write(csv_text)
print("Saved CSV:", csv_path)

# 4. DataFrame CSV
df_csv_path = os.path.join(local_output_dir, "test-data.csv")
df = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
df.to_csv(df_csv_path, index=False)
print("Saved DataFrame CSV:", df_csv_path)



In [None]:
# STEP A
# To make a cell optional but selectable via the UI, tag it like 'step:step_name'
# and wrap its body in a conditional such as the one below.
# (Optional TODO pipeline improvement: use papermill for this; out of scope currently.)
if "debug" in steps:
    print("✅ STEP A (debug): Running debug routines")
    # 5. PDF
    pdf_path = os.path.join(local_output_dir, "test-report.pdf")
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font(family="Arial", size=12)
    pdf.cell(w=200, h=10, txt="Hello, this is a test PDF report.", ln=True)
    pdf.output(pdf_path)
    print("Saved PDF:", pdf_path)

In [None]:
# STEP B
# Optional: show the first rows of the sample DataFrame when 'preview' is requested.
if "preview" in steps:
    from IPython.display import display

    print("✅ STEP B (preview): Showing sample DataFrame")
    sample_rows = df.head()
    display(sample_rows)

In [None]:
# STEP C
# Optional: drop a one-line summary log into the run's output folder.
if "log_summary" in steps:
    print("✅ STEP C (log_summary): Saving summary log")
    summary_log_path = os.path.join(local_output_dir, "summary.log")
    with open(summary_log_path, "w") as summary_log:
        summary_log.write("Notebook execution completed with test steps.\n")

In [None]:
# === GITHUB UPLOAD HELPERS ===

def upload_file_to_github(local_path, remote_path, repo, token, branch='main', timeout=30):
    """Upload one local file to a GitHub repository via the REST contents API.

    The file is read as bytes, base64-encoded and sent with
    ``PUT /repos/{repo}/contents/{remote_path}``. No blob ``sha`` is sent, so
    the call is meant for creating new paths.

    Args:
        local_path: Path of the file to read.
        remote_path: Destination path inside the repository.
        repo: Repository in ``owner/name`` form.
        token: GitHub token placed in the Authorization header.
        branch: Branch that receives the commit.
        timeout: Seconds to wait for GitHub before giving up.

    Returns:
        True when GitHub answers 200 or 201, False on any other status or when
        the request itself fails (connection error, timeout, ...).

    Raises:
        OSError: If ``local_path`` cannot be read.
    """
    from urllib.parse import quote  # local import: only this helper needs it

    with open(local_path, "rb") as f:
        content_encoded = base64.b64encode(f.read()).decode('utf-8')
    # Percent-encode the path ("/" is kept) so names containing spaces, '#' or
    # '?' still produce a valid URL.
    url = f"https://api.github.com/repos/{repo}/contents/{quote(remote_path)}"
    headers = {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }
    data = {
        'message': f"Add {remote_path}",
        'content': content_encoded,
        'branch': branch
    }
    try:
        # Without a timeout a stalled connection would block the run indefinitely.
        resp = requests.put(url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as e:
        print(f"Failed to upload {remote_path}: {e}", file=sys.stderr)
        return False

    succeeded = resp.status_code in (200, 201)
    # Only claim "Uploaded" when GitHub actually accepted the file.
    outcome = "Uploaded" if succeeded else "Failed to upload"
    print(f"{outcome} {remote_path}: {resp.status_code} - {resp.reason}", file=sys.stderr)
    return succeeded

In [None]:
# === UPLOAD FILES ===
# Push every regular file from this run's output folder to
# `{run_folder}/<filename>` in TARGET_REPO. Skipped entirely without a token.

if GITHUB_TOKEN:
    print("=== GITHUB UPLOAD STAGE STARTED ===", file=sys.stderr)
    failed_uploads = []
    # sorted(): os.listdir order is arbitrary; a stable order keeps logs comparable.
    for filename in sorted(os.listdir(local_output_dir)):
        local_path = os.path.join(local_output_dir, filename)
        # The upload helper opens the path as a file, so a sub-directory would crash it.
        if not os.path.isfile(local_path):
            print(f"Skipping non-file entry: {local_path}", file=sys.stderr)
            continue
        remote_path = f"{run_folder}/{filename}"
        if not upload_file_to_github(local_path, remote_path, TARGET_REPO, GITHUB_TOKEN):
            failed_uploads.append(filename)
    # Surface failures in one place instead of leaving them buried in per-file lines.
    if failed_uploads:
        print(
            f"[WARN] {len(failed_uploads)} upload(s) failed: {', '.join(failed_uploads)}",
            file=sys.stderr,
        )
else:
    print("No GitHub token available - skipping upload", file=sys.stderr)

In [None]:
# === BASIC MARKDOWN LOG ===

def upload_simple_report(repo, token, branch='main', commit_message='Report from notebook execution', timeout=30):
    """Build a short markdown execution report and upload it to GitHub.

    The report lists the generation time, the GOOGLE_CLOUD_PROJECT value and
    every entry of the module-level ``parameters_used`` dict. It is uploaded to
    ``reports/execution-{timestamp}.md`` (``timestamp`` is the module-level run
    stamp) through the REST contents API.

    Args:
        repo: Repository in ``owner/name`` form.
        token: GitHub token placed in the Authorization header.
        branch: Branch that receives the commit.
        commit_message: Commit message for the upload.
        timeout: Seconds to wait for GitHub before giving up.

    Returns:
        True when GitHub answers 200 or 201, False on any other status or when
        the request itself fails.
    """
    report_md = f"""
# Execution Report

Generated: {datetime.now().isoformat()}
Environment: Google Cloud Run
Project: {os.environ.get('GOOGLE_CLOUD_PROJECT', 'unknown')}
Status: Success

## Parameters Used
"""
    for key, value in parameters_used.items():
        report_md += f"- **{key}**: `{value}`\n"
    report_md += "\n---\n"

    file_path = f"reports/execution-{timestamp}.md"
    content_encoded = base64.b64encode(report_md.encode('utf-8')).decode('utf-8')
    url = f'https://api.github.com/repos/{repo}/contents/{file_path}'
    headers = {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }
    data = {
        'message': commit_message,
        'content': content_encoded,
        'branch': branch
    }
    try:
        # Without a timeout a stalled connection would block the run indefinitely.
        resp = requests.put(url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as e:
        # A network problem should be reported, not abort the notebook at its last step.
        print(f"Failed to upload .md report: {e}", file=sys.stderr)
        return False

    if resp.status_code in (200, 201):
        print("Simple .md report uploaded successfully", file=sys.stderr)
        return True
    print(f"Failed to upload .md report: {resp.status_code} - {resp.text}", file=sys.stderr)
    return False

# Publish the markdown report only when a token was resolved, then mark the
# end of this stage on stderr.
if not GITHUB_TOKEN:
    print("No token for .md report upload", file=sys.stderr)
else:
    upload_simple_report(TARGET_REPO, GITHUB_TOKEN)

print("=== NOTEBOOK COMPLETED ===", file=sys.stderr)


In [None]:
import subprocess

def run_and_print(command, label=None):
    """Run a command and print a heading, its stdout and (if any) its stderr.

    Args:
        command: Either a shell command line (str, run through the shell) or
            an argument sequence (run directly, without a shell).
        label: Heading text; defaults to the command itself.

    Returns:
        None. The exit status is not checked, so a failing command only shows
        up through whatever it wrote to stderr.
    """
    runs_in_shell = isinstance(command, str)
    if label is None:
        # Fall back to the command so the heading never reads "### None ###".
        label = command if runs_in_shell else " ".join(str(part) for part in command)
    print(f"\n### {label} ###\n")
    # An argument list combined with shell=True would drop its arguments on
    # POSIX, so the shell is only used for string commands.
    result = subprocess.run(command, shell=runs_in_shell, capture_output=True, text=True)
    print(result.stdout)
    if result.stderr:
        print("stderr:", result.stderr)

# Environment diagnostics: git configuration, working-tree state, directory contents.
# NOTE(review): `git config --list` can print credential-related settings;
# confirm this output is safe to keep in the executed notebook.
for shell_command, heading in (
    ("git config --list", "Git Config"),
    ("git status", "Git Status"),
    ("ls -lah", "Directory Listing"),
):
    run_and_print(shell_command, heading)
