In [4]:
# mount drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# ðŸš€ ArgLegalSumm Streamlit Web App on Google Colab

Run the legal judgment summarizer Streamlit app directly in Google Colab and expose it on a public URL.

Highlights:
- No local setup required; runs entirely on Colab.
- Two tunnel options for a public URL:
  - Option A: pyngrok (token optional; recommended for stability)
  - Option B: cloudflared (no token needed)
- Load your model by uploading a .zip inside the app, or mount Google Drive and paste the path.

Follow the notebook top-to-bottom. Only customize the configuration cell as needed.

In [23]:
# Environment check
import sys, subprocess, json, os, time, pathlib, shutil
from pathlib import Path
import torch
print('Python:', sys.version)
print('CUDA available:', torch.cuda.is_available())

Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
CUDA available: False


In [24]:
# Install dependencies (ngrok optional; cloudflared optional)
# Safe to run multiple times
# Note: Colab images are reset on runtime restart
!pip -q install streamlit "transformers>=4.38.0" peft accelerate pypdf sentencepiece
!pip -q install pyngrok cloudflared
import importlib, sys
from pathlib import Path
import shutil
for pkg in ["streamlit","transformers","peft","accelerate","pypdf","sentencepiece","pyngrok","cloudflared"]:
    try:
        importlib.import_module(pkg)
        print(f"OK: {pkg}")
    except Exception as e:
        print(f"WARN: {pkg} import failed: {e}")

# Try to find cloudflared executable path after installation
cloudflared_path = shutil.which("cloudflared")
if cloudflared_path:
    print(f"\nFound cloudflared executable at: {cloudflared_path}")
else:
    print("\nCould not automatically find cloudflared executable in PATH.")
    # Attempt to find it in common pip install locations
    for path in sys.path:
        possible_path = Path(path) / ".." / "bin" / "cloudflared"
        if possible_path.exists():
            print(f"Found possible cloudflared path in sys.path: {possible_path.resolve()}")
            break
    else:
        print("Could not find cloudflared executable in common pip install locations.")

OK: streamlit
OK: transformers
OK: peft
OK: accelerate
OK: pypdf
OK: sentencepiece
OK: pyngrok
WARN: cloudflared import failed: cannot import name 'CloudFlare' from 'cloudflared.cloudflare' (/usr/local/lib/python3.12/dist-packages/cloudflared/cloudflare.py)

Could not automatically find cloudflared executable in PATH.
Could not find cloudflared executable in common pip install locations.


In [12]:
# Configuration (edit as needed)
APP_PORT = 8501
USE_NGROK = False  # Set False to use cloudflared
NGROK_AUTH_TOKEN = ""  # optional but helpful for stability
BASE_MODEL = "allenai/led-base-16384"  # used by app when loading LoRA adapters
import os
os.environ["BASE_MODEL"] = BASE_MODEL
print({"APP_PORT": APP_PORT, "USE_NGROK": USE_NGROK, "BASE_MODEL": BASE_MODEL})

{'APP_PORT': 8501, 'USE_NGROK': False, 'BASE_MODEL': 'allenai/led-base-16384'}


In [8]:
# Hardcoded path for the webapp folder (replace with your actual path)
# Example: /content/arglegalsumm-master/webapp (if you cloned the repo to /content)
hardcoded_webapp_path = "/content/drive/MyDrive/legal summarizer/output/dir/webapp" # TODO: change to your path

assert os.path.isdir(hardcoded_webapp_path), f'Missing: {hardcoded_webapp_path}'

# Validate presence of app_streamlit.py
app_py = Path(hardcoded_webapp_path) / 'app_streamlit.py'
assert app_py.exists(), f'Expected app_streamlit.py at {app_py}. Ensure you specified the correct folder.'
print('Found:', app_py)

Found: /content/drive/MyDrive/legal summarizer/output/dir/webapp/app_streamlit.py


In [13]:
# Optional: set DEFAULT_MODEL_DIR for the app; otherwise use Upload .zip in app sidebar
import os
DEFAULT_MODEL_DIR =  "/content/drive/MyDrive/legal summarizer/output/dir/best"
if DEFAULT_MODEL_DIR:
    os.environ["MODEL_DIR"] = DEFAULT_MODEL_DIR
    print('MODEL_DIR set to', DEFAULT_MODEL_DIR)
else:
    print('MODEL_DIR not set; use Upload .zip in the app sidebar.')

# Ensure BASE_MODEL is exported (used for LoRA adapters)
os.environ["BASE_MODEL"] = os.environ.get("BASE_MODEL", BASE_MODEL)
print('BASE_MODEL:', os.environ["BASE_MODEL"])

MODEL_DIR set to /content/drive/MyDrive/legal summarizer/output/dir/best
BASE_MODEL: allenai/led-base-16384


In [None]:
# Start Streamlit app in background
import subprocess, time
cmd = f"cd /content/webapp && streamlit run app_streamlit.py --server.headless true --server.port {APP_PORT}"
print('Starting Streamlit with:', cmd)
sp = subprocess.Popen(["bash","-lc", cmd], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
time.sleep(3)
print('Streamlit process started (background).')

Starting Streamlit with: cd /content/webapp && streamlit run app_streamlit.py --server.headless true --server.port 8501
Streamlit process started (background).


In [15]:
# 7A: Expose via pyngrok (if USE_NGROK)
from pyngrok import ngrok
public_url = None
if USE_NGROK:
    if NGROK_AUTH_TOKEN:
        ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    tunnel = ngrok.connect(APP_PORT, "http")
    public_url = tunnel.public_url
    print('Public URL (ngrok):', public_url)
else:
    print('Skipping ngrok; see 7B for cloudflared.')

Skipping ngrok; see 7B for cloudflared.


In [22]:
# 7B (robust): Install and expose via cloudflared (no token)
import os, re, sys, subprocess, time, shutil, stat, urllib.request

def ensure_cloudflared():
    path = shutil.which("cloudflared")
    if path:
        print("cloudflared in PATH:", path)
        return path
    # Download static binary to /content/cloudflared
    url = "https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64"
    dst = "/content/cloudflared"
    print("Downloading cloudflared to", dst)
    urllib.request.urlretrieve(url, dst)
    os.chmod(dst, os.stat(dst).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
    return dst

cf_bin = ensure_cloudflared()
print("Starting cloudflared tunnel...")
sp_tunnel = subprocess.Popen(["bash","-lc", f"{cf_bin} tunnel --url http://localhost:{APP_PORT} --no-autoupdate"],
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

# Parse output for trycloudflare URL
url = None
for _ in range(400):
    line = sp_tunnel.stdout.readline()
    if not line:
        time.sleep(0.1)
        continue
    if "trycloudflare.com" in line:
        m = re.search(r"https://[\\w.-]+trycloudflare.com", line)
        if m:
            url = m.group(0)
            print("Public URL (cloudflared):", url)
            break
if not url:
    print("Could not detect cloudflared URL yet; check logs above.")

Downloading cloudflared to /content/cloudflared
Starting cloudflared tunnel...


KeyboardInterrupt: 

# Troubleshooting
- If the app doesnâ€™t open:
  - Re-run the install cell and restart the runtime if needed.
  - Check that /content/webapp/app_streamlit.py exists (Cell 4 validates this).
  - For large models, prefer Drive path instead of Upload .zip.
  - In the app sidebar, you can upload your model/adapter .zip and set the Base model for LoRA (e.g., google/pegasus-large).
- To stop/restart:
  - Interrupt runtime or re-run the Start Streamlit cell; it will spawn a new process on the same port.
  - Change APP_PORT in the config cell if port conflicts.
- If ngrok rate limits:
  - Provide your NGROK_AUTH_TOKEN or switch USE_NGROK=False to use cloudflared.