In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the LoRA adapter loaded later in this
# notebook is read from a path under /content/drive/MyDrive.
drive.mount('/content/drive')

# Install required packages
# (-q keeps pip quiet; versions are not pinned, so results may differ between runs)
!pip install -q bitsandbytes accelerate transformers peft flask flask-cors

# Setup ngrok
# Download the ngrok v3 Linux amd64 tarball, unpack it, and move the binary
# into /usr/local/bin so later cells can invoke it simply as `ngrok`.
!wget -q https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz
!tar -xvzf ngrok-v3-stable-linux-amd64.tgz
!mv ngrok /usr/local/bin/ngrok

# Add your ngrok auth token
# SECURITY: the token must never be hard-coded in the notebook, because anyone
# who can read the file could then use the account. It is taken from the
# NGROK_AUTH_TOKEN environment variable when set, otherwise it is requested
# interactively (getpass does not echo the value into the cell output).
# Any token that was previously committed in this cell should be treated as
# leaked and revoked in the ngrok dashboard.
import os
import subprocess
from getpass import getpass

NGROK_AUTH_TOKEN = (os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")).strip()
if not NGROK_AUTH_TOKEN:
    raise ValueError("An ngrok auth token is required to open the tunnel.")

# Pass the token as a separate argv element instead of interpolating it into a
# shell line; check=True surfaces a failed `ngrok config` as an exception.
subprocess.run(["ngrok", "config", "add-authtoken", NGROK_AUTH_TOKEN], check=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m60.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━

In [None]:
# Stop any ngrok process that is already running (e.g. one left over from an
# earlier run of this notebook) before a new tunnel is started.
!pkill ngrok


In [None]:
# === Flask + Ngrok Serve ===
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
import subprocess
import threading
import requests
import time

# Flask application object; CORS(app) applies flask-cors with its default settings.
app = Flask(__name__)
CORS(app)

# === Load Model ===
# Base checkpoint on the Hugging Face Hub and the LoRA adapter stored on Drive.
base_model_id = "fdtn-ai/Foundation-Sec-8B"
lora_path = "/content/drive/MyDrive/fs8b-nist-finetuned"

# 4-bit NF4 quantization with float16 compute; fp32 CPU offload is enabled.
bnb_config = BitsAndBytesConfig(
    llm_int8_enable_fp32_cpu_offload=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the quantized base model, letting device_map="auto" decide placement,
# then attach the LoRA adapter in inference-only mode.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
)
model = PeftModel.from_pretrained(base_model, lora_path, is_trainable=False)

# The tokenizer comes from the base checkpoint, not from the adapter directory.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# === RBAC + NIST Keyword Filtering ===
# The six function names that appear as permissions below.
_ALL_CSF_FUNCTIONS = ("Govern", "Identify", "Protect", "Detect", "Respond", "Recover")

# Role name -> functions that role is allowed to ask about.
# A role that is not listed here has no permitted functions.
role_permissions = {
    "Security Analyst": ["Identify", "Detect"],
    "Security Engineer": ["Identify", "Protect", "Detect"],
    "Threat Intelligence": ["Detect", "Respond"],
    "Security Administrator": ["Protect", "Respond", "Recover"],
    "Reverse Engineer": ["Detect", "Respond"],
    "Security Consultant": ["Govern", "Protect", "Recover"],
    "Security Architect": ["Govern", "Identify", "Protect", "Recover"],
    # Admin is granted every function.
    "Admin": list(_ALL_CSF_FUNCTIONS),
}

# Function name -> lowercase keywords whose presence in a prompt marks the
# prompt as touching that function.
function_keywords = {
    "Govern": ["governance", "policy", "strategy", "oversight"],
    "Identify": ["asset", "inventory", "risk assessment", "identify"],
    "Protect": ["access control", "training", "data security", "protect"],
    "Detect": ["monitoring", "detection", "anomaly", "event", "detect"],
    "Respond": ["incident", "response", "communication", "respond"],
    "Recover": ["recovery", "restore", "lessons learned", "recover"]
}


def get_functions_from_prompt(prompt):
    """Return the functions whose keywords occur in ``prompt``.

    The prompt is lower-cased and each keyword is tested with a plain
    substring check, so a keyword also matches inside a longer word
    (for example "prevent" contains "event" and therefore yields "Detect").

    Args:
        prompt: The user's prompt text (must support ``.lower()``).

    Returns:
        A list of function names, in ``function_keywords`` order, with each
        name appearing at most once.
    """
    lowered = prompt.lower()
    matched = []
    for csf_function, triggers in function_keywords.items():
        for trigger in triggers:
            if trigger in lowered:
                matched.append(csf_function)
                break  # one hit is enough for this function
    return matched

@app.route('/')
def home():
    """Landing route: return a fixed status string.

    The text is static; nothing about the model or the tunnel is actually
    checked when this route is called.
    """
    status_message = "✅ Model + Ngrok are running!"
    return status_message

@app.route('/generate', methods=['POST'])
def generate():
    """Answer a prompt with the fine-tuned model, subject to role-based filtering.

    Expects a JSON object body with:
        prompt (str): the user's question; defaults to "".
        role (str): the caller's role; defaults to "Default". A role missing
            from ``role_permissions`` has no permitted functions.

    Returns:
        A JSON object with ``response`` (str) and ``access_granted`` (bool).
        A body that is not a JSON object, or a non-string ``prompt``/``role``,
        yields HTTP 400 rather than an unhandled exception.

    NOTE(review): ``role`` is taken verbatim from the request body and is not
    authenticated, so this filter is advisory only — confirm that is intended.
    """
    # silent=True returns None for a missing/invalid JSON body instead of
    # raising, so every malformed request is rejected in one place below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            "response": "❌ Bad Request: body must be a JSON object.",
            "access_granted": False,
        }), 400

    prompt = data.get("prompt", "")
    role = data.get("role", "Default")
    if not isinstance(prompt, str) or not isinstance(role, str):
        return jsonify({
            "response": "❌ Bad Request: 'prompt' and 'role' must be strings.",
            "access_granted": False,
        }), 400

    functions_requested = get_functions_from_prompt(prompt)
    allowed = role_permissions.get(role, [])
    denied = [func for func in functions_requested if func not in allowed]

    if denied:
        return jsonify({
            "response": "⛔ Access Denied: Your role does not allow access to: " + ", ".join(denied),
            "access_granted": False,
        })

    # Inject role into prompt
    full_prompt = f"As a {role} working with NIST CSF 2.0, {prompt}"
    # Follow the model's own placement (it is loaded with device_map="auto")
    # instead of assuming a device literally named "cuda".
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    # Only floating-point tensors are cast to float16; integer tensors pass
    # through unchanged.
    inputs = {k: v.to(torch.float16) if torch.is_floating_point(v) else v for k, v in inputs.items()}
    output = model.generate(**inputs, max_new_tokens=200)
    # NOTE(review): the whole of output[0] is decoded, which presumably
    # includes the prompt tokens as well as the continuation — confirm that
    # callers expect the prompt to be echoed back.
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    print("✅ Decoded output:", decoded)

    return jsonify({
        "response": decoded,
        "access_granted": True,
    })

# === Ngrok Setup ===
def start_ngrok():
    """Launch an ngrok HTTP tunnel to local port 5000 and report its public URL.

    Starts ``ngrok http 5000`` as a background process, then polls ngrok's
    local API on port 4040 until an https tunnel is listed.

    Returns:
        The https public URL as a string, or None if it could not be
        obtained within the polling window.
    """
    subprocess.Popen(["ngrok", "http", "5000"])

    last_error = None
    # Poll rather than rely on one fixed sleep: ngrok's start-up time varies,
    # and the local API refuses connections until the agent is up.
    for _ in range(10):
        time.sleep(1)
        try:
            # A timeout keeps a wedged local API from blocking this call forever.
            r = requests.get("http://localhost:4040/api/tunnels", timeout=3)
            for tunnel in r.json()['tunnels']:
                if tunnel['proto'] == 'https':
                    print("✅ NGROK PUBLIC URL:", tunnel['public_url'])
                    return tunnel['public_url']
            last_error = "no https tunnel listed"
        except Exception as e:
            # Best effort: remember the failure and try again.
            last_error = e

    print("❌ Couldn't fetch ngrok URL:", last_error)
    return None

def start_flask():
    """Run the Flask app's built-in server, bound to 0.0.0.0 on port 5000."""
    bind_host, bind_port = "0.0.0.0", 5000
    app.run(host=bind_host, port=bind_port)

# === Start Both ===
# Each entry point gets its own thread, Flask first and then ngrok, so the
# notebook cell returns instead of blocking on the server.
for entry_point in (start_flask, start_ngrok):
    threading.Thread(target=entry_point).start()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/630 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/840 [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
import subprocess
import requests
import time

def start_ngrok():
    """Launch an ngrok HTTP tunnel to local port 5000 and return its public URL.

    Starts ``ngrok http 5000`` as a background process, then polls ngrok's
    local API on port 4040 until an https tunnel is listed.

    Returns:
        The https public URL as a string, or None if it could not be
        obtained within the polling window.
    """
    subprocess.Popen(["ngrok", "http", "5000"])

    last_error = None
    # Poll rather than rely on one fixed sleep: ngrok's start-up time varies,
    # and the local API refuses connections until the agent is up.
    for _ in range(10):
        time.sleep(1)
        try:
            # A timeout keeps a wedged local API from blocking this call forever.
            r = requests.get("http://localhost:4040/api/tunnels", timeout=3)
            for tunnel in r.json()['tunnels']:
                if tunnel['proto'] == 'https':
                    print("✅ NGROK PUBLIC URL:", tunnel['public_url'])
                    return tunnel['public_url']
            last_error = "no https tunnel listed"
        except Exception as e:
            # Best effort: remember the failure and try again.
            last_error = e

    print("❌ Couldn't fetch ngrok URL:", last_error)
    return None

# Open the tunnel now. As the last expression of the cell, the value returned
# by start_ngrok() is also shown as the cell's output.
start_ngrok()


✅ NGROK PUBLIC URL: https://8e23970b6b45.ngrok-free.app


'https://8e23970b6b45.ngrok-free.app'

import requests

# Smoke test: call the local /generate endpoint directly (bypassing ngrok)
# with a sample prompt and role, then show the HTTP status and JSON reply.
generate_url = "http://127.0.0.1:5000/generate"
sample_payload = {
    "prompt": "functions in NIST CSF 2.0?",
    "role": "Security Analyst",
}

res = requests.post(generate_url, json=sample_payload)

print(res.status_code)
print(res.json())
