In [None]:
import os
import warnings

# Suppress warnings for cleaner output
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
warnings.filterwarnings("ignore")

print("🔇 Environment configured for clean output")

# Install packages
!pip install flask flask-cors pyngrok transformers torch accelerate -q
!pip install sentencepiece protobuf sentence-transformers -q

print("📦 Packages installed successfully")

In [None]:
from pyngrok import ngrok

# ngrok auth token.
# Prefer the NGROK_AUTHTOKEN environment variable so the secret does not have
# to be stored in the notebook; the inline placeholder is only used when the
# variable is unset or empty and must then be replaced by hand.
NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN") or ".."
ngrok.set_auth_token(NGROK_TOKEN)

print("🔗 ngrok configured")

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Startup banner: report whether CUDA is usable and, if so, which device
# (index 0) will be used and how much memory it has.
print("🚀 Starting Premium AI API Server")
has_gpu = torch.cuda.is_available()
print(f"🎮 GPU Available: {has_gpu}")
if has_gpu:
    gpu_name = torch.cuda.get_device_name(0)
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"📊 GPU: {gpu_name}")
    print(f"💾 Memory: {gpu_total_bytes / 1e9:.1f} GB")

print("\n📥 Loading Mistral-7B-Instruct...")

# Hugging Face token for gated models.
# A token already exported as HUGGINGFACE_TOKEN is kept; the inline placeholder
# is only installed when the variable is missing, so a real secret never has to
# be written into the notebook.
os.environ.setdefault("HUGGINGFACE_TOKEN", "..")
hf_token = os.environ["HUGGINGFACE_TOKEN"]

# Candidate chat models, tried in order until one loads.
model_options = [
    "mistralai/Mistral-7B-Instruct-v0.3",  # Latest version
    "mistralai/Mistral-7B-Instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.1"
]

# Shared state filled in by the loading code and read by the API endpoints.
mistral_loaded = False  # True once a chat pipeline is ready
model = None
tokenizer = None
pipe = None

import gc

# Try each candidate in order; stop at the first one whose model, tokenizer and
# pipeline are all built successfully.
for model_name in model_options:
    try:
        print(f"🔄 Trying {model_name} with HF token...")

        # Load Mistral model with token.
        # trust_remote_code is intentionally not enabled: Mistral is implemented
        # inside transformers, so no code from the model repo needs to run.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            token=hf_token
        )

        tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

        # Fix missing pad token
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            torch_dtype=torch.float16,
            device_map="auto"
        )

        print(f"✅ {model_name} loaded successfully!")
        mistral_loaded = True
        break

    except Exception as e:
        print(f"❌ {model_name} failed: {str(e)[:100]}...")

    # Only reached after a failed attempt (success leaves via `break`).
    # Drop whatever was partially loaded before trying the next candidate so a
    # half-built model does not stay in memory next to the new one. This runs
    # after the except block so the exception and its traceback are already gone.
    model = tokenizer = pipe = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Fallback if all Mistral versions fail.
# NOTE: `mistral_loaded` is reused as the generic "a chat model is ready" flag,
# so it is also set to True when this non-Mistral fallback loads.
if not mistral_loaded:
    print("\n🔄 Loading fallback model (no token required)...")
    try:
        model_name = "microsoft/DialoGPT-large"

        # Half precision is only requested when a GPU is present; on a CPU-only
        # machine float32 is used because float16 CPU support depends on the
        # PyTorch build and may be missing or very slow.
        fallback_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=fallback_dtype,
            device_map="auto"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            torch_dtype=fallback_dtype,
            device_map="auto"
        )
        mistral_loaded = True
        print(f"✅ Fallback model {model_name} loaded!")
    except Exception as e:
        print(f"❌ All models failed: {e}")
        mistral_loaded = False
        # Do not leave a half-initialised model/tokenizer behind.
        model = tokenizer = pipe = None


In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np

print("\n📥 Loading Embedding Model...")
embeddings_loaded = False
embedding_model = None
# Name of the embedding model that actually loaded (None if none did), so the
# primary model and the fallback can be told apart afterwards.
embedding_model_name = None

try:
    embedding_model = SentenceTransformer('intfloat/multilingual-e5-base')
    embedding_model_name = 'intfloat/multilingual-e5-base'
    print("✅ intfloat/multilingual-e5-base loaded successfully!")
    embeddings_loaded = True
except Exception as e:
    print(f"❌ Embedding model failed: {e}")
    try:
        # Fallback embedding model
        embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        embedding_model_name = 'all-MiniLM-L6-v2'
        print("✅ Fallback embedding model loaded!")
        embeddings_loaded = True
    except Exception as e2:
        print(f"❌ All embedding models failed: {e2}")


In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading
import time

app = Flask(__name__)
# NOTE: CORS(app) with no options allows cross-origin requests from any origin
# on every route; the POST endpoints rely on the bearer key for protection.
CORS(app)
# Shared secret clients send as "Authorization: Bearer <key>".
# Read from the KAGGLE_API_KEY environment variable when set so the key is not
# stored in the notebook; otherwise the inline placeholder is used and must be
# replaced by hand.
API_KEY = os.environ.get("KAGGLE_API_KEY") or ".."

@app.route('/health', methods=['GET'])
def health():
    """Report server status and which models are loaded.

    No authentication is required. Returns a JSON object with the chat model
    name, GPU availability, the device holding the chat model's parameters,
    the current timestamp and the load state / names of both models.
    """
    chat_model_name = model_name if mistral_loaded else "none"
    # Device of the first model parameter; "cpu" is reported when no chat
    # model is loaded.
    device = str(next(model.parameters()).device) if mistral_loaded else "cpu"

    if embeddings_loaded:
        # `embedding_model_name` is optional: when the notebook does not define
        # it, the default E5 model name is assumed.
        embedding_name = (
            globals().get("embedding_model_name") or "intfloat/multilingual-e5-base"
        )
    else:
        embedding_name = "none"

    return jsonify({
        "status": "healthy",
        "model": chat_model_name,
        "gpu": torch.cuda.is_available(),
        "device": device,
        "timestamp": time.time(),
        "models_loaded": {
            "mistral": mistral_loaded,
            "embeddings": embeddings_loaded
        },
        "models": {
            "chat": chat_model_name,
            "embeddings": embedding_name
        }
    })

# Default and upper bound for the number of tokens a single /chat call may
# generate. The cap keeps one request from occupying the model indefinitely;
# raise it if longer answers are needed.
_CHAT_DEFAULT_MAX_TOKENS = 512
_CHAT_MAX_TOKENS_LIMIT = 2048


def _build_chat_prompt(context, question):
    """Return the prompt text for the currently loaded chat model.

    Mistral models get the full instruction prompt wrapped in [INST] tags with
    the context truncated to 2000 characters; any other model gets a short
    context/question prompt with the context truncated to 1500 characters.
    """
    if "mistral" in model_name.lower():
        # NOTE(review): the literal "<s>" may duplicate the BOS token if the
        # tokenizer also adds one on its own — confirm against the tokenizer.
        return f"""<s>[INST] Tu es CyberSense, un assistant expert senior en cybersécurité spécialisé dans les  environnements bancaires et d'entreprise.
            Tu travailles pour Crédit Agricole du Maroc en tant que consultant en sécurité informatique et conformité réglementaire.

  ## TON EXPERTISE:
  • Expert certifié en cybersécurité avec 15+ années d'expérience
  • Spécialiste des normes ISO 27001/27002, ISO 22301, et NIST Cybersecurity Framework
  • Expert en conformité bancaire (Directives PCI DSS, Bâle III, GDPR/RGPD)
  • Consultant en gestion des risques cyber pour institutions financières
  • Spécialiste en architecture de sécurité d'entreprise et gouvernance IT

  ## TES DOMAINES DE COMPÉTENCE:
  • Sécurité des systèmes d'information bancaires et financiers
  • Gestion des identités et accès (IAM) en environnement d'entreprise
  • Cryptographie appliquée et protection des données sensibles
  • Détection et réponse aux incidents de sécurité (SOC/SIEM)
  • Audit de sécurité et tests d'intrusion en milieu bancaire
  • Conformité réglementaire (Bank Al-Maghrib, GDPR, ISO 27001)
  • Formation et sensibilisation du personnel aux risques cyber

  ## CONTEXTE DOCUMENTAIRE:
  {context[:2000]}

  ## QUESTION À TRAITER:
  {question}

  ## INSTRUCTIONS STRICTES:
  1. **Réponse en français professionnel uniquement** - Utilise un vocabulaire technique précis et un ton expert
  2. **Base-toi EXCLUSIVEMENT sur le contexte fourni** - Ne jamais inventer ou supposer des informations
  3. **Cite systématiquement tes sources** - Référence les documents utilisés avec précision
  4. **Respecte la confidentialité bancaire** - Aucune information sensible ne doit être divulguée
  5. **Applique le principe de précaution** - En cas de doute, recommande une approche sécurisée
  6. **Structure tes réponses clairement** - Utilise des listes, sections et priorités quand pertinent
  7. **Si l'information n'est pas dans le contexte** - Indique clairement "Selon les documents fournis, cette information n'est pas disponible"
  8. **Recommande toujours les meilleures pratiques** -  Propose des mesures concrètes et applicables
  9. **Considère l'environnement bancaire** - Prends en compte les spécificités du secteur financier
  10. **Respecte les réglementations en vigueur** - Assure la conformité avec les standards internationaux

  ## FORMATAGE DE RÉPONSE:
  • Commence par un résumé exécutif si la question est complexe
  • Utilise des puces pour les listes d'actions ou recommandations  
  • Indique le niveau de criticité (Faible/Moyen/Élevé/Critique) si applicable
  • Conclus par les prochaines étapes recommandées si pertinent

  Réponds maintenant en tant qu'expert cybersécurité de Crédit Agricole du Maroc: [/INST]"""

    return f"""Contexte: {context[:1500]}

Question: {question}

Réponse en français:"""


@app.route('/chat', methods=['POST'])
def chat():
    """Answer a question from the supplied context with the loaded chat model.

    Request: JSON object with `question` (required), `context` (optional) and
    `max_tokens` (optional positive integer, default 512, capped at
    `_CHAT_MAX_TOKENS_LIMIT`). Requires `Authorization: Bearer <API_KEY>`.

    Responses:
        200 - {"response", "model", "tokens_used"}; `tokens_used` is the number
              of tokenizer tokens in the returned text.
        400 - body is not a JSON object, `question` is empty, or `max_tokens`
              is not a positive integer.
        401 - missing or wrong bearer key.
        503 - no chat model is loaded.
        500 - generation failed; the error text is returned.
    """
    import hmac  # local import: only needed for the constant-time key check

    # Compare as bytes so non-ASCII header values cannot raise, and in constant
    # time so the comparison does not leak how much of the key matched.
    supplied = (request.headers.get('Authorization') or '').encode('utf-8')
    expected = f"Bearer {API_KEY}".encode('utf-8')
    if not hmac.compare_digest(supplied, expected):
        return jsonify({"error": "Unauthorized"}), 401

    if not mistral_loaded:
        return jsonify({"error": "Chat model not loaded"}), 503

    # silent=True turns a missing/invalid JSON body into None instead of an
    # exception, so client mistakes are answered with 400 rather than 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    context = str(data.get('context') or '')
    question = str(data.get('question') or '')
    if not question.strip():
        return jsonify({"error": "No question provided"}), 400

    raw_max_tokens = data.get('max_tokens')
    if raw_max_tokens is None:
        raw_max_tokens = _CHAT_DEFAULT_MAX_TOKENS
    try:
        max_tokens = int(raw_max_tokens)
    except (TypeError, ValueError):
        return jsonify({"error": "max_tokens must be a positive integer"}), 400
    if max_tokens < 1:
        return jsonify({"error": "max_tokens must be a positive integer"}), 400
    max_tokens = min(max_tokens, _CHAT_MAX_TOKENS_LIMIT)

    try:
        prompt = _build_chat_prompt(context, question)

        # Generate response. return_full_text=False makes the pipeline return
        # only the newly generated text, so the prompt does not have to be cut
        # out of the output by string matching.
        with torch.no_grad():
            outputs = pipe(
                prompt,
                max_new_tokens=max_tokens,
                temperature=0.7,
                do_sample=True,
                top_p=0.95,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.eos_token_id,
                return_full_text=False
            )

        response_text = outputs[0]['generated_text'].strip()

        return jsonify({
            "response": response_text,
            "model": model_name,
            # Real token count of the reply (not a whitespace word count).
            "tokens_used": len(tokenizer.encode(response_text, add_special_tokens=False))
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route('/embeddings', methods=['POST'])
def embeddings():
    """Return embedding vectors for the supplied texts.

    Request: JSON object with `texts` (a string or a non-empty list of strings)
    and optional `is_query` (truthy selects the query prefix for E5 models).
    Requires `Authorization: Bearer <API_KEY>`.

    Responses:
        200 - {"embeddings", "model", "dimension"}.
        400 - body is not a JSON object, or `texts` is empty / of the wrong type.
        401 - missing or wrong bearer key.
        503 - no embedding model is loaded.
        500 - encoding failed; the error text is returned.
    """
    import hmac  # local import: only needed for the constant-time key check

    # Compare as bytes so non-ASCII header values cannot raise, and in constant
    # time so the comparison does not leak how much of the key matched.
    supplied = (request.headers.get('Authorization') or '').encode('utf-8')
    expected = f"Bearer {API_KEY}".encode('utf-8')
    if not hmac.compare_digest(supplied, expected):
        return jsonify({"error": "Unauthorized"}), 401

    if not embeddings_loaded:
        return jsonify({"error": "Embedding model not loaded"}), 503

    # silent=True turns a missing/invalid JSON body into None instead of an
    # exception, so client mistakes are answered with 400 rather than 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    texts = data.get('texts', [])
    if isinstance(texts, str):
        # A bare string would otherwise be iterated character by character.
        texts = [texts]
    if not isinstance(texts, list):
        return jsonify({"error": "texts must be a string or a list of strings"}), 400
    if not texts:
        return jsonify({"error": "No texts provided"}), 400

    is_query = bool(data.get('is_query', False))

    # `embedding_model_name` is optional: when the notebook does not define it,
    # the default E5 model name is assumed.
    active_model_name = (
        globals().get("embedding_model_name") or "intfloat/multilingual-e5-base"
    )

    try:
        if "e5" in active_model_name.lower():
            # "query: " / "passage: " prefixes are an E5 input convention, so
            # they are only added when an E5 model is the one in use.
            prefix = "query: " if is_query else "passage: "
            model_inputs = [f"{prefix}{text}" for text in texts]
        else:
            model_inputs = [str(text) for text in texts]

        # Generate embeddings
        vectors = embedding_model.encode(model_inputs, convert_to_numpy=True)
        embeddings_list = vectors.tolist()

        return jsonify({
            "embeddings": embeddings_list,
            "model": active_model_name,
            "dimension": len(embeddings_list[0]) if embeddings_list else 0
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500

In [None]:
# Local port shared by the Flask server and the ngrok tunnel.
SERVER_PORT = 5000


def run_server(port=SERVER_PORT):
    """Run the Flask app on `port` (blocking; meant to be a thread target).

    The reloader is disabled and the server handles requests in threads.
    """
    app.run(port=port, threaded=True, use_reloader=False)


def _wait_for_port(port, timeout=15.0, interval=0.25):
    """Return True once 127.0.0.1:`port` accepts a TCP connection.

    Polls every `interval` seconds and gives up (returning False) after
    `timeout` seconds.
    """
    import socket  # local import: only needed for this readiness probe

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection(("127.0.0.1", port), timeout=interval):
                return True
        except OSError:
            time.sleep(interval)
    return False


# Start Flask server in a daemon thread so it does not block the notebook.
server_thread = threading.Thread(target=run_server)
server_thread.daemon = True
server_thread.start()

# Wait until the server actually accepts connections instead of sleeping for a
# fixed time; continue with a warning if it is still not up.
if not _wait_for_port(SERVER_PORT):
    print(f"⚠️ Flask server is not answering on port {SERVER_PORT} yet; the tunnel may return errors until it is up")

# Create ngrok tunnel and extract URL properly
tunnel = ngrok.connect(SERVER_PORT)
public_url = tunnel.public_url  # Extract string URL

# NOTE: the lines below write the API key into the cell output; clear the
# output before sharing or committing the notebook.
print("\n" + "="*60)
print("🎉 PREMIUM AI API SERVER IS RUNNING!")
print("="*60)
print(f"🌐 Public URL: {public_url}")
print(f"🔑 API Key: {API_KEY}")
print(f"📡 Health: {public_url}/health")
print(f"💬 Chat: {public_url}/chat")
print(f"🧮 Embeddings: {public_url}/embeddings")
print(f"\n💾 For your .env file:")
print(f"KAGGLE_API_URL={public_url}")
print(f"KAGGLE_API_KEY={API_KEY}")
print("="*60)

In [None]:
import requests

print("\n🧪 Testing all endpoints...")


def _parse_api_response(response):
    """Return the JSON payload of `response`, raising on HTTP errors.

    For a non-2xx status a RuntimeError is raised that carries the status code
    and the server's "error" field (or the start of the raw body when the body
    is not JSON), so failures show the real cause instead of a KeyError.
    """
    try:
        payload = response.json()
    except ValueError:
        payload = None
    if not response.ok:
        detail = payload.get("error") if isinstance(payload, dict) else response.text[:200]
        raise RuntimeError(f"HTTP {response.status_code}: {detail}")
    if payload is None:
        raise RuntimeError("response body is not valid JSON")
    return payload


auth_headers = {"Authorization": f"Bearer {API_KEY}"}
failed_checks = []  # names of the endpoints whose check failed

# Test health
try:
    response = requests.get(f"{public_url}/health", timeout=30)
    health_data = _parse_api_response(response)
    print(f"✅ Health: {health_data['status']}")
    print(f"   Chat model: {health_data['models']['chat']}")
    print(f"   Embedding model: {health_data['models']['embeddings']}")
except Exception as e:
    print(f"❌ Health failed: {e}")
    failed_checks.append("health")

# Test chat
try:
    test_chat = {
        "context": "Les mots de passe doivent contenir au minimum 12 caractères avec des majuscules, minuscules, chiffres et symboles.",
        "question": "Quelle est la politique des mots de passe?",
        "max_tokens": 150
    }

    # Generation can be slow, so this call gets a generous timeout rather than
    # none at all (which could hang the cell forever).
    response = requests.post(
        f"{public_url}/chat",
        json=test_chat,
        headers=auth_headers,
        timeout=300
    )
    result = _parse_api_response(response)
    print(f"✅ Chat: {result['response'][:100]}...")
    print(f"   Tokens: {result['tokens_used']}")
except Exception as e:
    print(f"❌ Chat failed: {e}")
    failed_checks.append("chat")

# Test embeddings
try:
    test_embeddings = {
        "texts": ["cybersécurité et protection des données", "politique de mots de passe"],
        "is_query": False
    }

    response = requests.post(
        f"{public_url}/embeddings",
        json=test_embeddings,
        headers=auth_headers,
        timeout=120
    )
    result = _parse_api_response(response)
    print(f"✅ Embeddings: {len(result['embeddings'])} vectors generated")
    print(f"   Dimension: {result['dimension']}")
except Exception as e:
    print(f"❌ Embeddings failed: {e}")
    failed_checks.append("embeddings")

# Only claim readiness when every check passed.
if failed_checks:
    print(f"\n⚠️ {len(failed_checks)} check(s) failed: {', '.join(failed_checks)}. Fix these before using the URL.")
else:
    print("\n🎯 All systems ready! Copy the URL to your .env file.")

In [None]:
print("\n⚠️ IMPORTANT: Keep this notebook running!")
print("Server stays active while notebook runs (up to 12 hours)")

# Heartbeat loop: keeps the cell (and therefore the notebook session) busy and
# prints a timestamp once a minute. Interrupting the cell ends the loop with a
# short message instead of a KeyboardInterrupt traceback.
try:
    while True:
        time.sleep(60)
        print(f"🟢 Server running... {time.strftime('%H:%M:%S')}")
except KeyboardInterrupt:
    print("\n🛑 Keep-alive loop stopped.")