In [None]:
# Install the Python packages imported by the API cell further down.
!pip install flask flask-cors pyngrok nest_asyncio spacy sentence-transformers pypdf2
# Download the French spaCy pipeline "fr_core_news_lg".
!python -m spacy download fr_core_news_lg

# Run the Ollama install script fetched from ollama.com.
!curl -fsSL https://ollama.com/install.sh | sh

import subprocess
import time

# Launch the Ollama server as a background child process (it is not waited on).
process = subprocess.Popen(["ollama", "serve"])

# Fixed pause before pulling the model; nothing here checks that the server
# is actually accepting connections yet.
time.sleep(5)

# Fetch the llama3 model.
!ollama pull llama3

print("PRÊT ")

In [None]:
import subprocess
import time

def _ollama_repond():
    """Return True when the local Ollama HTTP endpoint answers a request."""
    try:
        check = subprocess.run(
            # --max-time keeps a half-open socket from stalling the probe.
            ["curl", "-s", "--max-time", "2", "http://127.0.0.1:11434"],
            capture_output=True,
        )
    except OSError:
        # curl itself could not be launched (for example, it is not installed).
        return False
    return check.returncode == 0


# Only start a server when none is answering yet: a second `ollama serve`
# cannot bind the same port and would exit immediately.
server_ready = _ollama_repond()

if not server_ready:
    # The output is discarded rather than piped: pipes that nobody reads fill
    # up and eventually block the child process.
    process = subprocess.Popen(
        ["ollama", "serve"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # Poll for up to ~30 seconds until the endpoint responds.
    for _ in range(30):
        if _ollama_repond():
            server_ready = True
            break
        if process.poll() is not None:
            # The server process has exited; waiting longer cannot help.
            break
        time.sleep(1)

if server_ready:
    print(" Server active")

if not server_ready:
    print("Ollama refused to start.")
else:
    pull = subprocess.run(["ollama", "pull", "llama3"])
    # Announce readiness only when the model was actually downloaded.
    if pull.returncode == 0:
        print(" Ready")
    else:
        print(f"ollama pull failed (exit code {pull.returncode}).")

In [None]:
import json
import pickle
import requests
import spacy
import re
import os
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
from sentence_transformers import SentenceTransformer, util
from PyPDF2 import PdfReader
import nest_asyncio

# ==========================================
# CONFIGURATION
# ==========================================
# Applied before the Flask app is created.
# NOTE(review): presumably needed because the notebook already runs an event
# loop — confirm.
nest_asyncio.apply()

app = Flask(__name__)

# Emit JSON keys in the order the code builds them instead of sorting them.
app.json.sort_keys = False

CORS(app)

OLLAMA_API_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "llama3"
EMBEDDING_MODEL = "paraphrase-multilingual-MiniLM-L12-v2"
SPACY_MODEL = "fr_core_news_lg"

# Minimum similarity score for a sentence to be matched to a knowledge-base
# entry.
SIMILARITY_THRESHOLD = 0.40

# ==========================================
# RESOURCE LOADING
# ==========================================
print("Chargement des modèles...")
nlp = spacy.load(SPACY_MODEL)
embedder = SentenceTransformer(EMBEDDING_MODEL)

# Pickled knowledge base: a mapping with a 'data' entry and a 'vectors' entry.
PKL_PATH = "ai_brain2.pkl"
KB_DATA = None
KB_VECS = None

if os.path.exists(PKL_PATH):
    print(f" Chargement de '{PKL_PATH}'...")
    # SECURITY: unpickling executes arbitrary code embedded in the file. Only
    # load a knowledge base produced by a trusted source.
    try:
        with open(PKL_PATH, 'rb') as f:
            c = pickle.load(f)
        # Both globals are bound in a single statement so a malformed file can
        # never leave KB_DATA set while KB_VECS is still None.
        KB_DATA, KB_VECS = c['data'], c['vectors']
    except (OSError, EOFError, pickle.UnpicklingError, AttributeError,
            ImportError, IndexError, KeyError, TypeError) as e:
        # Degrade exactly like the "file missing" case: the knowledge base
        # stays unloaded instead of the whole cell crashing.
        print(f" '{PKL_PATH}' illisible : {e}")
else:
    print(f" '{PKL_PATH}' introuvable ")

# ==========================================
# FONCTIONS UTILITAIRES
# ==========================================

# Heading detector: a structural keyword (optionally plural) that is not the
# prefix of a longer word, or a numbering prefix such as "3." or "iv.".
# The look-ahead rejects "Partiellement ..." while still accepting "Chapitre1".
_TITRE_RE = re.compile(
    r"^(?:(?:chapitre|partie|section|module|introduction|conclusion|annexe)"
    r"s?(?![^\W\d_])"
    r"|\d+\.|[ivx]+\.)",
    re.IGNORECASE,
)


def extraire_pdf_stream(file_stream):
    """Extract the text of a PDF and group its sentences by detected heading.

    Args:
        file_stream: A file-like object (or path) accepted by ``PdfReader``.

    Returns:
        A dict mapping each heading to the list of sentences that follow it.
        Sentences found before any heading go under ``"Introduction"``.
        Sentences shorter than 10 characters are ignored; a sentence shorter
        than 60 characters that matches the heading pattern opens a section.

    Any error raised while reading or parsing is printed, and whatever was
    collected so far is returned (best effort).
    """
    current = "Introduction"
    structure = {current: []}

    try:
        reader = PdfReader(file_stream)
        # Pages with no extractable text yield a falsy value and are skipped.
        page_texts = (p.extract_text() for p in reader.pages)
        text = "".join(f"{t} " for t in page_texts if t)

        # Collapsing every whitespace run (newlines included) to one space.
        clean_text = re.sub(r'\s+', ' ', text)

        # NOTE(review): spaCy rejects texts longer than `nlp.max_length`; that
        # error would be caught below and leave only the empty default
        # section — confirm whether very large PDFs must be supported.
        doc = nlp(clean_text)

        for sent in doc.sents:
            line = sent.text.strip()
            if len(line) < 10:
                continue

            if len(line) < 60 and _TITRE_RE.match(line):
                current = line
                # A repeated heading keeps appending to its existing section.
                structure.setdefault(current, [])
            else:
                structure[current].append(line)
    except Exception as e:
        print(f" Erreur PDF : {e}")
    return structure

def interroger_llama_batch(liste_phrases_contextes, timeout=(10, 600)):
    """Ask the Ollama model to check and rephrase a batch of sentences.

    Args:
        liste_phrases_contextes: List of dicts with the keys ``'phrase'`` and
            ``'context'``.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post`` so a stalled server cannot block forever.

    Returns:
        The list of result items decoded from the model's JSON reply. The
        prompt asks for one object per input item, in order, with the keys
        ``is_correct``, ``node_label``, ``refined_text`` and ``correction``.
        Returns ``[]`` for an empty batch, a non-200 reply, or any error
        (which is printed).
    """
    if not liste_phrases_contextes:
        return []

    prompt_content = "".join(
        f"ITEM {i} : Phrase='{item['phrase']}' (Contexte='{item['context']}')\n"
        for i, item in enumerate(liste_phrases_contextes)
    )

    prompt = f"""
    Tu es un expert pédagogique. Analyse la liste d'items suivante provenant d'un cours en langue française.

    DONNÉES À ANALYSER:
    {prompt_content}

    TACHE :
    Pour chaque ITEM, génère un objet JSON strict :
    1. "is_correct": true si la phrase est factuellement correcte selon le contexte, sinon false.
    2. "node_label": Un titre très court (max 4 mots) résumant l'idée.
    3. "refined_text": Une réécriture claire et professionnelle de la phrase.
    4. "correction": Si faux, explique pourquoi, sinon null.

    FORMAT DE RÉPONSE ATTENDU (JSON PUR SEULEMENT) :
    [
      {{ "is_correct": true, "node_label": "...", "refined_text": "...", "correction": null }},
      ...
    ]
    Ne mets aucun texte avant ou après le JSON.
    """

    try:
        response = requests.post(
            OLLAMA_API_URL,
            json={
                "model": OLLAMA_MODEL,
                "prompt": prompt,
                "stream": False,
                "format": "json",
                "options": {
                    "temperature": 0.1
                }
            },
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f" Erreur Llama 3 : HTTP {response.status_code}")
            return []

        json_str = response.json()['response']
        # Strip Markdown code fences the model may wrap around the JSON.
        json_str = json_str.replace("```json", "").replace("```", "").strip()
        data = json.loads(json_str)

        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            # The model may wrap the array in an object; prefer the 'items'
            # key, then any other value that is a list of objects.
            candidates = [data.get('items'), *data.values()]
            for val in candidates:
                if isinstance(val, list) and all(isinstance(x, dict) for x in val):
                    return val
            # Otherwise treat the object itself as a single result item.
            return [data]

    except Exception as e:
        # Best effort: the caller treats an empty list as "no result".
        print(f" Erreur Llama 3 : {e}")
    return []

# ==========================================
# ROUTE API
# ==========================================

def _noeud_depuis_reponse(reponse, phrase_originale):
    """Build one tree node from a single LLM result item.

    Falls back to the original sentence and the label 'Info' when the model
    omitted a field or returned it empty/null.
    """
    if not isinstance(reponse, dict):
        return {
            "label": "Info",
            "full_text": phrase_originale,
            "status": "correct",
            "correction": None,
        }
    return {
        "label": reponse.get('node_label') or 'Info',
        "full_text": reponse.get('refined_text') or phrase_originale,
        "status": "correct" if reponse.get('is_correct') else "warning",
        "correction": reponse.get('correction'),
    }


@app.route('/analyze', methods=['POST'])
def analyze():
    """Analyse an uploaded PDF and return a synthesis tree as JSON.

    Expects a multipart upload under the form field ``file``. Each sentence
    whose best knowledge-base match scores at least ``SIMILARITY_THRESHOLD``
    is sent to the LLM in chunks of 5, together with the matched entry's
    ``definition`` as context.

    Returns:
        ``{"root_label": ..., "branches": [{"name": ..., "children": [...]}]}``
        where each child has ``label``, ``full_text``, ``status`` and
        ``correction``. Responds 500 when the knowledge base is not loaded and
        400 when no file was sent.
    """
    if KB_VECS is None:
        return jsonify({"error": "Cerveau non chargé"}), 500
    if 'file' not in request.files:
        return jsonify({"error": "Aucun fichier"}), 400

    file = request.files['file']
    dict_chapitres = extraire_pdf_stream(file)

    arbre_final = {"root_label": f"Synthèse : {file.filename}", "branches": []}

    print("Analyse IA en cours...")

    chunk_size = 5

    for chapitre_titre, phrases in dict_chapitres.items():
        if not phrases:
            continue

        # Encode the whole section in one call instead of one call per
        # sentence; semantic_search returns one hit list per sentence.
        vecs = embedder.encode(phrases, convert_to_tensor=True)
        resultats = util.semantic_search(vecs, KB_VECS, top_k=1)

        batch_a_traiter = []
        for phrase, hits in zip(phrases, resultats):
            # An empty knowledge base yields an empty hit list.
            if not hits:
                continue
            hit = hits[0]
            if hit['score'] >= SIMILARITY_THRESHOLD:
                kb_context = KB_DATA[hit['corpus_id']].get('definition', '')
                batch_a_traiter.append({"phrase": phrase, "context": kb_context})

        enfants = []
        for i in range(0, len(batch_a_traiter), chunk_size):
            chunk = batch_a_traiter[i:i + chunk_size]
            res_ia = interroger_llama_batch(chunk)

            # Results are matched to inputs by position; zip stops at the
            # shorter side, so surplus results from the model are ignored.
            for item, r in zip(chunk, res_ia or []):
                enfants.append(_noeud_depuis_reponse(r, item['phrase']))

        if enfants:
            arbre_final['branches'].append({"name": chapitre_titre, "children": enfants})
            print(f"   '{chapitre_titre}' traité.")

    return jsonify(arbre_final)

def _extraire_enfants(parsed_data):
    """Normalise decoded LLM output into a flat list of node dicts.

    Accepts a bare list, an object wrapping a list under some key, a single
    node object, or an object whose values are the nodes.
    """
    children = []
    if isinstance(parsed_data, list):
        children = parsed_data
    elif isinstance(parsed_data, dict):
        children = None
        # Well-known wrapper keys first, then any list-valued entry.
        for key in ('items', 'children', 'data', 'nodes', 'response', 'result'):
            if isinstance(parsed_data.get(key), list):
                children = parsed_data[key]
                break
        if children is None:
            children = next(
                (val for val in parsed_data.values() if isinstance(val, list)),
                None,
            )
        if children is None:
            if 'label' in parsed_data or 'full_text' in parsed_data:
                # The model answered with one node object instead of an array.
                children = [parsed_data]
            else:
                children = list(parsed_data.values())

    final_children = []
    for item in children:
        if isinstance(item, dict):
            final_children.append(item)
        elif isinstance(item, list):
            # Flatten one level of nesting, keeping only objects.
            final_children.extend(sub for sub in item if isinstance(sub, dict))
    return final_children


@app.route('/expand', methods=['POST'])
def expand():
    """Generate sub-points that elaborate on one concept.

    Expects a JSON object with ``concept`` (required) and ``context``
    (optional). Responds with ``{"children": [...]}``; 400 when the body is
    not a JSON object or the concept is missing; 500 when the Ollama call or
    the decoding of its reply fails.
    """
    # silent=True returns None instead of raising on a missing/invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Invalid JSON body"}), 400

    concept = data.get('concept', '')
    context = data.get('context', '')

    if not concept:
        return jsonify({"error": "No concept provided"}), 400

    print(f" Deep Dive request: {concept} (Context: {context})")

    prompt = f"""
    Tu es un professeur expert. L'étudiant veut approfondir le concept suivant : "{concept}".
    Contexte du cours : "{context}".

    TACHE :
    Génère 3 sous-points clés (courts et précis) pour expliquer ce concept en détail.

    FORMAT JSON ATTENDU :
    [
      {{ "label": "Titre court", "full_text": "Explication détaillée...", "status": "correct" }},
      {{ "label": "Titre court", "full_text": "Explication détaillée...", "status": "correct" }},
      {{ "label": "Titre court", "full_text": "Explication détaillée...", "status": "correct" }}
    ]
    Ne mets que le JSON.
    """

    try:
        response = requests.post(
            OLLAMA_API_URL,
            json={
                "model": OLLAMA_MODEL,
                "prompt": prompt,
                "stream": False,
                "format": "json",
                "options": {"temperature": 0.3}
            },
            # (connect, read) seconds: a stalled server must not block the
            # request forever.
            timeout=(10, 600),
        )

        if response.status_code != 200:
            print(f" OLLAMA ERROR ({response.status_code}): {response.text}")
            return jsonify({"error": "Ollama generation failed"}), 500

        json_str = response.json()['response']
        print(f" RAW AI OUTPUT: {json_str[:100]}...")

        # Keep only the outermost JSON object/array, dropping any prose the
        # model put around it.
        match = re.search(r'(\{.*\}|\[.*\])', json_str, re.DOTALL)
        if not match:
            print(" No JSON structure found")
            return jsonify({"children": []})

        final_children = _extraire_enfants(json.loads(match.group(0)))

        print(f" Sending {len(final_children)} children to app.")
        return jsonify({"children": final_children})

    except Exception as e:
        print(f" Python Exception: {e}")
        return jsonify({"error": str(e)}), 500


# ==========================================
# ROUTE DE TEST (PING)
# ==========================================
@app.route('/ping', methods=['GET'])
def ping():
    """Connectivity check: always answers 200 with a fixed status payload."""
    payload = {
        "status": "online",
        "message": "Connection established successfully with Colab!",
    }
    return jsonify(payload), 200
# ==========================================
# LANCEMENT
# ==========================================
# The tunnel credential is read from the environment so it never lives in the
# notebook source.
# TODO(security): a token was previously hard-coded on this line and must be
# considered leaked — revoke it in the ngrok dashboard.
NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN")
if not NGROK_TOKEN:
    raise RuntimeError(
        "NGROK_AUTHTOKEN is not set; export it before starting the API."
    )

# Single source of truth for the port shared by the tunnel and Flask.
PORT = 5000

ngrok.set_auth_token(NGROK_TOKEN)
public_url = ngrok.connect(PORT)
print(f"API PRÊTE : {public_url}")

app.run(port=PORT)