# NLP2 Project

### spaCy: turning developer information into prompts for T5

In [1]:
import spacy
from itertools import product

# Load the small English spaCy pipeline once at import time; the helper
# functions below share this `nlp` object instead of reloading it per call.
nlp = spacy.load("en_core_web_sm")

def extract_important_phrases(value):
    """
    Pull the salient words out of ``value`` using the shared spaCy pipeline.

    Named entities are listed first, followed by every token tagged as a
    verb, adjective, noun or proper noun. A phrase that occurs more than once
    is kept only at its first position.

    Returns the phrases joined with ", ", or ``value`` itself when nothing
    was extracted.
    """
    parsed = nlp(value)
    wanted_pos = {"VERB", "ADJ", "NOUN", "PROPN"}

    entity_texts = [entity.text for entity in parsed.ents]
    token_texts = [tok.text for tok in parsed if tok.pos_ in wanted_pos]

    # dict keys keep insertion order, so this de-duplicates without reordering.
    unique_phrases = list(dict.fromkeys(entity_texts + token_texts))

    if not unique_phrases:
        return value
    return ", ".join(unique_phrases)

def generate_flexible_context(data):
    """
    Describe a mapping of player attributes as one sentence-per-key string.

    Each key has its underscores replaced by spaces, is tokenized with spaCy,
    re-joined with single spaces and capitalized, then rendered as
    "<Key> is <value>. ". The result starts with "Player context: " and has
    surrounding whitespace stripped.
    """
    sentences = []
    for key, value in data.items():
        key_tokens = nlp(key.replace("_", " "))
        label = " ".join(tok.text for tok in key_tokens)
        sentences.append(f"{label.capitalize()} is {value}. ")

    return ("Player context: " + "".join(sentences)).strip()

def transform_to_prompts(data):
    """
    Build one generation prompt per combination of player choices.

    ``data`` maps a field name to a dict with a "type" and a "value":

    * "text" fields are condensed with ``extract_important_phrases`` and
      become part of the NPC context shared by every prompt;
    * "choice" fields hold comma-separated options; every combination of one
      option per choice field yields its own prompt;
    * any other type is ignored.

    Returns the list of prompt strings, in the order produced by
    ``itertools.product`` over the choice fields.
    """
    npc_parts = []
    choice_axes = []

    for field, details in data.items():
        kind = details.get("type")
        raw_value = details.get("value")

        if kind == "text":
            npc_parts.append(f"{field}: {extract_important_phrases(raw_value)}")
        elif kind == "choice":
            choice_axes.append(
                [(field, piece.strip()) for piece in raw_value.split(",")]
            )

    npc_context = " | ".join(npc_parts)

    def render(combo):
        # combo is one (field, option) pair per choice field.
        player_state = " | ".join(
            f"{name} (Player): {state}" for name, state in combo
        )
        return (
            "Generate dialogue considering the following:\n"
            f"NPC Context: {npc_context}\n"
            f"Player State: {player_state}."
        )

    return [render(combo) for combo in product(*choice_axes)]


### T5 generation

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the FLAN-T5 large tokenizer and model once; generate_dialogue() below
# reads both as module-level globals.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")

def generate_dialogue(prompts, num_responses=2):
    """
    Sample dialogue lines from the T5 model for each prompt.

    Args:
        prompts: iterable of prompt strings.
        num_responses: number of sampled sequences requested per prompt.

    Returns:
        A flat list of decoded strings: the samples for the first prompt,
        then those for the second, and so on.
    """
    # Sampling settings shared by every prompt.
    sampling_options = dict(
        max_length=50,
        num_return_sequences=num_responses,
        temperature=0.8,
        top_k=50,
        top_p=0.9,
        do_sample=True,
    )

    generated = []
    for text in prompts:
        encoded = tokenizer.encode(text, return_tensors="pt")
        sequences = model.generate(encoded, **sampling_options)
        generated.extend(
            tokenizer.decode(seq, skip_special_tokens=True) for seq in sequences
        )

    return generated

## BERT selection

In [3]:
from transformers import BertTokenizer, BertModel
import torch
import torch.nn.functional as F

# Load the BERT model and tokenizer (used by embed_text below).
bert_tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
bert_model = BertModel.from_pretrained("bert-large-uncased")

def embed_text(text):
    """Return the BERT embedding of ``text``.

    The text is tokenized (with truncation and padding) and passed through
    ``bert_model``; the hidden state at position 0 of the last layer, i.e.
    the [CLS] vector, is returned with the batch dimension kept.

    The forward pass runs under ``torch.no_grad()``: the embedding is only
    used for similarity scoring, so recording an autograd graph would waste
    memory and time without changing the values.
    """
    inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = bert_model(**inputs)
    # Position 0 of every sequence holds the [CLS] token.
    return outputs.last_hidden_state[:, 0, :]

def score_similarity(context, dialogue):
    """Return the cosine similarity between the embeddings of the context
    and of the dialogue, as a plain Python number."""
    return F.cosine_similarity(embed_text(context), embed_text(dialogue)).item()

def select_final_dialogue(context, filtered_dialogues):
    """Pick the dialogue that best matches the context.

    Args:
        context: text describing the current player state.
        filtered_dialogues: candidate dialogue strings. ``None`` is treated
            like an empty collection so that a caller with nothing stored
            gets ``(None, {})`` instead of a ``TypeError``.

    Returns:
        ``(best_dialogue, score_debug)`` where ``best_dialogue`` is the
        candidate with the highest similarity score (the first one on ties,
        ``None`` when there are no candidates) and ``score_debug`` maps each
        candidate to its score.
    """
    # Start below every possible score: with the previous sentinel of -1 a
    # candidate scoring exactly -1 could never be selected.
    best_score = float("-inf")
    best_dialogue = None
    score_debug = {}

    for dialogue in filtered_dialogues or ():
        score = score_similarity(context, dialogue)
        score_debug[dialogue] = score
        if score > best_score:
            best_score = score
            best_dialogue = dialogue

    return best_dialogue, score_debug


## Functions for the API

In [4]:
# --- Debug Info Functions ---

def generate_debug_info(input_data, prompts):
    """Bundle the received input and the prompts built from it into the
    debug payload of the dialogue generation step."""
    return {"Received Input Data": input_data, "prompts": prompts}

def select_debug_info(input_data):
    """Debug payload of the selection/filtering step.

    The received input is echoed back as-is (the same object, not a copy).
    """
    return input_data

def choices_debug_info(input_data, score_debug):
    """Bundle the received message and the per-dialogue scores into the debug
    payload of the final confirmation step."""
    return {"Message received": input_data, "Score Debug": score_debug}

# --- Core Processing Functions ---

def generate_dialogue_variations(input_data):
    """Turn the input description into prompts and generate dialogue for them.

    Returns:
        ``(prompts, dialogue)``: the prompts produced by
        ``transform_to_prompts`` and the lines ``generate_dialogue`` returned
        for them.
    """
    generated_prompts = transform_to_prompts(input_data)
    return generated_prompts, generate_dialogue(generated_prompts)

def filter_dialogue_variations(selected_dialogues):
    """Filter out unwanted dialogue options based on developer input.

    Currently a placeholder: the argument is ignored and ``None`` is returned.
    """
    return None

def select_best_dialogue(player_state, filtered_dialogues):
    """Choose the dialogue option that best fits the player's current state.

    The player state is first rendered as a context string, then each
    candidate is scored against it.

    Returns:
        ``(selected_dialogue, score_debug)`` as produced by
        ``select_final_dialogue``.
    """
    player_context = generate_flexible_context(player_state)
    return select_final_dialogue(player_context, filtered_dialogues)


## Présentation Streamlit

In [None]:
from flask import Flask, request, jsonify
import threading
import psutil
import os

# Flask application object; the @app.route handlers below register on it.
app = Flask(__name__)

# Function to free up a port if necessary
def free_port(port=5000):
    """Kill other processes that hold an inet socket bound to local ``port``.

    Args:
        port: local port number to free.

    The current process is never killed: when this code is re-run inside the
    same interpreter (e.g. a notebook kernel whose server thread already owns
    the port), killing the owner would terminate the caller itself.

    Raises:
        PermissionError: if a matching process cannot be signalled.
    """
    # One process may own several sockets on the port (the listener plus
    # accepted connections), so collect each pid only once.
    target_pids = {
        conn.pid
        for conn in psutil.net_connections(kind='inet')
        if conn.pid and conn.laddr and conn.laddr.port == port
    }
    target_pids.discard(os.getpid())

    for pid in target_pids:
        try:
            os.kill(pid, 9)  # 9 == SIGKILL on POSIX
        except ProcessLookupError:
            # The process exited between the listing and the kill; the port
            # is no longer held by it, which is all we wanted.
            pass

# Ensure port 5000 is free before starting Flask. This runs at import/cell
# execution time and kills whatever process free_port() finds on that port.
free_port()

# --- API Endpoints ---

@app.route('/generate_dialogue_options', methods=['POST'])
def generate_dialogue_options():
    """POST endpoint: generate dialogue options from the JSON request body.

    The optional "debug" key is removed from the body before generation.
    The JSON response holds "dialogue_options" and, when debug was truthy,
    a "debug_info" entry with the remaining input and the prompts.
    """
    payload = request.get_json()
    want_debug = payload.pop('debug', False)

    prompts, options = generate_dialogue_variations(payload)

    body = {'dialogue_options': options}
    if want_debug:
        body["debug_info"] = generate_debug_info(payload, prompts)

    return jsonify(body)

# In-memory store of the dialogue options each user kept, keyed by user id:
# written by /filter_dialogue_options and read by /confirm_final_dialogue.
filtered_dialogues_storage = {}

@app.route('/filter_dialogue_options', methods=['POST'])
def filter_dialogue_options():
    """POST endpoint: remember which dialogue options a user selected.

    Reads "user_id" (default "default_user") and "selected_options"
    (default empty list) from the JSON body and stores the options under that
    user id. The response is an empty JSON object unless "debug" is truthy,
    in which case the whole request body is echoed under "debug_info".
    """
    payload = request.get_json()

    # Callers should preferably send a unique user_id per user; requests
    # without one all share the "default_user" slot.
    owner = payload.get("user_id", "default_user")

    # Keep the filtered dialogues for the later confirmation request.
    filtered_dialogues_storage[owner] = payload.get("selected_options", [])

    if payload.get("debug", False):
        return jsonify({"debug_info": select_debug_info(payload)})
    return jsonify({})

@app.route('/confirm_final_dialogue', methods=['POST'])
def confirm_final_dialogue():
    """POST endpoint: pick the stored dialogue that best fits the player state.

    The JSON body holds the player state plus two optional control fields:
    "debug" (include scoring details in the response) and "user_id" (whose
    stored dialogues to use, default "default_user").

    Returns:
        JSON with "final_selected_dialogue" and, when debug was truthy,
        "debug_info". If nothing was stored for the user through
        /filter_dialogue_options, a JSON "error" message with HTTP status 400
        is returned instead of failing inside the scoring code.
    """
    input_data = request.get_json()
    debug = input_data.pop("debug", False)

    user_id = input_data.get("user_id", "default_user")

    filtered_dialogue = filtered_dialogues_storage.get(user_id)
    if filtered_dialogue is None:
        return jsonify({
            "error": (
                f"No filtered dialogues stored for user '{user_id}'; "
                "call /filter_dialogue_options first."
            )
        }), 400

    # "user_id" only routes the request; keep it out of the player state so
    # it is not rendered into the context text the dialogues are scored on.
    player_state = {
        key: value for key, value in input_data.items() if key != "user_id"
    }

    response_payload = {}
    response_payload['final_selected_dialogue'], score_debug = select_best_dialogue(
        player_state, filtered_dialogue
    )

    if debug:
        response_payload["debug_info"] = choices_debug_info(input_data, score_debug)

    return jsonify(response_payload)

# --- Run Flask App ---

def run_app():
    """Serve the Flask application on port 5000 (blocks until the server stops)."""
    app.run(port=5000)

# Start the Flask server in a separate thread so that executing this code
# does not block on app.run().
flask_thread = threading.Thread(target=run_app)
flask_thread.start()
