In [None]:


# Code supporting the ZERO-SHOT and FEW-SHOT approaches (synthetic/real examples)
# (requires changing the path in EXAMPLES_DIR) + DICTIONARY, 15/06/2025
# Loads examples and dictionary from the JSON files in scam-examples/sintetic
# Includes the dictionary of regionalisms in the prompt.
# filepath: c:\Users\Diego\Desktop\PLN fIlter\ModelsClassificationTests.ipynb

import subprocess
import re
import json
import os
import time
import statistics
import sys
from datetime import datetime
from sklearn.metrics import classification_report, accuracy_score
from ollama import Client

# ----------------------------
# GENERAL CONFIGURATION
# ----------------------------
# Alternative output folder (kept for switching between experiment variants):
#OUTPUT_DIR = 'LMMs-Classification-Test-Results/Few-Shot-Sintetic-Dictionary-Definitions-Provided' # New output folder
OUTPUT_DIR = 'LMMs-Classification-Test-Results/Few-Shot-Sintetic-Aproach' # Folder where result JSON files are written and looked up

PROMPT_VERSION = 'v1.3' # Prompt version recorded in the result metadata
APAGADO = False # When True, a system shutdown is scheduled after the evaluation finishes
DEFINITIONS = False  # When True, the scam definitions are inserted into the prompt
DICTIONARY = False  # When True, the regionalism dictionary is loaded and inserted into the prompt
EXAMPLES = True  # When True, few-shot examples are loaded and inserted into the prompt
EXAMPLES_DIR = 'scam-examples/sintetic' # Directory holding the example and dictionary JSON files
SELECTED_MODELS_ONLY = True  # When True, only the models in MODELS_SELECTION are evaluated
# NOTE(review): the name suggests an embedding model; confirm it supports chat before running.
MODELS_SELECTION = ["mxbai-embed-large:335m"]
# ASSUMPTION: the variable 'testDataset' is defined in an earlier cell and is globally accessible.
# ASSUMPTION: the Ollama client 'client' is initialised (done at the bottom of this cell).


# ----------------------------
# FUNCIONES AUXILIARES
# ----------------------------

def load_scam_examples_and_dictionary():
    """Build the few-shot examples text and the dictionary text for the prompt.

    Reads every ``*.json`` file in ``EXAMPLES_DIR``. Each file may contain an
    ``examples`` list (items with a ``text`` key) and a ``dictionary`` list
    (items with ``word`` and ``meaning`` keys). The file name without its
    extension is used as the heading for that file's examples.

    Files are processed in sorted name order so the generated prompt is the
    same on every run and every machine (``os.listdir`` order is arbitrary).

    Returns:
        tuple[str, str]: ``(examples_text, dictionary_text)``. Each part is
        empty when its feature flag (``EXAMPLES`` / ``DICTIONARY``) is off.
        Both are empty when ``EXAMPLES_DIR`` is not an existing directory, in
        which case a warning is printed.

    Raises:
        KeyError: if an example lacks ``text`` or a dictionary entry lacks
            ``word`` / ``meaning``.
        json.JSONDecodeError: if a file is not valid JSON.
    """
    if not os.path.isdir(EXAMPLES_DIR):
        print(f"Advertencia: El directorio de ejemplos '{EXAMPLES_DIR}' no fue encontrado.")
        return "", ""

    # Nothing to build: skip reading the files altogether.
    if not (EXAMPLES or DICTIONARY):
        return "", ""

    example_parts = []
    dictionary_parts = []
    if EXAMPLES:
        example_parts.append(
            "ADDITIONAL EXAMPLES FOR CONTEXT (Pay close attention to these examples to understand the nuances of each category): "
        )
    if DICTIONARY:
        dictionary_parts.append(
            "DICTIONARY OF MEXICAN REGIONALISMS (These terms are common in Mexico City colloquial language and may appear in messages): "
        )

    for filename in sorted(os.listdir(EXAMPLES_DIR)):
        if not filename.endswith('.json'):
            continue
        with open(os.path.join(EXAMPLES_DIR, filename), 'r', encoding='utf-8') as f:
            data = json.load(f)

        if EXAMPLES:
            # The file name (without extension) labels the category of its examples.
            example_parts.append(f"{os.path.splitext(filename)[0]}: ")
            example_parts.extend(
                f"message {i}: {example['text']}, "
                for i, example in enumerate(data.get('examples', []), 1)
            )
        if DICTIONARY:
            dictionary_parts.extend(
                f"{entry['word']}: \"{entry['meaning']},\" "
                for entry in data.get('dictionary', [])
            )

    return "".join(example_parts), "".join(dictionary_parts)

def get_training_dataset_size():
    """Summarise how many few-shot examples are stored in ``EXAMPLES_DIR``.

    Every ``*.json`` file in the directory contributes the length of its
    ``examples`` list to the total. Examples whose ``category`` (compared
    case-insensitively) is catfishing, sextortion or harmless are also
    counted per category; other categories only count towards the total.
    Files that fail with a JSON decode error or a KeyError are reported and
    skipped.

    Returns:
        str: ``"<total> samples (catfishing: a, sextortion: b, harmless: c)"``,
        or ``"0 samples"`` when nothing was found.
    """
    per_category = dict.fromkeys(('catfishing', 'sextortion', 'harmless'), 0)
    sample_total = 0

    json_names = []
    if os.path.exists(EXAMPLES_DIR):
        json_names = [name for name in os.listdir(EXAMPLES_DIR) if name.endswith('.json')]

    for json_name in json_names:
        try:
            with open(os.path.join(EXAMPLES_DIR, json_name), 'r', encoding='utf-8') as handle:
                entries = json.load(handle).get('examples', [])
                sample_total += len(entries)

                # Per-category tally; unknown categories are ignored here.
                for entry in entries:
                    label = entry.get('category', '').lower()
                    if label in per_category:
                        per_category[label] += 1
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Error contando samples en {json_name}: {e}")

    if sample_total <= 0:
        return "0 samples"

    breakdown = ", ".join(f"{label}: {count}" for label, count in per_category.items())
    return f"{sample_total} samples ({breakdown})"

    
def get_available_models():
    """Return the Ollama models that still need to be evaluated.

    Steps:
      1. Collect the names of models that already have a result file in
         ``OUTPUT_DIR`` (read from ``metadata.model_name`` of each JSON file).
      2. Run ``ollama list`` once and map each installed model name to its
         size (third and fourth whitespace-separated columns of each row).
      3. If ``SELECTED_MODELS_ONLY`` is false, keep every installed model not
         yet evaluated; otherwise keep only the entries of
         ``MODELS_SELECTION`` (a list or a single string) that are installed
         and not yet evaluated.

    Returns:
        dict[str, str]: model name -> size string. An empty dict is returned
        when ``ollama list`` cannot be run, so the return type is always a
        mapping.

    Raises:
        SystemExit: when a model in ``MODELS_SELECTION`` is not installed.
    """
    # Always load the models that were already evaluated.
    evaluated_models = set()
    if os.path.exists(OUTPUT_DIR):
        for filename in os.listdir(OUTPUT_DIR):
            if not filename.endswith('.json'):
                continue
            try:
                with open(os.path.join(OUTPUT_DIR, filename), 'r', encoding='utf-8') as f:
                    data = json.load(f)
                model_name = data.get('metadata', {}).get('model_name')
                if model_name:
                    evaluated_models.add(model_name)
            # AttributeError covers JSON files whose top level (or 'metadata')
            # is not an object, so one odd file does not abort the whole run.
            except (json.JSONDecodeError, KeyError, AttributeError) as e:
                print(f"Error leyendo el archivo {filename}: {e}")
                continue

    # Query the installed models with a single `ollama list` call.
    try:
        result = subprocess.run(['ollama', 'list'], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error ejecutando 'ollama list': {e}")
        return {}
    except FileNotFoundError:
        print("Comando 'ollama' no encontrado. Aseg√∫rate de que Ollama est√© instalado y en el PATH.")
        return {}

    installed_models = {}
    # The first output line is the column header.
    for line in result.stdout.strip().split('\n')[1:]:
        fields = line.split()
        # Skip blank or truncated rows instead of failing with IndexError.
        if len(fields) < 4:
            continue
        installed_models[fields[0]] = f"{fields[2]} {fields[3]}"

    if not SELECTED_MODELS_ONLY:
        # Exclude models that were already evaluated.
        models_to_evaluate = {
            model: size
            for model, size in installed_models.items()
            if model not in evaluated_models
        }
    else:
        models_to_evaluate = {}
        # MODELS_SELECTION may be a list or a single string.
        model_list = MODELS_SELECTION if isinstance(MODELS_SELECTION, list) else [MODELS_SELECTION]
        for m in model_list:
            if m not in installed_models:
                print(f"Advertencia: El modelo '{m}' no est√° instalado en Ollama, no se puede proseguir")
                sys.exit(1)
            if m not in evaluated_models:
                models_to_evaluate[m] = installed_models[m]

    if evaluated_models:
        print(f"\nü§ñ {len(evaluated_models)} modelos ya evaluados y excluidos:\n")
        print(*sorted(evaluated_models), sep="\n")
    else:
        print("Ningun modelo ha sido evaluado aun")

    return models_to_evaluate

def get_metadata(model_name, model_size, current_dataset, dictionary_text_provided):
    """Return the metadata dictionary that is stored with every report.

    Args:
        model_name: Name of the evaluated model.
        model_size: Size string of the model (as reported by ``ollama list``).
        current_dataset: The test dataset; only its length is recorded.
        dictionary_text_provided: Truthy when a dictionary was put in the prompt.

    Returns:
        dict: model name/size, UTC evaluation timestamp (ISO 8601 with a
        trailing ``Z``), prompt version, short git commit hash (``'N/A'`` when
        git or the repository is unavailable), few-shot example information,
        test dataset size and the dictionary/definitions flags.
    """
    # Local import: the file only imports the `datetime` class itself.
    from datetime import timezone

    try:
        # stderr is discarded so "not a git repository" does not clutter the output.
        code_version = subprocess.check_output(
            ['git', 'rev-parse', '--short', 'HEAD'], stderr=subprocess.DEVNULL
        ).strip().decode('utf-8')
    except (subprocess.CalledProcessError, FileNotFoundError):
        code_version = 'N/A'

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so isoformat() keeps the original "...Z" layout (no "+00:00").
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        'model_name': model_name,
        'model_size': model_size,
        'evaluation_date': utc_now.isoformat() + 'Z',
        'prompt_version': PROMPT_VERSION,
        'code_version (short-commit-hash)': code_version,
        'training_dataset_size': get_training_dataset_size() if EXAMPLES else "none examples given",
        'examples_directory': f"{EXAMPLES_DIR}" if EXAMPLES else "none examples given",
        # NOTE(review): the "(10 off each category)" suffix is hard-coded and
        # is not derived from current_dataset; confirm it matches the dataset.
        'test_dataset_size': str(len(current_dataset)) + ' samples (10 off each category)',
        'dictionary_provided': bool(dictionary_text_provided),
        'definitions_provided': bool(DEFINITIONS)
    }

def ensure_output_dir():
    """Make sure the results folder ``OUTPUT_DIR`` exists, creating parents as needed."""
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR, exist_ok=True)

def get_model_prediction(model_name, text, additional_examples_text="", dictionary_text=""):
    """Send the classification prompt to a model and extract the predicted label.

    The prompt is assembled from fixed sections (role, context, approach,
    limits) plus optional ones: the dictionary text, the scam definitions
    (only when ``DEFINITIONS`` is set), the few-shot examples text, and a note
    about Attacker:/Victim: labels (only when ``EXAMPLES`` is set and
    ``EXAMPLES_DIR`` is ``'scam-examples/real'``).

    The reply is stripped and lower-cased, then everything up to and including
    the last newline is removed, so only the final line of the reply is kept
    (this drops any reasoning text printed before the answer).

    Args:
        model_name: Ollama model to query.
        text: Message to classify.
        additional_examples_text: Few-shot examples section (may be empty).
        dictionary_text: Regionalism dictionary section (may be empty).

    Returns:
        tuple[str, str]: ``(last_line_of_reply, prediction)`` where
        ``prediction`` is catfishing, sextortion or harmless when exactly one
        of those labels occurs in the last line, otherwise ``'unclassified'``.
    """
    definitions_section = ''
    if DEFINITIONS:
        definitions_section = (
            "DEFINITIONS OF THE SCAMS TO IDENTIFY "
            "1.- catfishing: Creation of a fake online identity to emotionally deceive the victim, typically requesting money, personal information, or creating emotional dependency. "
            "key differences: focused on establishing a romantic connection to manipulate the victim later, The tone of the characteristic speech is closer to deception, manipulation through emotional ties, persuasion and the lack of naturalness with which one tries to reach the link, usually ask for money for false emergencies or other contexts. "
            "2.-sextortion: Sexual blackmail where the attacker threatens to publish intimate content of the victim unless they comply with demands (usually money). "
            "key differences: it involves a video, photo or any other media, The tone of the characteristic speech is closer to blackmail, coercion, coercive or directly threatening action. "
            "3.-harmless: Normal and legitimate communication without fraudulent or malicious intentions. "
            "key differences: it does not represent any threat, casual conversations, real plans, cordial exchanges, normal romantic relationships conversations. "
        )

    real_examples_note = ""
    if EXAMPLES_DIR == 'scam-examples/real' and EXAMPLES:
        real_examples_note = (
            "The examples contain a real conversation that includes one of the 3 types of scams being considered. Identify the Attacker: and Victim: labels as they will indicate the participation of each actor. "
        )

    # Section order matters: it reproduces the prompt layout exactly.
    prompt_sections = [
        "ROLE: You are a cybersecurity expert specialized in detecting digital fraud and malicious online behavior. ",
        "CONTEXT: You are analyzing text messages written in Mexico City colloquial language to identify fraudulent behavior patterns in digital communications. ",
        f"{dictionary_text}",  # regionalism dictionary
        definitions_section,
        f"{additional_examples_text}",  # few-shot examples
        "APPROACH: Based on the previous, classify the following message into exactly one of these three categories: catfishing, sextortion, or harmless. ",
        real_examples_note,
        "LIMITS: ",
        "- Respond ONLY with the corresponding label: catfishing, sextortion, or harmless. ",
        "- DO NOT include explanations, justifications, or any additional text. ",
        "- DO NOT use quotes, periods, or special characters in your response. Just the label. ",
        f"Message to analyze: \"{text}\" ",
    ]
    prompt = "".join(prompt_sections)

    # Single non-streaming request.
    reply = client.chat(model=model_name, messages=[{"role": "user", "content": prompt}], stream=False)
    reply_text = reply['message']['content'].strip().lower()

    # Greedy DOTALL match: removes everything through the last newline.
    last_line = re.sub(r'.*\n+', '', str(reply_text), flags=re.DOTALL)

    # Accept the answer only when exactly one valid label is present.
    matches = [label for label in ('catfishing', 'sextortion', 'harmless') if label in last_line]
    prediction = matches[0] if len(matches) == 1 else 'unclassified'
    return last_line, prediction

def evaluate_model(model_name, dataset_to_use, additional_examples_text="", dictionary_text=""):
    """Run every prediction for one model and compute its metrics.

    Each sample (a mapping with ``text`` and ``label``) is sent to
    ``get_model_prediction``; the call is timed and a progress line is
    printed. The classification report is restricted to the labels present in
    the ground truth, and the overall accuracy is stored under ``accuracy``.

    Returns:
        dict: ``labels`` (true labels, model outputs and extracted labels),
        ``classification_report`` and ``timing_metrics``.
    """
    sample_count = len(dataset_to_use)
    true_labels = []
    model_outputs = []
    extracted_labels = []
    durations = []

    run_started = time.time()
    print(f"Evaluando {sample_count} muestras con el modelo {model_name}...")

    print("Progreso: 0%", end="")

    for position, sample in enumerate(dataset_to_use, 1):
        call_started = time.time()
        model_output, extracted_label = get_model_prediction(
            model_name, sample['text'], additional_examples_text, dictionary_text
        )
        call_finished = time.time()

        true_labels.append(sample['label'])
        model_outputs.append(model_output)
        extracted_labels.append(extracted_label)
        durations.append(call_finished - call_started)

        progress = (position / len(dataset_to_use)) * 100
        print(f"\rProgreso: {progress:.1f}% ({position}/{len(dataset_to_use)} muestras)", end="", flush=True)

    elapsed = time.time() - run_started

    print(f"\nCompletado: {len(dataset_to_use)} muestras procesadas para {model_name}.")

    # Only the labels that occur in the ground truth are reported.
    known_labels = sorted(set(true_labels))
    report = classification_report(
        true_labels, extracted_labels, output_dict=True, zero_division=0, labels=known_labels
    )
    report['accuracy'] = accuracy_score(true_labels, extracted_labels)

    timing = {
        'total_evaluation_time_seconds': elapsed,
        'average_time_per_prediction_seconds': statistics.mean(durations) if durations else 0,
        'min_prediction_time_seconds': min(durations) if durations else 0,
        'max_prediction_time_seconds': max(durations) if durations else 0,
        'samples_per_second': len(dataset_to_use) / elapsed if elapsed > 0 else 0,
        'total_evaluation_time_minutes': elapsed / 60
    }

    label_record = {
        'etiquetas_reales': true_labels,
        'llm-output-predictions': model_outputs,
        'leaked-prediction': extracted_labels,
    }
    return {
        'labels': label_record,
        'classification_report': report,
        'timing_metrics': timing
    }

def export_to_json(report_data, metadata_dict, filename_prefix):
    """Save a report together with its metadata as a JSON file in ``OUTPUT_DIR``.

    The file is named ``<model>_eval_<date>.json``: the model name comes from
    ``metadata_dict['model_name']`` with every character other than word
    characters, ``-``, ``_`` and ``.`` replaced by ``_``; the date is the part
    of ``metadata_dict['evaluation_date']`` before the ``T``. An existing file
    with the same name is overwritten.

    Args:
        report_data: JSON-serialisable evaluation results.
        metadata_dict: Metadata; must contain ``model_name`` and
            ``evaluation_date``.
        filename_prefix: Currently unused; kept so existing callers keep working.
    """
    payload = {
        'metadata': metadata_dict,
        'results': report_data
    }
    safe_model_name = re.sub(r'[^\w\-_.]', '_', metadata_dict['model_name'])
    date_str = metadata_dict['evaluation_date'].split('T')[0]
    filename = f"{safe_model_name}_eval_{date_str}.json"

    # Create the folder here so the export does not depend on a prior
    # ensure_output_dir() call.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    path = os.path.join(OUTPUT_DIR, filename)
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=4, ensure_ascii=False)
    print(f'‚úÖ JSON guardado en {path}')

def evaluate_all_models(current_dataset):
    """Evaluate every pending Ollama model on the dataset and export the results.

    Loads the few-shot examples and dictionary once, asks
    ``get_available_models`` which models still need evaluation, then for each
    model builds its metadata, runs ``evaluate_model`` and writes the report
    with ``export_to_json``. A failure on one model is printed with its
    traceback and the loop continues with the next model.

    Args:
        current_dataset: Test samples passed unchanged to ``evaluate_model``.
    """
    ensure_output_dir()

    print(f"Cargando ejemplos y diccionario desde: {EXAMPLES_DIR}")
    loaded_examples_text, loaded_dictionary_text = load_scam_examples_and_dictionary()

    if loaded_examples_text:
        # Each example is rendered as "message <n>: <text>, " by the loader, so
        # count those markers (approximate if a message contains the same pattern).
        example_count = len(re.findall(r"message \d+: ", loaded_examples_text))
        print(f"Se cargaron {example_count} ejemplos.")
    # No 'else' needed: the loader prints its own warning.

    if loaded_dictionary_text:
        # Each entry is rendered as '<word>: "<meaning>," ' by the loader, so
        # count the closing marker (approximate if a meaning contains it).
        dict_entry_count = loaded_dictionary_text.count(',\" ')
        print(f"Se cargaron {dict_entry_count} entradas de diccionario.")
    # No 'else' needed.

    models_to_evaluate = get_available_models()

    if not models_to_evaluate:
        print("No se encontraron modelos nuevos para evaluar.")
        return

    print(f"\nü§ñ {len(models_to_evaluate)} modelos encontrados para evaluar:\n")
    print(*models_to_evaluate, sep="\n")
    print(f"üìä Dataset con {len(current_dataset)} muestras.")
    print(f"üìÅ Resultados se guardar√°n en: {OUTPUT_DIR}/")

    for i, model_name_to_eval in enumerate(models_to_evaluate, 1):
        print(f"\n{'='*60}")
        print(f"EVALUANDO MODELO {i}/{len(models_to_evaluate)}: {model_name_to_eval}")
        print(f"{'='*60}")

        try:
            current_metadata = get_metadata(
                model_name_to_eval,
                models_to_evaluate[model_name_to_eval],
                current_dataset,
                bool(loaded_dictionary_text),
            )

            evaluation_report = evaluate_model(
                model_name_to_eval, current_dataset, loaded_examples_text, loaded_dictionary_text
            )

            export_to_json(evaluation_report, current_metadata, model_name_to_eval)

            print(f"‚úÖ Evaluaci√≥n completada para {model_name_to_eval}")
            if 'classification_report' in evaluation_report and 'accuracy' in evaluation_report['classification_report']:
                print(f"   Accuracy: {evaluation_report['classification_report']['accuracy']:.4f}")
            if 'timing_metrics' in evaluation_report:
                print(f"   Tiempo total de evaluaci√≥n: {evaluation_report['timing_metrics']['total_evaluation_time_seconds']:.2f}s")

        except Exception as e:
            # Top-level boundary: report the failure and move on to the next model.
            print(f"‚ùå Error evaluando modelo {model_name_to_eval}: {e}")
            import traceback
            traceback.print_exc()
            continue

# Connect to the local Ollama server; stop the cell if it cannot be reached.
try:
    client = Client(host='http://localhost:11434')
    client.list()  # cheap request used as a connectivity check
    print("üîå Conexi√≥n con Ollama establecida exitosamente.")
except Exception as e:
    print("‚ùå No se pudo conectar con Ollama en http://localhost:11434. Aseg√∫rate que est√© corriendo.")
    print(f"Error: {e}") 
    sys.exit(1)

# RUN THE EVALUATION: requires a non-empty list named 'testDataset' (defined in
# an earlier cell) and the 'client' created above.
if (
    'testDataset' not in globals()
    or not isinstance(testDataset, list)
    or len(testDataset) == 0
    or 'client' not in globals()
):
    # Explain which prerequisite is missing.
    if 'testDataset' not in globals() or not isinstance(testDataset, list) or len(testDataset) == 0:
        print("‚ö†Ô∏è La variable 'testDataset' no est√° definida o est√° vac√≠a. Por favor, define el testDataset en una celda anterior.")
    if 'client' not in globals():
        print("‚ö†Ô∏è La variable 'client' (Ollama client) no est√° inicializada. Por favor, inicial√≠zala.")
else:
    print("\nüöÄ Iniciando evaluaci√≥n autom√°tica de todos los modelos...")
    evaluate_all_models(testDataset)
    print("\nüéâ Evaluaci√≥n de todos los modelos completada.")

    # Optional: schedule a shutdown once the run is over
    # (the command uses Windows-style `shutdown /s /t` flags).
    if APAGADO is True:
        print("\nüí§ Programando apagado del equipo en 60 segundos...")
        try:
            subprocess.run(['shutdown', '/s', '/t', '60'], check=True)
            print("‚úÖ Apagado programado exitosamente.")
        except Exception as e:
            print(f"‚ùå Error programando apagado: {e}")