<a href="https://colab.research.google.com/github/JuanMr7/PipelineHateSpeech_Titulaci-n/blob/main/Darker_TheRialPipelineFuncional10de10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive (Colab-only API). The default model path
# used by cargar_pipeline points inside /content/drive/MyDrive, so this cell
# has to run before the pipeline is loaded.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Environment setup: replace the preinstalled scikit-learn with a pinned
# version, then install spaCy and its small Spanish model.
# NOTE(review): the 1.3.1 pin presumably matches the scikit-learn version used
# to train the pickled pipeline — confirm before changing it. The recorded
# output of this cell shows pip warning that imbalanced-learn 0.13.0 requires
# scikit-learn>=1.3.2, so this pin conflicts with that package.
!pip uninstall scikit-learn -y
!pip install scikit-learn==1.3.1 # Or another compatible version
!pip install spacy
# es_core_news_sm is the model later loaded with spacy.load("es_core_news_sm").
!python -m spacy download es_core_news_sm

Found existing installation: scikit-learn 1.6.0
Uninstalling scikit-learn-1.6.0:
  Successfully uninstalled scikit-learn-1.6.0
Collecting scikit-learn==1.3.1
  Downloading scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m35.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
imbalanced-learn 0.13.0 requires scikit-learn<2,>=1.3.2, but you have scikit-learn 1.3.1 which is incompatible.[0m[31m
[0mSuccessfully installed scikit-learn-1.3.1
Collecting es-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/es_core_n

In [None]:
import pandas as pd
import pickle
import re
import spacy
from sklearn.base import BaseEstimator, TransformerMixin
from scipy import sparse

# Load the Spanish spaCy model once at module level; TextCleaner.limpiar_texto
# calls this shared `nlp` object to tokenise and lemmatise each comment.
nlp = spacy.load("es_core_news_sm")

class TextCleaner(BaseEstimator, TransformerMixin):
    """Stateless transformer that normalises and lemmatises comments.

    It reads the ``comment`` column of the incoming DataFrame and returns a
    Series holding one cleaned string per row.

    Args:
        min_token_length: Tokens whose surface text is shorter than this many
            characters are discarded.
    """

    def __init__(self, min_token_length=2):
        self.min_token_length = min_token_length

    def fit(self, X, y=None):
        """Nothing is learned; returns ``self``."""
        return self

    def transform(self, X):
        """Apply ``limpiar_texto`` to every value of ``X['comment']``."""
        comentarios = X['comment']
        return comentarios.apply(self.limpiar_texto)

    def limpiar_texto(self, texto):
        """Clean one comment and return its kept lemmas joined by spaces.

        Missing values (as detected by ``pd.isna``) yield an empty string.
        """
        if pd.isna(texto):
            return ""

        # Basic normalisation: lowercase, then strip — in this order —
        # characters outside the Basic Multilingual Plane (which includes
        # most emoji), @mentions, #hashtags and URLs.
        normalizado = texto.lower()
        for patron in (
            r'[\U00010000-\U0010ffff]',
            r'@\w+',
            r'#\w+',
            r'\b(?:https?://|www\.)\S+\b',
        ):
            normalizado = re.sub(patron, '', normalizado)

        # spaCy pass: keep the lemma of every token that is not punctuation,
        # is long enough, and is not the '-PRON-' placeholder lemma.
        lemas = []
        for token in nlp(normalizado):
            if token.is_punct:
                continue
            if len(token.text) < self.min_token_length:
                continue
            if token.lemma_ == '-PRON-':
                continue
            lemas.append(token.lemma_)

        return ' '.join(lemas)

class DenseTransformer(BaseEstimator, TransformerMixin):
    """Pipeline step that converts SciPy sparse matrices to dense arrays.

    Inputs that are not sparse are passed through untouched.
    """

    def fit(self, X, y=None):
        """Nothing is learned; returns ``self``."""
        return self

    def transform(self, X):
        """Return ``X.toarray()`` for sparse input, otherwise ``X`` itself."""
        if sparse.issparse(X):
            return X.toarray()
        return X

def cargar_pipeline(ruta_pipeline="/content/drive/MyDrive/DatasetsTitulacion/pipeline/pipeline_mejorado.pkl"):
    """
    Load the trained pipeline from a pickle file.

    Args:
        ruta_pipeline: Path of the pickle file to read.

    Returns:
        The object stored in the file.
    """
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # come from a trusted source.
    with open(ruta_pipeline, mode='rb') as archivo_pkl:
        pipeline_cargado = pickle.load(archivo_pkl)
    return pipeline_cargado

def predecir_texto(pipeline, texto):
    """
    Predict whether a text contains hate speech.

    Args:
        pipeline: Trained pipeline exposing ``predict`` and ``predict_proba``.
            It is called with a one-row DataFrame whose only column is
            ``comment``.
        texto: Text to analyse.

    Returns:
        dict: Analysis result with the keys
            ``es_odio`` (bool): True when the predicted label equals 1.
            ``texto``: The input text, unchanged.
            ``probabilidad_odio`` (float): Predicted probability of label 1.
            ``etiqueta`` (str): ``'Odio'`` or ``'No odio'``.
    """
    # Wrap the text in the DataFrame layout the pipeline is called with.
    texto_input = pd.DataFrame({'comment': [texto]})

    # Get the probabilities and the predicted label for the single row.
    probabilidad = pipeline.predict_proba(texto_input)[0]
    prediccion = pipeline.predict(texto_input)[0]

    # predict_proba columns follow the order of ``classes_``; look up the
    # column of label 1 when the pipeline exposes it, and fall back to the
    # second column otherwise.
    indice_odio = 1
    clases = getattr(pipeline, 'classes_', None)
    if clases is not None:
        for posicion, clase in enumerate(clases):
            if clase == 1:
                indice_odio = posicion
                break

    # Convert numpy scalars to native Python types so the result can be
    # serialised (e.g. with json.dumps) and compared with ``is True``.
    es_odio = bool(prediccion == 1)

    resultado = {
        'es_odio': es_odio,
        'texto': texto,
        'probabilidad_odio': float(probabilidad[indice_odio]),
        'etiqueta': 'Odio' if es_odio else 'No odio'
    }

    return resultado

if __name__ == "__main__":
    # Interactive session: load the model once, then classify every text the
    # user types until they enter 'salir'.
    print("Cargando el modelo...")
    try:
        # Only the load is guarded here, so the "error loading the model"
        # message is never printed for failures that happen later in the loop.
        pipeline = cargar_pipeline()
    except Exception as e:
        print(f"Error al cargar el modelo: {str(e)}")
    else:
        print("Modelo cargado exitosamente!")

        # Read-classify-print loop.
        print("\nEscribe un texto para analizar (o 'salir' para terminar):")

        while True:
            try:
                texto = input("\nTexto: ").strip()
            except EOFError:
                # Input stream was closed: end the session like 'salir'.
                break

            if texto.lower() == 'salir':
                break

            if not texto:
                print("Por favor, introduce algún texto.")
                continue

            try:
                resultado = predecir_texto(pipeline, texto)
                print(f"\nResultado: {resultado['etiqueta']}")
                print(f"Probabilidad de odio: {resultado['probabilidad_odio']:.3f}")

            except Exception as e:
                # A failure on one text must not end the session.
                print(f"Error al procesar el texto: {str(e)}")

Cargando el modelo...
Modelo cargado exitosamente!

Escribe un texto para analizar (o 'salir' para terminar):

Texto: salir
