In [58]:
# from google.colab import drive
# drive.mount('/content/drive')

In [59]:
# libraries
import numpy as np
import pandas as pd

In [60]:
import os
dir_en = "training_data/EN/raw-documents/"
all_files = os.listdir(dir_en)

files_array = []
names_files = []

for file_name in all_files:
  try:
    file = open(dir_en + file_name, "r")
    content = file.read()
    names_files = np.append(names_files, file_name)
    files_array = np.append(files_array, content)
    file.close()
  except:
    continue

files_array.shape

(64,)

In [61]:
# Reshape as a column vector
files_array = files_array.reshape(files_array.shape[0], 1)

In [62]:
texto_completo = []
articles_map = {}
for i in range(len(names_files)):
  articles_map[names_files[i]] = files_array[i]
  texto_completo.append(str(files_array[i][0]))

# Show the first articles
for key in list(articles_map.keys())[:5]:
  print(f"> Id: {key} - Sentence: {articles_map[key][0]}\n")

> Id: EN_CC_100000.txt - Sentence: Pentagon plans to serve LAB-GROWN MEAT to troops in the name of climate change 

 The Washington Free Beacon reported that BioMADE, a public-private partnership that has received more than $500 million from the DoD, is responsible for the endeavor. BioMADE announced on its website on June 3 that it is looking for new ideas to reduce the CO2 emissions from food production and transport at military sites. These ideas include "novel cell culture methods suitable for the production of cultivated meat or protein" â€“ essentially a euphemism for lab-grown fake meat.

Lab-grown meat is a new technology where animal muscle and fat tissues are grown from modified animal cells in special equipment. This process uses a mix of chemicals, pressure and temperature to create meat that resembles beef, chicken and pork. Though still in the experimental stage, lab-grown meat has sparked a debate about its efficiency and ethics of producing meat without killing animals.

In [63]:
annotations_1 = "training_data/EN/subtask-1-annotations.txt"

# Initialize a empty dataframe
df_entities = pd.DataFrame(columns=['article_id','entity_mention','start_offset','end_offset','main_role','fine-grained_roles'])
columns=['article_id','entity_mention','start_offset','end_offset','main_role','fine-grained_roles']

with open(annotations_1, encoding='utf-8') as file:
  lines_text_1 = file.readlines()  # Read line by line

for line in lines_text_1:
  line = line.strip()
  line = line.split("\t")
  new_row =  {columns[0]: line[0],
              columns[1]: line[1],
              columns[2]: line[2],
              columns[3]: line[3],
              columns[4]: line[4],
              columns[5]: line[5:]}
  df_entities = pd.concat([df_entities, pd.DataFrame([new_row])], ignore_index=True)

display(df_entities.head()) # Showing dataframe
print("\nSize:",df_entities.shape)

Unnamed: 0,article_id,entity_mention,start_offset,end_offset,main_role,fine-grained_roles
0,EN_UA_103861.txt,Chinese,791,797,Antagonist,[Spy]
1,EN_UA_103861.txt,China,1516,1520,Antagonist,[Instigator]
2,EN_UA_103861.txt,Hamas,2121,2125,Antagonist,[Terrorist]
3,EN_UA_103861.txt,Donald Trump,4909,4920,Protagonist,"[Peacemaker, Guardian]"
4,EN_UA_021270.txt,Yermak,667,672,Antagonist,[Incompetent]



Size: (414, 6)


## Extraccion del contexto de cada entidad para su respectiva clasificacion

Lo primero es realizar la division del conjunto de datos, el cual se va tratar por medio de k folds y tambien por holdout.

Se debe de tener en cuenta que de los datos que tenemos es que estamos intentando averiguar primero el contexto en donde se esta extrayendo la entidad. Bajo este contexto es el cual se va obtener tanto los principales roles como las etiquetas.

### Usando un sliding window
En este caso el approach a usar es un sliding window, donde se estara tomando las palaabras tanto antes y despues, o sea tomando una suerte del contexto de la palabra de en medio, que en este caso es la entidad.

In [64]:
def get_entity_contexts_with_offsets(df, text_dict, window):
    contexts = []
    for index, row in df.iterrows():
        text_id = row['article_id']
        start_offset = int(row['start_offset'])
        end_offset = int(row['end_offset'])
        entity_text = row['entity_mention']
        main_role = row['main_role']
        fine_grained_roles = row['fine-grained_roles']

        text = text_dict.get(text_id)

        if text:
            # Convert text to string if it's a NumPy array
            if isinstance(text, np.ndarray):  # Check if text is a NumPy array
                text = text.astype(str)
                text = " ".join(text)

            # Extract context using the sliding window method from the given offsets
            words = text.split()
            entity_start = len(text[:start_offset].split())
            entity_end = len(text[:end_offset].split())

            context_start = max(0, entity_start - window)
            context_end = min(len(words), entity_end + window)
            context = " ".join(words[context_start:context_end])
            contexts.append({
                'text_id': text_id,
                'entity': entity_text,
                'start_offset': start_offset,
                'end_offset': end_offset,
                'context': context,
                'main_role': main_role,
                'fine_grained_roles': fine_grained_roles

            })
    return pd.DataFrame(contexts)

### Preprocesamiento del texto

Para este caso el preprocesamiento va incluir:

1. Convertir en lowercase
2. Eliminar stop words
3. Convertir las palabras a vectores usando un word embedding

Esto sera aplicado a cada contexto de las entidades


In [65]:
!python -m spacy download en_core_web_sm

Defaulting to user installation because normal site-packages is not writeable
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
      --------------------------------------- 0.3/12.8 MB ? eta -:--:--
     ---- ----------------------------------- 1.6/12.8 MB 6.0 MB/s eta 0:00:02
     ----- ---------------------------------- 1.8/12.8 MB 4.4 MB/s eta 0:00:03
     --------------- ------------------------ 5.0/12.8 MB 7.2 MB/s eta 0:00:02
     -------------------- ------------------- 6.6/12.8 MB 7.3 MB/s eta 0:00:01
     -------------------------- ------------- 8.4/12.8 MB 7.3 MB/s eta 0:00:01
     ------------------------------- -------- 10.0/12.8 MB 7.4 MB/s eta 0:00:01
     ------------------------------------ --- 11.5/12.8 MB 7.4 MB/s eta 0:00:01
     ---------------------------------------- 1

In [66]:
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords
import nltk

# Descargar las stopwords de NLTK si no lo has hecho aún
nltk.download("stopwords")

# Configuración inicial
stop_words = set(stopwords.words("english"))
nlp = spacy.load("en_core_web_sm")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Usuario\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [67]:
import string

# Este preprocesamiento simple se hara en todo el texto completo
def preprocess_simple(text):
    text = text.lower()
    text = text.translate(str.maketrans("", "", string.punctuation))
    return text

text = "Barack Obama was the 44th President of the United States. He was born in Hawaii and won a Nobel Prize."
processed_text = preprocess_simple(text)
print(processed_text)

barack obama was the 44th president of the united states he was born in hawaii and won a nobel prize


In [68]:
# Este preprocesamiento se hace sobre cada contexto extraido de las entidades
def preprocess_text(text):
    # 1. Convertir todo a lowercase
    text = text.lower()

    # 2. Tokenización con SpaCy y eliminación de stop words
    doc = nlp(text)
    filtered_tokens = [token.text for token in doc if token.text not in stop_words and not token.is_punct]
    preprocessed_text = " ".join(filtered_tokens)

    return preprocessed_text

text = "Barack Obama was the 44th President of the United States. He was born in Hawaii and won a Nobel Prize."
preprocessed_text = preprocess_text(text)
preprocessed_text

'barack obama 44th president united states born hawaii nobel prize'

## Aplicando ML

In [69]:
df_context = get_entity_contexts_with_offsets(df_entities,articles_map,window=3)
display(df_context.head())

Unnamed: 0,text_id,entity,start_offset,end_offset,context,main_role,fine_grained_roles
0,EN_UA_021270.txt,Yermak,667,672,Institute think tank. Yermak sought to assure,Antagonist,[Incompetent]
1,EN_UA_021270.txt,Zelensky,846,853,this is about Zelensky sending envoys to,Antagonist,[Incompetent]
2,EN_UA_021270.txt,Zelensky admin,1400,1413,"Without doubt, the Zelensky admin is in damage",Antagonist,[Incompetent]
3,EN_UA_021270.txt,Yermak,1900,1905,out of Ukraine. Yermak also sought to,Antagonist,[Traitor]
4,EN_UA_021270.txt,Zelensky's,2104,2113,He further emphasized Zelensky's continued rej...,Antagonist,[Bigot]


In [72]:
# Aplicando CounterVectorizer
def get_vectorizer(embedding_method):
  # 3. Vectorización del texto
  if embedding_method == "countvectorizer":
      vectorizer = CountVectorizer()
  elif embedding_method == "tfidf":
      from sklearn.feature_extraction.text import TfidfVectorizer
      vectorizer = TfidfVectorizer()
  else:
      raise ValueError("Método no válido. Usa 'countvectorizer' o 'tfidf'.")

  return vectorizer

vectorizer = get_vectorizer('countvectorizer')
# Texto completo
print(texto_completo[:5])

# Vectorizando el texto
arreglo = []
for texto in texto_completo:
    arreglo.append(preprocess_text(texto))
texto_completo = arreglo
vectorizer.fit(texto_completo)



In [73]:

from gensim.models import Word2Vec

def vectorize_text(text, model):
    words = text.split()
    word_vectors = [model.wv[word] for word in words if word in model.wv]
    return np.mean(word_vectors, axis=0) if word_vectors else np.zeros(model.vector_size)

# Corpus tokenizado
model = Word2Vec(texto_completo, vector_size=100, window=5, min_count=1, workers=4)

In [84]:
from tkinter.constants import X
x_train = []
y_train = []
for index, row in df_context.iterrows():

  context = row['context']

  #vector = vectorizer.transform([context])
  #X_train = vector.toarray()
  #x_train.append(X_train[0])

  # Usando word2vec
  
  X_train_vectorized = vectorize_text(context, model)
  
  x_train.append(X_train_vectorized)
  y_train.append(row['main_role'])

display(x_train[5:10])
display(y_train[5:10])

[array([-0.12965807,  0.2614058 ,  0.3825615 ,  0.06781039, -0.11290945,
        -0.13926621, -0.06132522,  0.34430897, -0.23678863, -0.37603298,
         0.09422948, -0.18563962,  0.02122516, -0.0111503 ,  0.37388954,
         0.18312204,  0.06768431,  0.2531791 , -0.2763136 , -0.26433483,
        -0.03195718,  0.08412216,  0.07166636,  0.04717037, -0.01265784,
         0.4143348 ,  0.04887148,  0.07996703,  0.06585085, -0.05598969,
        -0.10321087, -0.16216388, -0.10644387,  0.08324217,  0.06763124,
         0.0913385 ,  0.33835277, -0.19994356, -0.11828972,  0.2166864 ,
         0.19823532,  0.15470044,  0.05462378,  0.20333138,  0.09069134,
         0.16993013, -0.09627459,  0.08558156, -0.19222352, -0.14142717,
         0.05921348, -0.0903925 , -0.17199789, -0.05280112, -0.22773142,
        -0.0775095 , -0.01184193,  0.01513863,  0.34959212,  0.0258062 ,
        -0.05296008,  0.17243077,  0.17571567,  0.00411989,  0.12262824,
         0.3097409 ,  0.07663813,  0.15383767, -0.0

['Protagonist', 'Antagonist', 'Antagonist', 'Antagonist', 'Innocent']

### Se hace el entrenamiento, se va probar con el siguiente pipeline

In [85]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Divide los datos en entrenamiento y prueba
X = np.array(x_train)
y = np.array(y_train)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define los modelos y sus hiperparámetros para búsqueda
models = {
    "Random Forest": {
        "model": RandomForestClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100, 200],
            "max_depth": [None, 10, 20, 30],
            "min_samples_split": [2, 5, 10]
        }
    },
    "Logistic Regression": {
        "model": LogisticRegression(max_iter=1000, random_state=42),
        "params": {
            "C": [0.01, 0.1, 1, 10],
            "solver": ["lbfgs", "liblinear"]
        }
    },
    "KNN": {
        "model": KNeighborsClassifier(),
        "params": {
            "n_neighbors": [3, 5, 7],
            "weights": ["uniform", "distance"],
            "metric": ["euclidean", "manhattan"]
        }
    },
    "Multinomial Naive Bayes": {
        "model": MultinomialNB(),
        "params": {
            "alpha": [0.1, 0.5, 1, 5, 10]
        }
    }
}

# Entrena y evalúa cada modelo con GridSearchCV
for name, config in models.items():
    print(f"\nEntrenando y ajustando: {name}")

    # Configura la búsqueda de hiperparámetros
    try:
        grid = GridSearchCV(estimator=config["model"], param_grid=config["params"], cv=5, scoring="roc_auc_ovr", n_jobs=-1)
        grid.fit(X_train, y_train)

        # Mejores parámetros
        print(f"Mejores parámetros: {grid.best_params_}")

        # Predicción en el conjunto de prueba
        y_pred = grid.best_estimator_.predict(X_test)

        # Evaluación
        print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
        print("Classification Report:")
        print(classification_report(y_test, y_pred, target_names=["antagonista", "protagonista", "inocente"]))
    except:
        print(f"El modelo {name} no se pudo ejecutar")



Entrenando y ajustando: Random Forest
Mejores parámetros: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy: 0.59
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.59      1.00      0.74        16
protagonista       0.00      0.00      0.00         2
    inocente       0.00      0.00      0.00         9

    accuracy                           0.59        27
   macro avg       0.20      0.33      0.25        27
weighted avg       0.35      0.59      0.44        27


Entrenando y ajustando: Logistic Regression
Mejores parámetros: {'C': 0.01, 'solver': 'lbfgs'}
Accuracy: 0.59
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.59      1.00      0.74        16
protagonista       0.00      0.00      0.00         2
    inocente       0.00      0.00      0.00         9

    accuracy                           0.59        27
   macro avg       0.20      0.33      0.25        2

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
        nan 0.51710594        nan 0.52041281        nan 0.52041281]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Usando TFID para vectorizar

In [None]:
vectorizar_tfid = get_vectorizer('tfidf')
vectorizar_tfid.fit(texto_completo)

x_train = []
y_train = []
for index, row in df_context.iterrows():

  context = row['context']

  vector = vectorizar_tfid.transform([context])

  X_train = vector.toarray()
  x_train.append(X_train[0])

  y_train.append(row['main_role'])

display(x_train[:5])
display(y_train[:5])

[array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.])]

['Antagonist', 'Antagonist', 'Antagonist', 'Antagonist', 'Antagonist']

In [None]:
# Divide los datos en entrenamiento y prueba
X = np.array(x_train)
y = np.array(y_train)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define los modelos y sus hiperparámetros para búsqueda
models = {
    "Random Forest": {
        "model": RandomForestClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100, 200],
            "max_depth": [None, 10, 20, 30],
            "min_samples_split": [2, 5, 10]
        }
    },
    "Logistic Regression": {
        "model": LogisticRegression(max_iter=1000, random_state=42),
        "params": {
            "C": [0.01, 0.1, 1, 10],
            "solver": ["lbfgs", "liblinear"]
        }
    },
    "KNN": {
        "model": KNeighborsClassifier(),
        "params": {
            "n_neighbors": [3, 5, 7, 9, 10, 12, 14],
            "weights": ["uniform", "distance"],
            "metric": ["euclidean", "manhattan"]
        }
    },
    "Multinomial Naive Bayes": {
        "model": MultinomialNB(),
        "params": {
            "alpha": [0.1, 0.5, 1, 5, 10]
        }
    }
}

# Entrena y evalúa cada modelo con GridSearchCV
for name, config in models.items():
    print(f"\nEntrenando y ajustando: {name}")

    try:
        # Configura la búsqueda de hiperparámetros
        grid = GridSearchCV(estimator=config["model"], param_grid=config["params"], cv=5, scoring="roc_auc_ovr", n_jobs=-1)
        grid.fit(X_train, y_train)

        # Mejores parámetros
        print(f"Mejores parámetros: {grid.best_params_}")

        # Predicción en el conjunto de prueba
        y_pred = grid.best_estimator_.predict(X_test)

        # Evaluación
        print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
        print("Classification Report:")
        print(classification_report(y_test, y_pred, target_names=["antagonista", "protagonista", "inocente"]))
    except:
        print(f"El modelo {name} no se pudo ejecutar")



Entrenando y ajustando: Random Forest
Mejores parámetros: {'max_depth': 30, 'min_samples_split': 10, 'n_estimators': 50}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.64      1.00      0.78        16
protagonista       0.00      0.00      0.00         2
    inocente       0.50      0.11      0.18         9

    accuracy                           0.63        27
   macro avg       0.38      0.37      0.32        27
weighted avg       0.55      0.63      0.52        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 0.1, 'solver': 'lbfgs'}
Accuracy: 0.59
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.59      1.00      0.74        16
protagonista       0.00      0.00      0.00         2
    inocente       0.00      0.00      0.00         9

    accuracy                           0.59        27
   macro avg       0.20      0.33      0.25        27
weighted avg       0.35      0.59      0.44        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 0.70864748 0.73801159 0.71525416 0.74069624 0.72737615 0.75956129
 0.73958214 0.75452185        nan 0.70661775        nan 0.71635702
        nan 0.76757224        nan 0.77760655        nan 0.76012317
        nan 0.77730314        nan 0.76032742]


Mejores parámetros: {'metric': 'manhattan', 'n_neighbors': 9, 'weights': 'distance'}
Accuracy: 0.56
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.58      0.88      0.70        16
protagonista       0.00      0.00      0.00         2
    inocente       0.33      0.11      0.17         9

    accuracy                           0.56        27
   macro avg       0.31      0.33      0.29        27
weighted avg       0.46      0.56      0.47        27


Entrenando y ajustando: Multinomial Naive Bayes
Mejores parámetros: {'alpha': 5}
Accuracy: 0.59
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.59      1.00      0.74        16
protagonista       0.00      0.00      0.00         2
    inocente       0.00      0.00      0.00         9

    accuracy                           0.59        27
   macro avg       0.20      0.33      0.25        27
weighted avg       0.35      0.59      0.44       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Probando diferentes tamaños de ventanas

In [None]:
# Abstracion en metodo
def test_algoritthms(x_train, y_train):
  # Divide los datos en entrenamiento y prueba
  X = np.array(x_train)
  y = np.array(y_train)

  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

  # Define los modelos y sus hiperparámetros para búsqueda
  models = {
      "Random Forest": {
          "model": RandomForestClassifier(random_state=42),
          "params": {
              "n_estimators": [50, 100, 200],
              "max_depth": [None, 10, 20, 30],
              "min_samples_split": [2, 5, 10]
          }
      },
      "Logistic Regression": {
          "model": LogisticRegression(max_iter=1000, random_state=42),
          "params": {
              "C": [0.01, 0.1, 1, 10],
              "solver": ["lbfgs", "liblinear"]
          }
      },
      "KNN": {
          "model": KNeighborsClassifier(),
          "params": {
              "n_neighbors": [3, 5, 7, 9, 10, 12, 14],
              "weights": ["uniform", "distance"],
              "metric": ["euclidean", "manhattan"]
          }
      },
      "Multinomial Naive Bayes": {
          "model": MultinomialNB(),
          "params": {
              "alpha": [0.1, 0.5, 1, 5, 10]
          }
      }
  }

  # Entrena y evalúa cada modelo con GridSearchCV
  for name, config in models.items():
    print(f"\nEntrenando y ajustando: {name}")

    try:
        # Configura la búsqueda de hiperparámetros
        grid = GridSearchCV(estimator=config["model"], param_grid=config["params"], cv=5, scoring="roc_auc_ovr", n_jobs=-1)
        grid.fit(X_train, y_train)

        # Mejores parámetros
        print(f"Mejores parámetros: {grid.best_params_}")

        # Predicción en el conjunto de prueba
        y_pred = grid.best_estimator_.predict(X_test)

        # Evaluación
        print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
        print("Classification Report:")
        print(classification_report(y_test, y_pred, target_names=["antagonista", "protagonista", "inocente"]))
    except:
        print(f"El modelo {name} no se pudo ejecutar")


In [87]:
for ventana in range(1,11):
  print(f"### Ventana usada: {ventana} ###")
  df_context = get_entity_contexts_with_offsets(df_entities,articles_map,window=ventana)

  x_train = []
  y_train = []
  for index, row in df_context.iterrows():

    context = row['context']

    vector = vectorizer.transform([context])

    X_train = vector.toarray()
    x_train.append(X_train[0])

    y_train.append(row['main_role'])

  test_algoritthms(x_train,y_train)
  print("##########################\n\n")

### Ventana usada: 1 ###

Entrenando y ajustando: Random Forest
Mejores parámetros: {'max_depth': 30, 'min_samples_split': 10, 'n_estimators': 50}
Accuracy: 0.59
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.60      0.94      0.73        16
protagonista       0.00      0.00      0.00         2
    inocente       0.50      0.11      0.18         9

    accuracy                           0.59        27
   macro avg       0.37      0.35      0.30        27
weighted avg       0.52      0.59      0.49        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 1, 'solver': 'liblinear'}
Accuracy: 0.56
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.65      0.81      0.72        16
protagonista       0.00      0.00      0.00         2
    inocente       0.29      0.22      0.25         9

    accuracy                           0.56        27
   macro avg       0.31      0.34      0.32        27
weighted avg       0.48      0.56      0.51        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'manhattan', 'n_neighbors': 10, 'weights': 'uniform'}
Accuracy: 0.59
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.59      1.00      0.74        16
protagonista       0.00      0.00      0.00         2
    inocente       0.00      0.00      0.00         9

    accuracy                           0.59        27
   macro avg       0.20      0.33      0.25        27
weighted avg       0.35      0.59      0.44        27

##########################


### Ventana usada: 2 ###

Entrenando y ajustando: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}
Accuracy: 0.59
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.59      1.00      0.74        16
protagonista       0.00      0.00      0.00         2
    inocente       0.00      0.00      0.00         9

    accuracy                           0.59        27
   macro avg       0.20      0.33      0.25        27
weighted avg       0.35      0.59      0.44        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 1, 'solver': 'liblinear'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.70      0.88      0.78        16
protagonista       0.00      0.00      0.00         2
    inocente       0.57      0.44      0.50         9

    accuracy                           0.67        27
   macro avg       0.42      0.44      0.43        27
weighted avg       0.61      0.67      0.63        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'manhattan', 'n_neighbors': 10, 'weights': 'distance'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.65      0.94      0.77        16
protagonista       0.00      0.00      0.00         2
    inocente       0.75      0.33      0.46         9

    accuracy                           0.67        27
   macro avg       0.47      0.42      0.41        27
weighted avg       0.64      0.67      0.61        27

##########################


### Ventana usada: 3 ###

Entrenando y ajustando: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      1.00      0.76        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.11      0.20         9

    accuracy                           0.63        27
   macro avg       0.54      0.37      0.32        27
weighted avg       0.70      0.63      0.52        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 0.1, 'solver': 'lbfgs'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.64      1.00      0.78        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.22      0.36         9

    accuracy                           0.67        27
   macro avg       0.55      0.41      0.38        27
weighted avg       0.71      0.67      0.58        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'manhattan', 'n_neighbors': 12, 'weights': 'distance'}
Accuracy: 0.70
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.70      1.00      0.82        16
protagonista       0.00      0.00      0.00         2
    inocente       0.75      0.33      0.46         9

    accuracy                           0.70        27
   macro avg       0.48      0.44      0.43        27
weighted avg       0.66      0.70      0.64        27

##########################


### Ventana usada: 4 ###

Entrenando y ajustando: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.64      1.00      0.78        16
protagonista       0.00      0.00      0.00         2
    inocente       0.50      0.11      0.18         9

    accuracy                           0.63        27
   macro avg       0.38      0.37      0.32        27
weighted avg       0.55      0.63      0.52        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 0.1, 'solver': 'lbfgs'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.64      1.00      0.78        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.22      0.36         9

    accuracy                           0.67        27
   macro avg       0.55      0.41      0.38        27
weighted avg       0.71      0.67      0.58        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.76      0.81      0.79        16
protagonista       0.00      0.00      0.00         2
    inocente       0.56      0.56      0.56         9

    accuracy                           0.67        27
   macro avg       0.44      0.46      0.45        27
weighted avg       0.64      0.67      0.65        27

##########################


### Ventana usada: 5 ###

Entrenando y ajustando: Random Forest
Mejores parámetros: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 200}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      1.00      0.76        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.11      0.20         9

    accuracy                           0.63        27
   macro a

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 1, 'solver': 'lbfgs'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.67      1.00      0.80        16
protagonista       0.00      0.00      0.00         2
    inocente       0.67      0.22      0.33         9

    accuracy                           0.67        27
   macro avg       0.44      0.41      0.38        27
weighted avg       0.62      0.67      0.59        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'distance'}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.67      0.88      0.76        16
protagonista       0.00      0.00      0.00         2
    inocente       0.50      0.33      0.40         9

    accuracy                           0.63        27
   macro avg       0.39      0.40      0.39        27
weighted avg       0.56      0.63      0.58        27

##########################


### Ventana usada: 6 ###

Entrenando y ajustando: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 100}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      1.00      0.76        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.11      0.20         9

    accuracy                           0.63        27
   macro avg       0.54      0.37      0.32        27
weighted avg       0.70      0.63      0.52        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 0.1, 'solver': 'lbfgs'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.64      1.00      0.78        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.22      0.36         9

    accuracy                           0.67        27
   macro avg       0.55      0.41      0.38        27
weighted avg       0.71      0.67      0.58        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'manhattan', 'n_neighbors': 9, 'weights': 'distance'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.68      0.94      0.79        16
protagonista       0.00      0.00      0.00         2
    inocente       0.60      0.33      0.43         9

    accuracy                           0.67        27
   macro avg       0.43      0.42      0.41        27
weighted avg       0.60      0.67      0.61        27

##########################


### Ventana usada: 7 ###

Entrenando y ajustando: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'max_depth': 20, 'min_samples_split': 5, 'n_estimators': 50}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      1.00      0.76        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.11      0.20         9

    accuracy                           0.63        27
   macro avg       0.54      0.37      0.32        27
weighted avg       0.70      0.63      0.52        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 0.1, 'solver': 'lbfgs'}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      0.94      0.75        16
protagonista       0.00      0.00      0.00         2
    inocente       0.67      0.22      0.33         9

    accuracy                           0.63        27
   macro avg       0.43      0.39      0.36        27
weighted avg       0.59      0.63      0.56        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'manhattan', 'n_neighbors': 14, 'weights': 'distance'}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.68      0.94      0.79        16
protagonista       0.00      0.00      0.00         2
    inocente       0.60      0.33      0.43         9

    accuracy                           0.67        27
   macro avg       0.43      0.42      0.41        27
weighted avg       0.60      0.67      0.61        27

##########################


### Ventana usada: 8 ###

Entrenando y ajustando: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.64      1.00      0.78        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.22      0.36         9

    accuracy                           0.67        27
   macro avg       0.55      0.41      0.38        27
weighted avg       0.71      0.67      0.58        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 0.1, 'solver': 'lbfgs'}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      0.94      0.75        16
protagonista       0.00      0.00      0.00         2
    inocente       0.67      0.22      0.33         9

    accuracy                           0.63        27
   macro avg       0.43      0.39      0.36        27
weighted avg       0.59      0.63      0.56        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'euclidean', 'n_neighbors': 10, 'weights': 'distance'}
Accuracy: 0.52
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.58      0.69      0.63        16
protagonista       0.00      0.00      0.00         2
    inocente       0.38      0.33      0.35         9

    accuracy                           0.52        27
   macro avg       0.32      0.34      0.33        27
weighted avg       0.47      0.52      0.49        27

##########################


### Ventana usada: 9 ###

Entrenando y ajustando: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      1.00      0.76        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.11      0.20         9

    accuracy                           0.63        27
   macro avg       0.54      0.37      0.32        27
weighted avg       0.70      0.63      0.52        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 0.1, 'solver': 'lbfgs'}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      0.94      0.75        16
protagonista       0.00      0.00      0.00         2
    inocente       0.67      0.22      0.33         9

    accuracy                           0.63        27
   macro avg       0.43      0.39      0.36        27
weighted avg       0.59      0.63      0.56        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'distance'}
Accuracy: 0.48
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.64      0.44      0.52        16
protagonista       0.00      0.00      0.00         2
    inocente       0.38      0.67      0.48         9

    accuracy                           0.48        27
   macro avg       0.34      0.37      0.33        27
weighted avg       0.50      0.48      0.47        27

##########################


### Ventana usada: 10 ###

Entrenando y ajustando: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      1.00      0.76        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.11      0.20         9

    accuracy                           0.63        27
   macro avg       0.54      0.37      0.32        27
weighted avg       0.70      0.63      0.52        27


Entrenando y ajustando: Logistic Regression


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'C': 0.01, 'solver': 'lbfgs'}
Accuracy: 0.63
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.62      1.00      0.76        16
protagonista       0.00      0.00      0.00         2
    inocente       1.00      0.11      0.20         9

    accuracy                           0.63        27
   macro avg       0.54      0.37      0.32        27
weighted avg       0.70      0.63      0.52        27


Entrenando y ajustando: KNN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Mejores parámetros: {'metric': 'manhattan', 'n_neighbors': 12, 'weights': 'distance'}
Accuracy: 0.52
Classification Report:
              precision    recall  f1-score   support

 antagonista       0.61      0.69      0.65        16
protagonista       0.00      0.00      0.00         2
    inocente       0.33      0.33      0.33         9

    accuracy                           0.52        27
   macro avg       0.31      0.34      0.33        27
weighted avg       0.47      0.52      0.49        27

##########################




  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
