In [17]:
import pandas as pd
import json
import re
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report


#Carga de datos
train_data = pd.read_csv("../../Data/BasedOnEkman/train_ekman.csv")
test_data = pd.read_csv("../../Data/BasedOnEkman/test_ekman.csv")
valid_data = pd.read_csv("../../Data/BasedOnEkman/valid_ekman.csv")



# Cargar el mapeo de Ekman desde el archivo JSON
with open("../../Data/GoEmotions/ekman_mapping.json", "r") as f:
    ekman_mapping = json.load(f)
ekman_labels = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']
ekman_to_id = {label: idx for idx, label in enumerate(ekman_labels)}

# --- PREPROCESAMIENTO PARA LEMATIZACIÓN ---
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    words = re.findall(r'\b\w+\b', text.lower())
    lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
    return " ".join(lemmatized_words)

# Datos
X_train_raw = train_data['Text'].values
X_test_raw = test_data['Text'].values
y_train = train_data['Emotion'].values
y_test = test_data['Emotion'].values

# Datos lematizados
X_train_lem = [preprocess_text(text) for text in X_train_raw]
X_test_lem = [preprocess_text(text) for text in X_test_raw]


In [18]:
# --- DIRECTORIO DE SALIDA DE LOS PLOTS ---
output_dir = "../../Plots/Experiment1/Ekman/Experiment/"


import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Función para graficar matriz de confusión
def plot_confusion_matrix(y_true, y_pred, labels, title):
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(labels))))

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues", 
        xticklabels=labels,
        yticklabels=labels,
        cbar=True
    )
    plt.title(f'Matriz de Confusión - {title}')
    plt.ylabel('Etiqueta Verdadera')
    plt.xlabel('Etiqueta Predicha')
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.tight_layout()
    # Guardar archivo
    filename = title.replace(" ", "_").replace("(", "").replace(")", "").lower() + ".png"
    plt.savefig(output_dir + filename)
    plt.close()
    
    

In [19]:
# --- FUNCION GENERAL PARA COMPARAR ---
def train_and_evaluate(model, X_train, y_train, X_test, y_test, label):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"\n--- {label} ---")
    print(f"Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
    # Mostrar matriz de confusión
    
    plot_confusion_matrix(y_test, y_pred, ekman_labels, label)

In [20]:
# --- VECTORIZADORES ---
vectorizer_raw = TfidfVectorizer(max_features=10000)
vectorizer_stopwords = TfidfVectorizer(max_features=10000, stop_words='english')
vectorizer_lowercase = TfidfVectorizer(max_features=10000, lowercase=True)
vectorizer_strip = TfidfVectorizer(max_features=10000, strip_accents='unicode')
vectorizer_stopwords_lowercase = TfidfVectorizer(max_features=10000, stop_words='english', lowercase=True)
vectorizer_strip_lowercase = TfidfVectorizer(max_features=10000, strip_accents='unicode', lowercase=True)
vectorizer_complete = TfidfVectorizer(max_features=10000, stop_words='english', lowercase=True, strip_accents='unicode')

#TRAIN Y TEST SIN LEMATIZACIÓN
X_train_raw_tfidf = vectorizer_raw.fit_transform(X_train_raw)
X_test_raw_tfidf = vectorizer_raw.transform(X_test_raw)

X_train_raw_tfidf_stopwords = vectorizer_stopwords.fit_transform(X_train_raw)
X_test_raw_tfidf_stopwords = vectorizer_stopwords.transform(X_test_raw)

X_train_raw_tfidf_lowercase = vectorizer_lowercase.fit_transform(X_train_raw)
X_test_raw_tfidf_lowercase = vectorizer_lowercase.transform(X_test_raw)

X_train_raw_tfidf_strip = vectorizer_strip.fit_transform(X_train_raw)
X_test_raw_tfidf_strip = vectorizer_strip.transform(X_test_raw)

X_train_raw_tfidf_stopwords_lowercase = vectorizer_stopwords_lowercase.fit_transform(X_train_raw)
X_test_raw_tfidf_stopwords_lowercase = vectorizer_stopwords_lowercase.transform(X_test_raw)

X_train_raw_tfidf_strip_lowercase = vectorizer_strip_lowercase.fit_transform(X_train_raw)
X_test_raw_tfidf_strip_lowercase = vectorizer_strip_lowercase.transform(X_test_raw)

X_train_raw_tfidf_complete = vectorizer_complete.fit_transform(X_train_raw)
X_test_raw_tfidf_complete = vectorizer_complete.transform(X_test_raw)

#TRAIN Y TEST PARA LEMATIZACIÓN
X_train_lem_tfidf = vectorizer_raw.fit_transform(X_train_lem)
X_test_lem_tfidf = vectorizer_raw.transform(X_test_lem)

X_train_lem_tfidf_stopwords = vectorizer_stopwords.fit_transform(X_train_lem)
X_test_lem_tfidf_stopwords = vectorizer_stopwords.transform(X_test_lem)

X_train_lem_tfidf_lowercase = vectorizer_lowercase.fit_transform(X_train_lem)
X_test_lem_tfidf_lowercase = vectorizer_lowercase.transform(X_test_lem)

X_train_lem_tfidf_strip = vectorizer_strip.fit_transform(X_train_lem)
X_test_lem_tfidf_strip = vectorizer_strip.transform(X_test_lem)

X_train_lem_tfidf_stopwords_lowercase = vectorizer_stopwords_lowercase.fit_transform(X_train_lem)
X_test_lem_tfidf_stopwords_lowercase = vectorizer_stopwords_lowercase.transform(X_test_lem)

X_train_lem_tfidf_strip_lowercase = vectorizer_strip_lowercase.fit_transform(X_train_lem)
X_test_lem_tfidf_strip_lowercase = vectorizer_strip_lowercase.transform(X_test_lem)

X_train_lem_tfidf_complete = vectorizer_complete.fit_transform(X_train_lem)
X_test_lem_tfidf_complete = vectorizer_complete.transform(X_test_lem)





In [21]:
#Modelos de clasificación
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'SVM (linear)': SVC(kernel='linear'),
    'Decision Tree': DecisionTreeClassifier(random_state=42)
}

In [22]:
# --- COMPARATIVA FINAL ---
print("\n-- TF-IDF --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf, y_train, X_test_raw_tfidf, y_test, f"{model_name} (TF-IDF)")

print("\n-- TF-IDF SIN STOPWORDS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_stopwords, y_train, X_test_raw_tfidf_stopwords, y_test, f"{model_name} (TF-IDF sin stopwords)")

print("\n-- TF-IDF EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_lowercase, y_train, X_test_raw_tfidf_lowercase, y_test, f"{model_name} (TF-IDF en minusculas)")

print("\n-- TF-IDF SIN ACENTOS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_strip, y_train, X_test_raw_tfidf_strip, y_test, f"{model_name} (TF-IDF sin acentos)")

print("\n-- TF-IDF SIN STOPWORDS Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_stopwords_lowercase, y_train, X_test_raw_tfidf_stopwords_lowercase, y_test, f"{model_name} (TF-IDF sin stopwords y en minusculas)")

print("\n-- TF-IDF SIN ACENTOS Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_strip_lowercase, y_train, X_test_raw_tfidf_strip_lowercase, y_test, f"{model_name} (TF-IDF sin acentos y en minusculas)")

print("\n-- TF-IDF COMPLETO --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_complete, y_train, X_test_raw_tfidf_complete, y_test, f"{model_name} (TF-IDF completo)")


-- TF-IDF --

--- Logistic Regression (TF-IDF) ---
Accuracy: 0.6329
              precision    recall  f1-score   support

           0       0.57      0.30      0.39       572
           1       0.69      0.26      0.38        76
           2       0.88      0.29      0.43        80
           3       0.76      0.77      0.77      1863
           4       0.70      0.34      0.46       283
           5       0.56      0.26      0.36       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.67      0.43      0.49      4968
weighted avg       0.64      0.63      0.61      4968


--- SVM (linear) (TF-IDF) ---
Accuracy: 0.6449
              precision    recall  f1-score   support

           0       0.57      0.32      0.41       572
           1       0.57      0.38      0.46        76
           2       0.76      0.51      0.61        80
           3       0.81      0.75      0.78      1863
           4   

-- TF-IDF --

--- Logistic Regression (TF-IDF) ---
Accuracy: 0.6329
              precision    recall  f1-score   support

           0       0.57      0.30      0.39       572
           1       0.69      0.26      0.38        76
           2       0.88      0.29      0.43        80
           3       0.76      0.77      0.77      1863
           4       0.70      0.34      0.46       283
           5       0.56      0.26      0.36       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.67      0.43      0.49      4968
weighted avg       0.64      0.63      0.61      4968


--- SVM (linear) (TF-IDF) ---
Accuracy: 0.6449
              precision    recall  f1-score   support

           0       0.57      0.32      0.41       572
           1       0.57      0.38      0.46        76
           2       0.76      0.51      0.61        80
           3       0.81      0.75      0.78      1863
           4       0.70      0.46      0.55       283
           5       0.59      0.25      0.35       488
           6       0.54      0.81      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.65      0.50      0.54      4968
weighted avg       0.66      0.64      0.63      4968


--- Decision Tree (TF-IDF) ---
Accuracy: 0.5280
              precision    recall  f1-score   support

           0       0.33      0.29      0.31       572
           1       0.33      0.28      0.30        76
           2       0.33      0.23      0.27        80
           3       0.68      0.69      0.69      1863
           4       0.43      0.31      0.36       283
           5       0.31      0.26      0.28       488
           6       0.50      0.56      0.53      1606

    accuracy                           0.53      4968
   macro avg       0.41      0.38      0.39      4968
weighted avg       0.52      0.53      0.52      4968


-- TF-IDF SIN STOPWORDS --

--- Logistic Regression (TF-IDF sin stopwords) ---
Accuracy: 0.6339
              precision    recall  f1-score   support

           0       0.58      0.30      0.39       572
           1       0.71      0.29      0.41        76
           2       0.87      0.34      0.49        80
           3       0.77      0.77      0.77      1863
           4       0.68      0.36      0.47       283
           5       0.59      0.20      0.29       488
           6       0.53      0.81      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.68      0.44      0.50      4968
weighted avg       0.65      0.63      0.61      4968


--- SVM (linear) (TF-IDF sin stopwords) ---
Accuracy: 0.6381
              precision    recall  f1-score   support

           0       0.57      0.33      0.42       572
           1       0.57      0.41      0.48        76
           2       0.70      0.53      0.60        80
           3       0.79      0.75      0.77      1863
           4       0.67      0.45      0.54       283
           5       0.66      0.17      0.27       488
           6       0.53      0.81      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.64      0.49      0.53      4968
weighted avg       0.66      0.64      0.62      4968


--- Decision Tree (TF-IDF sin stopwords) ---
Accuracy: 0.5672
              precision    recall  f1-score   support

           0       0.36      0.31      0.33       572
           1       0.46      0.32      0.38        76
           2       0.60      0.44      0.51        80
           3       0.70      0.74      0.72      1863
           4       0.52      0.42      0.46       283
           5       0.34      0.28      0.31       488
           6       0.54      0.59      0.56      1606

    accuracy                           0.57      4968
   macro avg       0.50      0.44      0.47      4968
weighted avg       0.56      0.57      0.56      4968


-- TF-IDF EN MINUSCULAS --

--- Logistic Regression (TF-IDF en minusculas) ---
Accuracy: 0.6329
              precision    recall  f1-score   support

           0       0.57      0.30      0.39       572
           1       0.69      0.26      0.38        76
           2       0.88      0.29      0.43        80
           3       0.76      0.77      0.77      1863
           4       0.70      0.34      0.46       283
           5       0.56      0.26      0.36       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.67      0.43      0.49      4968
weighted avg       0.64      0.63      0.61      4968


--- SVM (linear) (TF-IDF en minusculas) ---
Accuracy: 0.6449
              precision    recall  f1-score   support

           0       0.57      0.32      0.41       572
           1       0.57      0.38      0.46        76
           2       0.76      0.51      0.61        80
           3       0.81      0.75      0.78      1863
           4       0.70      0.46      0.55       283
           5       0.59      0.25      0.35       488
           6       0.54      0.81      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.65      0.50      0.54      4968
weighted avg       0.66      0.64      0.63      4968


--- Decision Tree (TF-IDF en minusculas) ---
Accuracy: 0.5280
              precision    recall  f1-score   support

           0       0.33      0.29      0.31       572
           1       0.33      0.28      0.30        76
           2       0.33      0.23      0.27        80
           3       0.68      0.69      0.69      1863
           4       0.43      0.31      0.36       283
           5       0.31      0.26      0.28       488
           6       0.50      0.56      0.53      1606

    accuracy                           0.53      4968
   macro avg       0.41      0.38      0.39      4968
weighted avg       0.52      0.53      0.52      4968


-- TF-IDF SIN ACENTOS --

--- Logistic Regression (TF-IDF sin acentos) ---
Accuracy: 0.6333
              precision    recall  f1-score   support

           0       0.58      0.30      0.40       572
           1       0.68      0.25      0.37        76
           2       0.88      0.29      0.43        80
           3       0.76      0.77      0.77      1863
           4       0.70      0.34      0.46       283
           5       0.56      0.27      0.36       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.67      0.43      0.49      4968
weighted avg       0.65      0.63      0.61      4968


--- SVM (linear) (TF-IDF sin acentos) ---
Accuracy: 0.6439
              precision    recall  f1-score   support

           0       0.58      0.31      0.41       572
           1       0.57      0.38      0.46        76
           2       0.74      0.53      0.61        80
           3       0.81      0.75      0.78      1863
           4       0.70      0.46      0.55       283
           5       0.58      0.25      0.35       488
           6       0.54      0.81      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.64      0.50      0.54      4968
weighted avg       0.66      0.64      0.63      4968


--- Decision Tree (TF-IDF sin acentos) ---
Accuracy: 0.5276
              precision    recall  f1-score   support

           0       0.31      0.28      0.30       572
           1       0.32      0.25      0.28        76
           2       0.36      0.19      0.25        80
           3       0.69      0.70      0.69      1863
           4       0.38      0.31      0.34       283
           5       0.32      0.27      0.29       488
           6       0.50      0.56      0.53      1606

    accuracy                           0.53      4968
   macro avg       0.41      0.37      0.38      4968
weighted avg       0.52      0.53      0.52      4968


-- TF-IDF SIN STOPWORDS Y EN MINUSCULAS --

--- Logistic Regression (TF-IDF sin stopwords y en minusculas) ---
Accuracy: 0.6339
              precision    recall  f1-score   support

           0       0.58      0.30      0.39       572
           1       0.71      0.29      0.41        76
           2       0.87      0.34      0.49        80
           3       0.77      0.77      0.77      1863
           4       0.68      0.36      0.47       283
           5       0.59      0.20      0.29       488
           6       0.53      0.81      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.68      0.44      0.50      4968
weighted avg       0.65      0.63      0.61      4968


--- SVM (linear) (TF-IDF sin stopwords y en minusculas) ---
Accuracy: 0.6381
              precision    recall  f1-score   support

           0       0.57      0.33      0.42       572
           1       0.57      0.41      0.48        76
           2       0.70      0.53      0.60        80
           3       0.79      0.75      0.77      1863
           4       0.67      0.45      0.54       283
           5       0.66      0.17      0.27       488
           6       0.53      0.81      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.64      0.49      0.53      4968
weighted avg       0.66      0.64      0.62      4968


--- Decision Tree (TF-IDF sin stopwords y en minusculas) ---
Accuracy: 0.5672
              precision    recall  f1-score   support

           0       0.36      0.31      0.33       572
           1       0.46      0.32      0.38        76
           2       0.60      0.44      0.51        80
           3       0.70      0.74      0.72      1863
           4       0.52      0.42      0.46       283
           5       0.34      0.28      0.31       488
           6       0.54      0.59      0.56      1606

    accuracy                           0.57      4968
   macro avg       0.50      0.44      0.47      4968
weighted avg       0.56      0.57      0.56      4968


-- TF-IDF SIN ACENTOS Y EN MINUSCULAS --

--- Logistic Regression (TF-IDF sin acentos y en minusculas) ---
Accuracy: 0.6333
              precision    recall  f1-score   support

           0       0.58      0.30      0.40       572
           1       0.68      0.25      0.37        76
           2       0.88      0.29      0.43        80
           3       0.76      0.77      0.77      1863
           4       0.70      0.34      0.46       283
           5       0.56      0.27      0.36       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.67      0.43      0.49      4968
weighted avg       0.65      0.63      0.61      4968


--- SVM (linear) (TF-IDF sin acentos y en minusculas) ---
Accuracy: 0.6439
              precision    recall  f1-score   support

           0       0.58      0.31      0.41       572
           1       0.57      0.38      0.46        76
           2       0.74      0.53      0.61        80
           3       0.81      0.75      0.78      1863
           4       0.70      0.46      0.55       283
           5       0.58      0.25      0.35       488
           6       0.54      0.81      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.64      0.50      0.54      4968
weighted avg       0.66      0.64      0.63      4968


--- Decision Tree (TF-IDF sin acentos y en minusculas) ---
Accuracy: 0.5276
              precision    recall  f1-score   support

           0       0.31      0.28      0.30       572
           1       0.32      0.25      0.28        76
           2       0.36      0.19      0.25        80
           3       0.69      0.70      0.69      1863
           4       0.38      0.31      0.34       283
           5       0.32      0.27      0.29       488
           6       0.50      0.56      0.53      1606

    accuracy                           0.53      4968
   macro avg       0.41      0.37      0.38      4968
weighted avg       0.52      0.53      0.52      4968


-- TF-IDF COMPLETO --

--- Logistic Regression (TF-IDF completo) ---
Accuracy: 0.6322
              precision    recall  f1-score   support

           0       0.58      0.30      0.39       572
           1       0.69      0.29      0.41        76
           2       0.87      0.34      0.49        80
           3       0.77      0.77      0.77      1863
           4       0.69      0.37      0.48       283
           5       0.58      0.19      0.29       488
           6       0.53      0.80      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.67      0.44      0.50      4968
weighted avg       0.65      0.63      0.61      4968


--- SVM (linear) (TF-IDF completo) ---
Accuracy: 0.6361
              precision    recall  f1-score   support

           0       0.58      0.33      0.42       572
           1       0.58      0.38      0.46        76
           2       0.68      0.53      0.59        80
           3       0.79      0.75      0.77      1863
           4       0.67      0.45      0.54       283
           5       0.65      0.17      0.27       488
           6       0.53      0.81      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.64      0.49      0.53      4968
weighted avg       0.65      0.64      0.62      4968


--- Decision Tree (TF-IDF completo) ---
Accuracy: 0.5614
              precision    recall  f1-score   support

           0       0.34      0.32      0.33       572
           1       0.54      0.42      0.47        76
           2       0.54      0.38      0.44        80
           3       0.70      0.73      0.72      1863
           4       0.50      0.42      0.46       283
           5       0.35      0.26      0.30       488
           6       0.52      0.58      0.55      1606

    accuracy                           0.56      4968
   macro avg       0.50      0.44      0.47      4968
weighted avg       0.55      0.56      0.55      4968

In [23]:
# -- CCOMPARATIVA CON LEMATIZACIÓN --

print("\n-- CON LEMATIZAR --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf, y_train, X_test_lem_tfidf, y_test, f"{model_name} (Lematizado)")

print("\n-- CON LEMATIZAR Y SIN STOPWORDS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_stopwords, y_train, X_test_lem_tfidf_stopwords, y_test, f"{model_name} (Lematizado sin stopwords)")

print("\n-- CON LEMATIZAR Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_lowercase, y_train, X_test_lem_tfidf_lowercase, y_test, f"{model_name} (Lematizado en minusculas)")

print("\n-- CON LEMATIZAR Y SIN ACENTOS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_strip, y_train, X_test_lem_tfidf_strip, y_test, f"{model_name} (Lematizado sin acentos)")

print("\n-- CON LEMATIZAR, SIN STOPWORDS Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_stopwords_lowercase, y_train, X_test_lem_tfidf_stopwords_lowercase, y_test, f"{model_name} (Lematizado sin stopwords y en minusculas)")

print("\n-- CON LEMATIZAR, SIN ACENTOS Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_strip_lowercase, y_train, X_test_lem_tfidf_strip_lowercase, y_test, f"{model_name} (Lematizado sin acentos y en minusculas)")


print("\n-- CON LEMATIZAR Y COMPLETO --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_complete, y_train, X_test_lem_tfidf_complete, y_test, f"{model_name} (Lematizado completo)")



-- CON LEMATIZAR --

--- Logistic Regression (Lematizado) ---
Accuracy: 0.6379
              precision    recall  f1-score   support

           0       0.59      0.30      0.40       572
           1       0.80      0.26      0.40        76
           2       0.96      0.33      0.49        80
           3       0.77      0.77      0.77      1863
           4       0.71      0.35      0.47       283
           5       0.55      0.28      0.38       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.70      0.44      0.51      4968
weighted avg       0.65      0.64      0.62      4968


--- SVM (linear) (Lematizado) ---
Accuracy: 0.6443
              precision    recall  f1-score   support

           0       0.58      0.32      0.41       572
           1       0.56      0.37      0.44        76
           2       0.76      0.53      0.62        80
           3       0.81      0.75      0.78      1863


-- CON LEMATIZAR --

--- Logistic Regression (Lematizado) ---
Accuracy: 0.6379
              precision    recall  f1-score   support

           0       0.59      0.30      0.40       572
           1       0.80      0.26      0.40        76
           2       0.96      0.33      0.49        80
           3       0.77      0.77      0.77      1863
           4       0.71      0.35      0.47       283
           5       0.55      0.28      0.38       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.70      0.44      0.51      4968
weighted avg       0.65      0.64      0.62      4968


--- SVM (linear) (Lematizado) ---
Accuracy: 0.6443
              precision    recall  f1-score   support

           0       0.58      0.32      0.41       572
           1       0.56      0.37      0.44        76
           2       0.76      0.53      0.62        80
           3       0.81      0.75      0.78      1863
           4       0.69      0.45      0.54       283
           5       0.57      0.25      0.34       488
           6       0.54      0.82      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.64      0.50      0.54      4968
weighted avg       0.66      0.64      0.63      4968


--- Decision Tree (Lematizado) ---
Accuracy: 0.5209
              precision    recall  f1-score   support

           0       0.30      0.26      0.28       572
           1       0.42      0.30      0.35        76
           2       0.33      0.17      0.23        80
           3       0.68      0.70      0.69      1863
           4       0.38      0.30      0.34       283
           5       0.28      0.24      0.26       488
           6       0.50      0.56      0.53      1606

    accuracy                           0.52      4968
   macro avg       0.41      0.36      0.38      4968
weighted avg       0.51      0.52      0.51      4968


-- CON LEMATIZAR Y SIN STOPWORDS --

--- Logistic Regression (Lematizado sin stopwords) ---
Accuracy: 0.6355
              precision    recall  f1-score   support

           0       0.59      0.31      0.40       572
           1       0.70      0.28      0.40        76
           2       0.88      0.35      0.50        80
           3       0.77      0.77      0.77      1863
           4       0.69      0.36      0.47       283
           5       0.58      0.20      0.30       488
           6       0.53      0.81      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.68      0.44      0.50      4968
weighted avg       0.65      0.64      0.61      4968


--- SVM (linear) (Lematizado sin stopwords) ---
Accuracy: 0.6397
              precision    recall  f1-score   support

           0       0.58      0.33      0.42       572
           1       0.52      0.37      0.43        76
           2       0.70      0.53      0.60        80
           3       0.79      0.76      0.77      1863
           4       0.67      0.43      0.53       283
           5       0.62      0.17      0.27       488
           6       0.54      0.81      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.63      0.48      0.52      4968
weighted avg       0.66      0.64      0.62      4968


--- Decision Tree (Lematizado sin stopwords) ---
Accuracy: 0.5570
              precision    recall  f1-score   support

           0       0.34      0.30      0.32       572
           1       0.40      0.32      0.35        76
           2       0.63      0.45      0.53        80
           3       0.69      0.73      0.71      1863
           4       0.54      0.42      0.47       283
           5       0.31      0.22      0.26       488
           6       0.52      0.59      0.55      1606

    accuracy                           0.56      4968
   macro avg       0.49      0.43      0.46      4968
weighted avg       0.55      0.56      0.55      4968


-- CON LEMATIZAR Y EN MINUSCULAS --

--- Logistic Regression (Lematizado en minusculas) ---
Accuracy: 0.6379
              precision    recall  f1-score   support

           0       0.59      0.30      0.40       572
           1       0.80      0.26      0.40        76
           2       0.96      0.33      0.49        80
           3       0.77      0.77      0.77      1863
           4       0.71      0.35      0.47       283
           5       0.55      0.28      0.38       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.70      0.44      0.51      4968
weighted avg       0.65      0.64      0.62      4968


--- SVM (linear) (Lematizado en minusculas) ---
Accuracy: 0.6443
              precision    recall  f1-score   support

           0       0.58      0.32      0.41       572
           1       0.56      0.37      0.44        76
           2       0.76      0.53      0.62        80
           3       0.81      0.75      0.78      1863
           4       0.69      0.45      0.54       283
           5       0.57      0.25      0.34       488
           6       0.54      0.82      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.64      0.50      0.54      4968
weighted avg       0.66      0.64      0.63      4968


--- Decision Tree (Lematizado en minusculas) ---
Accuracy: 0.5209
              precision    recall  f1-score   support

           0       0.30      0.26      0.28       572
           1       0.42      0.30      0.35        76
           2       0.33      0.17      0.23        80
           3       0.68      0.70      0.69      1863
           4       0.38      0.30      0.34       283
           5       0.28      0.24      0.26       488
           6       0.50      0.56      0.53      1606

    accuracy                           0.52      4968
   macro avg       0.41      0.36      0.38      4968
weighted avg       0.51      0.52      0.51      4968


-- CON LEMATIZAR Y SIN ACENTOS --

--- Logistic Regression (Lematizado sin acentos) ---
Accuracy: 0.6385
              precision    recall  f1-score   support

           0       0.58      0.30      0.39       572
           1       0.80      0.26      0.40        76
           2       0.96      0.31      0.47        80
           3       0.76      0.78      0.77      1863
           4       0.70      0.35      0.47       283
           5       0.56      0.28      0.38       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.70      0.44      0.50      4968
weighted avg       0.65      0.64      0.62      4968


--- SVM (linear) (Lematizado sin acentos) ---
Accuracy: 0.6457
              precision    recall  f1-score   support

           0       0.59      0.33      0.42       572
           1       0.56      0.37      0.44        76
           2       0.76      0.55      0.64        80
           3       0.81      0.74      0.77      1863
           4       0.69      0.45      0.54       283
           5       0.58      0.25      0.35       488
           6       0.54      0.82      0.65      1606

    accuracy                           0.65      4968
   macro avg       0.65      0.50      0.55      4968
weighted avg       0.66      0.65      0.63      4968


--- Decision Tree (Lematizado sin acentos) ---
Accuracy: 0.5302
              precision    recall  f1-score   support

           0       0.31      0.27      0.29       572
           1       0.37      0.24      0.29        76
           2       0.36      0.23      0.28        80
           3       0.68      0.72      0.70      1863
           4       0.42      0.30      0.35       283
           5       0.28      0.24      0.26       488
           6       0.50      0.56      0.53      1606

    accuracy                           0.53      4968
   macro avg       0.42      0.36      0.39      4968
weighted avg       0.52      0.53      0.52      4968


-- CON LEMATIZAR, SIN STOPWORDS Y EN MINUSCULAS --

--- Logistic Regression (Lematizado sin stopwords y en minusculas) ---
Accuracy: 0.6355
              precision    recall  f1-score   support

           0       0.59      0.31      0.40       572
           1       0.70      0.28      0.40        76
           2       0.88      0.35      0.50        80
           3       0.77      0.77      0.77      1863
           4       0.69      0.36      0.47       283
           5       0.58      0.20      0.30       488
           6       0.53      0.81      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.68      0.44      0.50      4968
weighted avg       0.65      0.64      0.61      4968


--- SVM (linear) (Lematizado sin stopwords y en minusculas) ---
Accuracy: 0.6397
              precision    recall  f1-score   support

           0       0.58      0.33      0.42       572
           1       0.52      0.37      0.43        76
           2       0.70      0.53      0.60        80
           3       0.79      0.76      0.77      1863
           4       0.67      0.43      0.53       283
           5       0.62      0.17      0.27       488
           6       0.54      0.81      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.63      0.48      0.52      4968
weighted avg       0.66      0.64      0.62      4968


--- Decision Tree (Lematizado sin stopwords y en minusculas) ---
Accuracy: 0.5570
              precision    recall  f1-score   support

           0       0.34      0.30      0.32       572
           1       0.40      0.32      0.35        76
           2       0.63      0.45      0.53        80
           3       0.69      0.73      0.71      1863
           4       0.54      0.42      0.47       283
           5       0.31      0.22      0.26       488
           6       0.52      0.59      0.55      1606

    accuracy                           0.56      4968
   macro avg       0.49      0.43      0.46      4968
weighted avg       0.55      0.56      0.55      4968


-- CON LEMATIZAR, SIN ACENTOS Y EN MINUSCULAS --

--- Logistic Regression (Lematizado sin acentos y en minusculas) ---
Accuracy: 0.6385
              precision    recall  f1-score   support

           0       0.58      0.30      0.39       572
           1       0.80      0.26      0.40        76
           2       0.96      0.31      0.47        80
           3       0.76      0.78      0.77      1863
           4       0.70      0.35      0.47       283
           5       0.56      0.28      0.38       488
           6       0.54      0.79      0.64      1606

    accuracy                           0.64      4968
   macro avg       0.70      0.44      0.50      4968
weighted avg       0.65      0.64      0.62      4968


--- SVM (linear) (Lematizado sin acentos y en minusculas) ---
Accuracy: 0.6457
              precision    recall  f1-score   support

           0       0.59      0.33      0.42       572
           1       0.56      0.37      0.44        76
           2       0.76      0.55      0.64        80
           3       0.81      0.74      0.77      1863
           4       0.69      0.45      0.54       283
           5       0.58      0.25      0.35       488
           6       0.54      0.82      0.65      1606

    accuracy                           0.65      4968
   macro avg       0.65      0.50      0.55      4968
weighted avg       0.66      0.65      0.63      4968


--- Decision Tree (Lematizado sin acentos y en minusculas) ---
Accuracy: 0.5302
              precision    recall  f1-score   support

           0       0.31      0.27      0.29       572
           1       0.37      0.24      0.29        76
           2       0.36      0.23      0.28        80
           3       0.68      0.72      0.70      1863
           4       0.42      0.30      0.35       283
           5       0.28      0.24      0.26       488
           6       0.50      0.56      0.53      1606

    accuracy                           0.53      4968
   macro avg       0.42      0.36      0.39      4968
weighted avg       0.52      0.53      0.52      4968


-- CON LEMATIZAR Y COMPLETO --

--- Logistic Regression (Lematizado completo) ---
Accuracy: 0.6337
              precision    recall  f1-score   support

           0       0.60      0.31      0.41       572
           1       0.70      0.28      0.40        76
           2       0.88      0.35      0.50        80
           3       0.77      0.77      0.77      1863
           4       0.69      0.36      0.47       283
           5       0.57      0.21      0.30       488
           6       0.53      0.80      0.64      1606

    accuracy                           0.63      4968
   macro avg       0.68      0.44      0.50      4968
weighted avg       0.65      0.63      0.61      4968


--- SVM (linear) (Lematizado completo) ---
Accuracy: 0.6387
              precision    recall  f1-score   support

           0       0.58      0.33      0.42       572
           1       0.52      0.36      0.42        76
           2       0.70      0.53      0.60        80
           3       0.79      0.75      0.77      1863
           4       0.66      0.44      0.53       283
           5       0.64      0.18      0.28       488
           6       0.53      0.82      0.65      1606

    accuracy                           0.64      4968
   macro avg       0.63      0.48      0.52      4968
weighted avg       0.66      0.64      0.62      4968


--- Decision Tree (Lematizado completo) ---
Accuracy: 0.5612
              precision    recall  f1-score   support

           0       0.35      0.31      0.33       572
           1       0.40      0.30      0.35        76
           2       0.53      0.39      0.45        80
           3       0.71      0.75      0.73      1863
           4       0.49      0.39      0.43       283
           5       0.29      0.23      0.26       488
           6       0.53      0.59      0.56      1606

    accuracy                           0.56      4968
   macro avg       0.47      0.42      0.44      4968
weighted avg       0.55      0.56      0.55      4968