In [13]:

import pandas as pd
import nltk
import re
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

#Carga de datos
train_data = pd.read_csv("../../Data/BasedOnSentiments/train_sentiments.csv")
test_data = pd.read_csv("../../Data/BasedOnSentiments/test_sentiments.csv")
valid_data = pd.read_csv("../../Data/BasedOnSentiments/valid_sentiments.csv")

# --- PREPROCESAMIENTO PARA LEMATIZACIÓN ---
#nltk.download('punkt')
#nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    words = re.findall(r'\b\w+\b', text.lower())
    lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
    return " ".join(lemmatized_words)

# Datos
X_train_raw = train_data['Text'].values
X_test_raw = test_data['Text'].values
y_train = train_data['Sentiment'].values
y_test = test_data['Sentiment'].values

# Datos lematizados
X_train_lem = [preprocess_text(text) for text in X_train_raw]
X_test_lem = [preprocess_text(text) for text in X_test_raw]

In [14]:
# --- DIRECTORIO DE SALIDA DE LOS PLOTS ---
output_dir = "../../Plots/Experiment1/Sentiments/Experiment/"


import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Función para graficar matriz de confusión
def plot_confusion_matrix(y_true, y_pred, labels, title):
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(labels))))

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues", 
        xticklabels=labels,
        yticklabels=labels,
        cbar=True
    )
    plt.title(f'Matriz de Confusión - {title}')
    plt.ylabel('Etiqueta Verdadera')
    plt.xlabel('Etiqueta Predicha')
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.tight_layout()
    # Guardar archivo
    filename = title.replace(" ", "_").replace("(", "").replace(")", "").lower() + ".png"
    plt.savefig(output_dir + filename)
    plt.close()

In [15]:
# --- FUNCION GENERAL PARA COMPARAR ---
def train_and_evaluate(model, X_train, y_train, X_test, y_test, label):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"\n--- {label} ---")
    print(f"Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
    # Mostrar matriz de confusión
    sentiment_labels = ["Positive", "Negative", "Ambiguous", "Neutral"]
    
    plot_confusion_matrix(y_test, y_pred, sentiment_labels, label)

In [None]:
# --- VECTORIZADORES ---
vectorizer_raw = TfidfVectorizer(max_features=10000)
vectorizer_stopwords = TfidfVectorizer(max_features=10000, stop_words='english')
vectorizer_lowercase = TfidfVectorizer(max_features=10000, lowercase=True)
vectorizer_strip = TfidfVectorizer(max_features=10000, strip_accents='unicode')
vectorizer_stopwords_lowercase = TfidfVectorizer(max_features=10000, stop_words='english', lowercase=True)
vectorizer_strip_lowercase = TfidfVectorizer(max_features=10000, strip_accents='unicode', lowercase=True)
vectorizer_complete = TfidfVectorizer(max_features=10000, stop_words='english', lowercase=True, strip_accents='unicode')

#TRAIN Y TEST SIN LEMATIZACIÓN
X_train_raw_tfidf = vectorizer_raw.fit_transform(X_train_raw)
X_test_raw_tfidf = vectorizer_raw.transform(X_test_raw)

X_train_raw_tfidf_stopwords = vectorizer_stopwords.fit_transform(X_train_raw)
X_test_raw_tfidf_stopwords = vectorizer_stopwords.transform(X_test_raw)

X_train_raw_tfidf_lowercase = vectorizer_lowercase.fit_transform(X_train_raw)
X_test_raw_tfidf_lowercase = vectorizer_lowercase.transform(X_test_raw)

X_train_raw_tfidf_strip = vectorizer_strip.fit_transform(X_train_raw)
X_test_raw_tfidf_strip = vectorizer_strip.transform(X_test_raw)

X_train_raw_tfidf_stopwords_lowercase = vectorizer_stopwords_lowercase.fit_transform(X_train_raw)
X_test_raw_tfidf_stopwords_lowercase = vectorizer_stopwords_lowercase.transform(X_test_raw)

X_train_raw_tfidf_strip_lowercase = vectorizer_strip_lowercase.fit_transform(X_train_raw)
X_test_raw_tfidf_strip_lowercase = vectorizer_strip_lowercase.transform(X_test_raw)

X_train_raw_tfidf_complete = vectorizer_complete.fit_transform(X_train_raw)
X_test_raw_tfidf_complete = vectorizer_complete.transform(X_test_raw)

#TRAIN Y TEST PARA LEMATIZACIÓN
X_train_lem_tfidf = vectorizer_raw.fit_transform(X_train_lem)
X_test_lem_tfidf = vectorizer_raw.transform(X_test_lem)

X_train_lem_tfidf_stopwords = vectorizer_stopwords.fit_transform(X_train_lem)
X_test_lem_tfidf_stopwords = vectorizer_stopwords.transform(X_test_lem)

X_train_lem_tfidf_lowercase = vectorizer_lowercase.fit_transform(X_train_lem)
X_test_lem_tfidf_lowercase = vectorizer_lowercase.transform(X_test_lem)

X_train_lem_tfidf_strip = vectorizer_strip.fit_transform(X_train_lem)
X_test_lem_tfidf_strip = vectorizer_strip.transform(X_test_lem)

X_train_lem_tfidf_stopwords_lowercase = vectorizer_stopwords_lowercase.fit_transform(X_train_lem)
X_test_lem_tfidf_stopwords_lowercase = vectorizer_stopwords_lowercase.transform(X_test_lem)

X_train_lem_tfidf_strip_lowercase = vectorizer_strip_lowercase.fit_transform(X_train_lem)
X_test_lem_tfidf_strip_lowercase = vectorizer_strip_lowercase.transform(X_test_lem)

X_train_lem_tfidf_complete = vectorizer_complete.fit_transform(X_train_lem)
X_test_lem_tfidf_complete = vectorizer_complete.transform(X_test_lem)




In [17]:
#Modelos de clasificación
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'SVM (linear)': SVC(kernel='linear'),
    'Decision Tree': DecisionTreeClassifier(random_state=42)
}

In [18]:
# --- COMPARATIVA FINAL ---
print("\n-- TF-IDF --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf, y_train, X_test_raw_tfidf, y_test, f"{model_name} (TF-IDF)")

print("\n-- TF-IDF SIN STOPWORDS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_stopwords, y_train, X_test_raw_tfidf_stopwords, y_test, f"{model_name} (TF-IDF sin stopwords)")

print("\n-- TF-IDF EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_lowercase, y_train, X_test_raw_tfidf_lowercase, y_test, f"{model_name} (TF-IDF en minusculas)")

print("\n-- TF-IDF SIN ACENTOS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_strip, y_train, X_test_raw_tfidf_strip, y_test, f"{model_name} (TF-IDF sin acentos)")

print("\n-- TF-IDF SIN STOPWORDS Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_stopwords_lowercase, y_train, X_test_raw_tfidf_stopwords_lowercase, y_test, f"{model_name} (TF-IDF sin stopwords y en minusculas)")

print("\n-- TF-IDF SIN ACENTOS Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_strip_lowercase, y_train, X_test_raw_tfidf_strip_lowercase, y_test, f"{model_name} (TF-IDF sin acentos y en minusculas)")

print("\n-- TF-IDF COMPLETO --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_raw_tfidf_complete, y_train, X_test_raw_tfidf_complete, y_test, f"{model_name} (TF-IDF completo)")


-- TF-IDF --

--- Logistic Regression (TF-IDF) ---
Accuracy: 0.6563
              precision    recall  f1-score   support

           0       0.78      0.76      0.77      1863
           1       0.67      0.52      0.58      1070
           2       0.57      0.26      0.35       488
           3       0.55      0.75      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (TF-IDF) ---
Accuracy: 0.6612
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.69      0.52      0.59      1070
           2       0.60      0.25      0.35       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.66      0.57      0.59      5027
weighted avg       0.68      0.66      0.65      5027


--- Decision

-- TF-IDF --

--- Logistic Regression (TF-IDF) ---
Accuracy: 0.6563
              precision    recall  f1-score   support

           0       0.78      0.76      0.77      1863
           1       0.67      0.52      0.58      1070
           2       0.57      0.26      0.35       488
           3       0.55      0.75      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (TF-IDF) ---
Accuracy: 0.6612
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.69      0.52      0.59      1070
           2       0.60      0.25      0.35       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.66      0.57      0.59      5027
weighted avg       0.68      0.66      0.65      5027


--- Decision Tree (TF-IDF) ---
Accuracy: 0.5327
              precision    recall  f1-score   support

           0       0.68      0.68      0.68      1863
           1       0.45      0.40      0.42      1070
           2       0.28      0.24      0.26       488
           3       0.48      0.54      0.51      1606

    accuracy                           0.53      5027
   macro avg       0.47      0.46      0.47      5027
weighted avg       0.53      0.53      0.53      5027


-- TF-IDF SIN STOPWORDS --

--- Logistic Regression (TF-IDF sin stopwords) ---
Accuracy: 0.6533
              precision    recall  f1-score   support

           0       0.79      0.76      0.77      1863
           1       0.67      0.51      0.58      1070
           2       0.59      0.19      0.28       488
           3       0.54      0.76      0.64      1606

    accuracy                           0.65      5027
   macro avg       0.65      0.56      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- SVM (linear) (TF-IDF sin stopwords) ---
Accuracy: 0.6527
              precision    recall  f1-score   support

           0       0.78      0.75      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.65      0.17      0.28       488
           3       0.54      0.77      0.64      1606

    accuracy                           0.65      5027
   macro avg       0.66      0.55      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- Decision Tree (TF-IDF sin stopwords) ---
Accuracy: 0.5833
              precision    recall  f1-score   support

           0       0.71      0.73      0.72      1863
           1       0.54      0.50      0.52      1070
           2       0.34      0.27      0.30       488
           3       0.52      0.56      0.54      1606

    accuracy                           0.58      5027
   macro avg       0.53      0.52      0.52      5027
weighted avg       0.58      0.58      0.58      5027


-- TF-IDF EN MINUSCULAS --

--- Logistic Regression (TF-IDF en minusculas) ---
Accuracy: 0.6563
              precision    recall  f1-score   support

           0       0.78      0.76      0.77      1863
           1       0.67      0.52      0.58      1070
           2       0.57      0.26      0.35       488
           3       0.55      0.75      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (TF-IDF en minusculas) ---
Accuracy: 0.6612
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.69      0.52      0.59      1070
           2       0.60      0.25      0.35       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.66      0.57      0.59      5027
weighted avg       0.68      0.66      0.65      5027


--- Decision Tree (TF-IDF en minusculas) ---
Accuracy: 0.5327
              precision    recall  f1-score   support

           0       0.68      0.68      0.68      1863
           1       0.45      0.40      0.42      1070
           2       0.28      0.24      0.26       488
           3       0.48      0.54      0.51      1606

    accuracy                           0.53      5027
   macro avg       0.47      0.46      0.47      5027
weighted avg       0.53      0.53      0.53      5027


-- TF-IDF SIN ACENTOS --

--- Logistic Regression (TF-IDF sin acentos) ---
Accuracy: 0.6574
              precision    recall  f1-score   support

           0       0.78      0.76      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.58      0.25      0.35       488
           3       0.55      0.75      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (TF-IDF sin acentos) ---
Accuracy: 0.6616
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.69      0.53      0.60      1070
           2       0.60      0.25      0.35       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.66      0.58      0.59      5027
weighted avg       0.68      0.66      0.65      5027


--- Decision Tree (TF-IDF sin acentos) ---
Accuracy: 0.5447
              precision    recall  f1-score   support

           0       0.68      0.70      0.69      1863
           1       0.45      0.40      0.42      1070
           2       0.28      0.23      0.25       488
           3       0.50      0.56      0.53      1606

    accuracy                           0.54      5027
   macro avg       0.48      0.47      0.47      5027
weighted avg       0.54      0.54      0.54      5027


-- TF-IDF SIN STOPWORDS Y EN MINUSCULAS --

--- Logistic Regression (TF-IDF sin stopwords y en minusculas) ---
Accuracy: 0.6533
              precision    recall  f1-score   support

           0       0.79      0.76      0.77      1863
           1       0.67      0.51      0.58      1070
           2       0.59      0.19      0.28       488
           3       0.54      0.76      0.64      1606

    accuracy                           0.65      5027
   macro avg       0.65      0.56      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- SVM (linear) (TF-IDF sin stopwords y en minusculas) ---
Accuracy: 0.6527
              precision    recall  f1-score   support

           0       0.78      0.75      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.65      0.17      0.28       488
           3       0.54      0.77      0.64      1606

    accuracy                           0.65      5027
   macro avg       0.66      0.55      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- Decision Tree (TF-IDF sin stopwords y en minusculas) ---
Accuracy: 0.5833
              precision    recall  f1-score   support

           0       0.71      0.73      0.72      1863
           1       0.54      0.50      0.52      1070
           2       0.34      0.27      0.30       488
           3       0.52      0.56      0.54      1606

    accuracy                           0.58      5027
   macro avg       0.53      0.52      0.52      5027
weighted avg       0.58      0.58      0.58      5027


-- TF-IDF SIN ACENTOS Y EN MINUSCULAS --

--- Logistic Regression (TF-IDF sin acentos y en minusculas) ---
Accuracy: 0.6574
              precision    recall  f1-score   support

           0       0.78      0.76      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.58      0.25      0.35       488
           3       0.55      0.75      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (TF-IDF sin acentos y en minusculas) ---
Accuracy: 0.6616
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.69      0.53      0.60      1070
           2       0.60      0.25      0.35       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.66      0.58      0.59      5027
weighted avg       0.68      0.66      0.65      5027


--- Decision Tree (TF-IDF sin acentos y en minusculas) ---
Accuracy: 0.5447
              precision    recall  f1-score   support

           0       0.68      0.70      0.69      1863
           1       0.45      0.40      0.42      1070
           2       0.28      0.23      0.25       488
           3       0.50      0.56      0.53      1606

    accuracy                           0.54      5027
   macro avg       0.48      0.47      0.47      5027
weighted avg       0.54      0.54      0.54      5027


-- TF-IDF COMPLETO --

--- Logistic Regression (TF-IDF completo) ---
Accuracy: 0.6547
              precision    recall  f1-score   support

           0       0.79      0.76      0.78      1863
           1       0.67      0.52      0.58      1070
           2       0.61      0.19      0.29       488
           3       0.54      0.76      0.64      1606

    accuracy                           0.65      5027
   macro avg       0.65      0.56      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- SVM (linear) (TF-IDF completo) ---
Accuracy: 0.6533
              precision    recall  f1-score   support

           0       0.78      0.75      0.77      1863
           1       0.68      0.53      0.60      1070
           2       0.66      0.17      0.27       488
           3       0.54      0.77      0.64      1606

    accuracy                           0.65      5027
   macro avg       0.67      0.56      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- Decision Tree (TF-IDF completo) ---
Accuracy: 0.5787
              precision    recall  f1-score   support

           0       0.70      0.73      0.72      1863
           1       0.53      0.48      0.51      1070
           2       0.34      0.26      0.29       488
           3       0.52      0.56      0.54      1606

    accuracy                           0.58      5027
   macro avg       0.52      0.51      0.51      5027
weighted avg       0.57      0.58      0.57      5027

In [19]:
# -- CCOMPARATIVA CON LEMATIZACIÓN --

print("\n-- CON LEMATIZAR --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf, y_train, X_test_lem_tfidf, y_test, f"{model_name} (Lematizado)")

print("\n-- CON LEMATIZAR Y SIN STOPWORDS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_stopwords, y_train, X_test_lem_tfidf_stopwords, y_test, f"{model_name} (Lematizado sin stopwords)")

print("\n-- CON LEMATIZAR Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_lowercase, y_train, X_test_lem_tfidf_lowercase, y_test, f"{model_name} (Lematizado en minusculas)")

print("\n-- CON LEMATIZAR Y SIN ACENTOS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_strip, y_train, X_test_lem_tfidf_strip, y_test, f"{model_name} (Lematizado sin acentos)")

print("\n-- CON LEMATIZAR, SIN STOPWORDS Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_stopwords_lowercase, y_train, X_test_lem_tfidf_stopwords_lowercase, y_test, f"{model_name} (Lematizado sin stopwords y en minusculas)")

print("\n-- CON LEMATIZAR, SIN ACENTOS Y EN MINUSCULAS --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_strip_lowercase, y_train, X_test_lem_tfidf_strip_lowercase, y_test, f"{model_name} (Lematizado sin acentos y en minusculas)")


print("\n-- CON LEMATIZAR Y COMPLETO --")
for model_name, model in models.items():
    train_and_evaluate(model, X_train_lem_tfidf_complete, y_train, X_test_lem_tfidf_complete, y_test, f"{model_name} (Lematizado completo)")



-- CON LEMATIZAR --

--- Logistic Regression (Lematizado) ---
Accuracy: 0.6606
              precision    recall  f1-score   support

           0       0.78      0.77      0.77      1863
           1       0.68      0.51      0.59      1070
           2       0.58      0.27      0.37       488
           3       0.56      0.75      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.58      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (Lematizado) ---
Accuracy: 0.6602
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.59      0.24      0.34       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027

-- CON LEMATIZAR --

--- Logistic Regression (Lematizado) ---
Accuracy: 0.6606
              precision    recall  f1-score   support

           0       0.78      0.77      0.77      1863
           1       0.68      0.51      0.59      1070
           2       0.58      0.27      0.37       488
           3       0.56      0.75      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.58      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (Lematizado) ---
Accuracy: 0.6602
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.59      0.24      0.34       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- Decision Tree (Lematizado) ---
Accuracy: 0.5443
              precision    recall  f1-score   support

           0       0.69      0.70      0.69      1863
           1       0.47      0.42      0.44      1070
           2       0.27      0.22      0.25       488
           3       0.49      0.55      0.52      1606

    accuracy                           0.54      5027
   macro avg       0.48      0.47      0.48      5027
weighted avg       0.54      0.54      0.54      5027


-- CON LEMATIZAR Y SIN STOPWORDS --

--- Logistic Regression (Lematizado sin stopwords) ---
Accuracy: 0.6529
              precision    recall  f1-score   support

           0       0.79      0.76      0.77      1863
           1       0.67      0.51      0.58      1070
           2       0.60      0.19      0.29       488
           3       0.54      0.76      0.63      1606

    accuracy                           0.65      5027
   macro avg       0.65      0.56      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- SVM (linear) (Lematizado sin stopwords) ---
Accuracy: 0.6565
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.67      0.52      0.59      1070
           2       0.64      0.18      0.28       488
           3       0.54      0.78      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.66      0.56      0.57      5027
weighted avg       0.67      0.66      0.64      5027


--- Decision Tree (Lematizado sin stopwords) ---
Accuracy: 0.5854
              precision    recall  f1-score   support

           0       0.70      0.74      0.72      1863
           1       0.54      0.50      0.52      1070
           2       0.33      0.24      0.28       488
           3       0.53      0.57      0.55      1606

    accuracy                           0.59      5027
   macro avg       0.53      0.51      0.52      5027
weighted avg       0.58      0.59      0.58      5027


-- CON LEMATIZAR Y EN MINUSCULAS --

--- Logistic Regression (Lematizado en minusculas) ---
Accuracy: 0.6606
              precision    recall  f1-score   support

           0       0.78      0.77      0.77      1863
           1       0.68      0.51      0.59      1070
           2       0.58      0.27      0.37       488
           3       0.56      0.75      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.58      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (Lematizado en minusculas) ---
Accuracy: 0.6602
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.59      0.24      0.34       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- Decision Tree (Lematizado en minusculas) ---
Accuracy: 0.5443
              precision    recall  f1-score   support

           0       0.69      0.70      0.69      1863
           1       0.47      0.42      0.44      1070
           2       0.27      0.22      0.25       488
           3       0.49      0.55      0.52      1606

    accuracy                           0.54      5027
   macro avg       0.48      0.47      0.48      5027
weighted avg       0.54      0.54      0.54      5027


-- CON LEMATIZAR Y SIN ACENTOS --

--- Logistic Regression (Lematizado sin acentos) ---
Accuracy: 0.6612
              precision    recall  f1-score   support

           0       0.79      0.76      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.57      0.27      0.36       488
           3       0.56      0.76      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.58      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (Lematizado sin acentos) ---
Accuracy: 0.6600
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.57      0.24      0.34       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- Decision Tree (Lematizado sin acentos) ---
Accuracy: 0.5423
              precision    recall  f1-score   support

           0       0.68      0.70      0.69      1863
           1       0.47      0.42      0.45      1070
           2       0.28      0.24      0.26       488
           3       0.49      0.53      0.51      1606

    accuracy                           0.54      5027
   macro avg       0.48      0.47      0.48      5027
weighted avg       0.54      0.54      0.54      5027


-- CON LEMATIZAR, SIN STOPWORDS Y EN MINUSCULAS --

--- Logistic Regression (Lematizado sin stopwords y en minusculas) ---
Accuracy: 0.6529
              precision    recall  f1-score   support

           0       0.79      0.76      0.77      1863
           1       0.67      0.51      0.58      1070
           2       0.60      0.19      0.29       488
           3       0.54      0.76      0.63      1606

    accuracy                           0.65      5027
   macro avg       0.65      0.56      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- SVM (linear) (Lematizado sin stopwords y en minusculas) ---
Accuracy: 0.6565
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.67      0.52      0.59      1070
           2       0.64      0.18      0.28       488
           3       0.54      0.78      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.66      0.56      0.57      5027
weighted avg       0.67      0.66      0.64      5027


--- Decision Tree (Lematizado sin stopwords y en minusculas) ---
Accuracy: 0.5854
              precision    recall  f1-score   support

           0       0.70      0.74      0.72      1863
           1       0.54      0.50      0.52      1070
           2       0.33      0.24      0.28       488
           3       0.53      0.57      0.55      1606

    accuracy                           0.59      5027
   macro avg       0.53      0.51      0.52      5027
weighted avg       0.58      0.59      0.58      5027


-- CON LEMATIZAR, SIN ACENTOS Y EN MINUSCULAS --

--- Logistic Regression (Lematizado sin acentos y en minusculas) ---
Accuracy: 0.6612
              precision    recall  f1-score   support

           0       0.79      0.76      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.57      0.27      0.36       488
           3       0.56      0.76      0.64      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.58      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- SVM (linear) (Lematizado sin acentos y en minusculas) ---
Accuracy: 0.6600
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1863
           1       0.68      0.52      0.59      1070
           2       0.57      0.24      0.34       488
           3       0.55      0.78      0.65      1606

    accuracy                           0.66      5027
   macro avg       0.65      0.57      0.59      5027
weighted avg       0.67      0.66      0.65      5027


--- Decision Tree (Lematizado sin acentos y en minusculas) ---
Accuracy: 0.5423
              precision    recall  f1-score   support

           0       0.68      0.70      0.69      1863
           1       0.47      0.42      0.45      1070
           2       0.28      0.24      0.26       488
           3       0.49      0.53      0.51      1606

    accuracy                           0.54      5027
   macro avg       0.48      0.47      0.48      5027
weighted avg       0.54      0.54      0.54      5027


-- CON LEMATIZAR Y COMPLETO --

--- Logistic Regression (Lematizado completo) ---
Accuracy: 0.6531
              precision    recall  f1-score   support

           0       0.79      0.77      0.78      1863
           1       0.67      0.51      0.57      1070
           2       0.60      0.20      0.30       488
           3       0.54      0.76      0.63      1606

    accuracy                           0.65      5027
   macro avg       0.65      0.56      0.57      5027
weighted avg       0.66      0.65      0.64      5027


--- SVM (linear) (Lematizado completo) ---
Accuracy: 0.6547
              precision    recall  f1-score   support

           0       0.79      0.75      0.77      1863
           1       0.67      0.52      0.58      1070
           2       0.65      0.18      0.28       488
           3       0.54      0.78      0.64      1606

    accuracy                           0.65      5027
   macro avg       0.66      0.56      0.57      5027
weighted avg       0.67      0.65      0.64      5027


--- Decision Tree (Lematizado completo) ---
Accuracy: 0.5836
              precision    recall  f1-score   support

           0       0.71      0.74      0.72      1863
           1       0.52      0.49      0.50      1070
           2       0.32      0.22      0.26       488
           3       0.53      0.57      0.55      1606

    accuracy                           0.58      5027
   macro avg       0.52      0.51      0.51      5027
weighted avg       0.57      0.58      0.58      5027