In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, f1_score

In [2]:
# Load the pre-split training and test sets from the processed-data folder.
# All four CSV files are parsed with the same options, kept in one place so
# the calls cannot drift apart.
csv_options = {"index_col": None, "encoding": 'utf-8'}

# Training split: features and labels.
X_train = pd.read_csv("./data_processed/X_train.csv", **csv_options)
y_train = pd.read_csv("./data_processed/y_train.csv", **csv_options)

# Test split: features and labels.
X_test = pd.read_csv("./data_processed/X_test.csv", **csv_options)
y_test = pd.read_csv("./data_processed/y_test.csv", **csv_options)

In [3]:
# Flatten the label tables into 1-D arrays, the shape the estimator and the
# metric functions are given below.
# np.ravel accepts both a DataFrame and an already-flattened ndarray, so this
# cell can be re-run safely; the previous `.values.ravel()` form raised
# AttributeError on a second run because the names had been rebound to ndarrays.
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

lr_model = LogisticRegression()
# Fit the model on the full training set; this fitted estimator is the one
# used for the test-set predictions in the next cell.
lr_model.fit(X_train, y_train)

# Build a 5-fold splitter (n_splits=5) with shuffling and a fixed seed so the
# folds, and therefore the reported score, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validate the logistic regression on the training data only.
# NOTE(review): cross_val_score is documented to fit clones of the estimator,
# so it should not disturb the fit above — confirm against the sklearn docs.
lr_scores = cross_val_score(lr_model, X_train, y_train, cv=kf)
# Report the mean score across the folds as a percentage.
print(f"Accuracy LR (CrossVal): {np.mean(lr_scores)*100:.4f}%")

Accuracy LR (CrossVal): 60.8539%


In [4]:
# Score the held-out test set with the model fitted in the previous cell.
predictions = lr_model.predict(X_test)

# Compute every metric first, each rounded to two decimals for display.
exactitud = round(accuracy_score(y_test, predictions), 2)    # accuracy
precision = round(precision_score(y_test, predictions), 2)   # precision
f1 = round(f1_score(y_test, predictions), 2)                 # F1 score

# Report the metrics in the same order as they were computed.
print("Exactitud:", exactitud)
print("Precisión:", precision)
print("Puntuación F1:", f1)

Exactitud: 0.58
Precisión: 0.58
Puntuación F1: 0.57
