In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings

# Ignore every UserWarning and RuntimeWarning for the rest of the session
# (the original intent was to hide scikit-learn warnings).
for _silenced_category in (UserWarning, RuntimeWarning):
    warnings.filterwarnings("ignore", category=_silenced_category)

# Load the three datasets from their CSV files, in the same order as before
swedish_auto_data, wine_quality_data, pima_diabetes_data = [
    pd.read_csv(csv_path)
    for csv_path in ('AutoInsurSweden.csv', 'wine-Quality.csv', 'pima-indians-diabetes.csv')
]

# Quartiles (25%, 50%, 75%) of Y in the Swedish Auto Insurance dataset.
# low_limit and medium_limit are the thresholds read by categorize_y;
# high_limit is computed as well but is not referenced by the code in this cell.
quartiles = swedish_auto_data['Y'].quantile([0.25, 0.5, 0.75])
low_limit, medium_limit, high_limit = (quartiles.iloc[position] for position in range(3))

def categorize_y(y):
    """Return the category label for a single Y value.

    The value is compared against the module-level thresholds ``low_limit``
    and ``medium_limit``:

    * ``y <= low_limit``    -> ``'bajo'``
    * ``y <= medium_limit`` -> ``'medio'``
    * anything else         -> ``'alto'``
    """
    # Guard clause for the lowest band; the remaining two bands share one expression.
    if y <= low_limit:
        return 'bajo'
    return 'medio' if y <= medium_limit else 'alto'

# Label every row of the Swedish dataset by applying categorize_y to its Y value,
# then store the labels in the Y_category column.
y_category_labels = swedish_auto_data['Y'].apply(categorize_y)
swedish_auto_data['Y_category'] = y_category_labels

# Definir una función para implementar clasificadores y calcular métricas de evaluación
def evaluate_classifier(X, y, classifier, test_size=0.2, random_state=42):
    """Train one classifier on a hold-out split and print its test metrics.

    Parameters
    ----------
    X : features passed to ``train_test_split`` and then to the estimator.
    y : target labels aligned with ``X``.
    classifier : estimator class (not an instance). It is instantiated here
        and its ``__name__`` is used in the printed header.
    test_size : fraction of the rows held out for testing (default 0.2).
    random_state : seed used for the train/test split and for every estimator
        that exposes a ``random_state`` parameter (default 42).

    Returns
    -------
    None. Accuracy and the weighted precision, recall and F1 score computed
    on the test split are printed.
    """
    # Split the data into a training set and a test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # LogisticRegression is built with a raised iteration cap; every other
    # classifier uses its default constructor arguments.
    if classifier is LogisticRegression:
        model = classifier(max_iter=1000)
    else:
        model = classifier()

    # Seed estimators that accept random_state so stochastic models
    # (e.g. MLPClassifier) give the same numbers on every run.
    if "random_state" in model.get_params():
        model.set_params(random_state=random_state)

    model.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = model.predict(X_test)

    # Evaluation metrics. zero_division=0 yields the same value as the default
    # 'warn' setting but does not depend on the global warning filters.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    # Print the metrics
    print("\nMétricas: ", classifier.__name__)
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)

# Classifier classes evaluated on every dataset, in reporting order
classifier_classes = (LogisticRegression, KNeighborsClassifier, SVC, GaussianNB, MLPClassifier)

# Swedish Auto Insurance Dataset: feature column X, target column Y_category
print("\nSwedish Auto Insurance Dataset:")
X_swedish = swedish_auto_data[['X']]
y_swedish = swedish_auto_data['Y_category']
for classifier_class in classifier_classes:
    evaluate_classifier(X_swedish, y_swedish, classifier_class)

# Wine Quality Dataset: every column except 'quality' is a feature
print("\nWine Quality Dataset:")
X_wine = wine_quality_data.drop(columns='quality')
y_wine = wine_quality_data['quality']
for classifier_class in classifier_classes:
    evaluate_classifier(X_wine, y_wine, classifier_class)

# Pima Indians Diabetes Dataset: every column except the class variable is a feature
print("\nPima Indians Diabetes Dataset:")
X_pima = pima_diabetes_data.drop(columns='Class variable (0 or 1)')
y_pima = pima_diabetes_data['Class variable (0 or 1)']
for classifier_class in classifier_classes:
    evaluate_classifier(X_pima, y_pima, classifier_class)


Swedish Auto Insurance Dataset:

Métricas:  LogisticRegression
Accuracy: 0.6923076923076923
Precision: 0.8653846153846154
Recall: 0.6923076923076923
F1 Score: 0.7433198380566802

Métricas:  KNeighborsClassifier
Accuracy: 0.6153846153846154
Precision: 0.8615384615384615
Recall: 0.6153846153846154
F1 Score: 0.6837606837606838

Métricas:  SVC
Accuracy: 0.6923076923076923
Precision: 0.8717948717948718
Recall: 0.6923076923076923
F1 Score: 0.7510121457489879

Métricas:  GaussianNB
Accuracy: 0.5384615384615384
Precision: 0.8717948717948718
Recall: 0.5384615384615384
F1 Score: 0.6357466063348416

Métricas:  MLPClassifier
Accuracy: 0.8461538461538461
Precision: 0.7159763313609468
Recall: 0.8461538461538461
F1 Score: 0.7756410256410255

Wine Quality Dataset:

Métricas:  LogisticRegression
Accuracy: 0.575
Precision: 0.5287327241022991
Recall: 0.575
F1 Score: 0.5405306044850028

Métricas:  KNeighborsClassifier
Accuracy: 0.45625
Precision: 0.4222930372807017
Recall: 0.45625
F1 Score: 0.42988238113

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings

# Ignore every UserWarning and RuntimeWarning for the rest of the session
# (the original intent was to hide scikit-learn warnings).
for _silenced_category in (UserWarning, RuntimeWarning):
    warnings.filterwarnings("ignore", category=_silenced_category)

# Registry of scikit-learn classifier classes, keyed by the display name that
# evaluate_classifier receives and prints. Insertion order is the reporting order.
classifiers = {
    "Regresión Logística": LogisticRegression,
    "K-Vecinos Cercanos": KNeighborsClassifier,
    "Máquinas de Vectores de Soporte": SVC,
    "Naive Bayes": GaussianNB,
    "Red Neuronal": MLPClassifier,
}

# Load the three datasets from their CSV files, in the same order as before
swedish_auto_data, wine_quality_data, pima_diabetes_data = [
    pd.read_csv(csv_path)
    for csv_path in ('AutoInsurSweden.csv', 'wine-Quality.csv', 'pima-indians-diabetes.csv')
]

# Quartiles (25%, 50%, 75%) of Y in the Swedish Auto Insurance dataset.
# low_limit and medium_limit are the thresholds read by categorize_y;
# high_limit is computed as well but is not referenced by the code in this cell.
quartiles = swedish_auto_data['Y'].quantile([0.25, 0.5, 0.75])
low_limit, medium_limit, high_limit = (quartiles.iloc[position] for position in range(3))

def categorize_y(y):
    """Return 'bajo', 'medio' or 'alto' for a single Y value.

    ``y`` is checked first against the module-level ``low_limit`` and then
    against ``medium_limit``; a value that satisfies neither ``<=`` comparison
    is labelled ``'alto'``.
    """
    return 'bajo' if y <= low_limit else ('medio' if y <= medium_limit else 'alto')

# Label every row of the Swedish dataset by applying categorize_y to its Y value,
# then store the labels in the Y_category column.
y_category_labels = swedish_auto_data['Y'].apply(categorize_y)
swedish_auto_data['Y_category'] = y_category_labels

# Definir una función para implementar clasificadores y calcular métricas de evaluación
def evaluate_classifier(X, y, classifier_name, test_size=0.2, random_state=42):
    """Train the named classifier on a hold-out split and print its test metrics.

    Parameters
    ----------
    X : features passed to ``train_test_split`` and then to the estimator.
    y : target labels aligned with ``X``.
    classifier_name : key of the module-level ``classifiers`` dict; selects the
        estimator class and is echoed in the printed header.
    test_size : fraction of the rows held out for testing (default 0.2).
    random_state : seed used for the train/test split and for every estimator
        that exposes a ``random_state`` parameter (default 42).

    Returns
    -------
    None. Accuracy and the weighted precision, recall and F1 score computed
    on the test split are printed.

    Raises
    ------
    KeyError
        If ``classifier_name`` is not a key of ``classifiers``.
    """
    # Look up the classifier class registered under this name
    classifier = classifiers[classifier_name]

    # Split the data into a training set and a test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Build the estimator exactly once: LogisticRegression gets a raised
    # iteration cap, every other classifier uses its default arguments.
    if classifier is LogisticRegression:
        model = classifier(max_iter=1000)
    else:
        model = classifier()

    # Seed estimators that accept random_state so stochastic models
    # (e.g. MLPClassifier) give the same numbers on every run.
    if "random_state" in model.get_params():
        model.set_params(random_state=random_state)

    model.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = model.predict(X_test)

    # Evaluation metrics. zero_division=0 yields the same value as the default
    # 'warn' setting but does not depend on the global warning filters.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    # Print the metrics together with the name of the classification method
    print("\nMétricas para el método de clasificación", classifier_name, ":")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)

def _evaluate_all(X, y):
    """Call evaluate_classifier on (X, y) once per key of ``classifiers``, in dict order."""
    for name in classifiers:
        evaluate_classifier(X, y, name)


# Swedish Auto Insurance Dataset: feature column X, target column Y_category
print("\nSwedish Auto Insurance Dataset:")
X_swedish = swedish_auto_data[['X']]
y_swedish = swedish_auto_data['Y_category']
_evaluate_all(X_swedish, y_swedish)

# Wine Quality Dataset: every column except 'quality' is a feature
print("\nWine Quality Dataset:")
X_wine = wine_quality_data.drop(columns='quality')
y_wine = wine_quality_data['quality']
_evaluate_all(X_wine, y_wine)

# Pima Indians Diabetes Dataset: every column except the class variable is a feature
print("\nPima Indians Diabetes Dataset:")
X_pima = pima_diabetes_data.drop(columns='Class variable (0 or 1)')
y_pima = pima_diabetes_data['Class variable (0 or 1)']
_evaluate_all(X_pima, y_pima)


Swedish Auto Insurance Dataset:

Métricas para el método de clasificación Regresión Logística :
Accuracy: 0.6923076923076923
Precision: 0.8653846153846154
Recall: 0.6923076923076923
F1 Score: 0.7433198380566802

Métricas para el método de clasificación K-Vecinos Cercanos :
Accuracy: 0.6153846153846154
Precision: 0.8615384615384615
Recall: 0.6153846153846154
F1 Score: 0.6837606837606838

Métricas para el método de clasificación Máquinas de Vectores de Soporte :
Accuracy: 0.6923076923076923
Precision: 0.8717948717948718
Recall: 0.6923076923076923
F1 Score: 0.7510121457489879

Métricas para el método de clasificación Naive Bayes :
Accuracy: 0.5384615384615384
Precision: 0.8717948717948718
Recall: 0.5384615384615384
F1 Score: 0.6357466063348416

Métricas para el método de clasificación Red Neuronal :
Accuracy: 0.8461538461538461
Precision: 0.7159763313609468
Recall: 0.8461538461538461
F1 Score: 0.7756410256410255

Wine Quality Dataset:

Métricas para el método de clasificación Regresión L