In [40]:
# Importacion de Librerias

import pandas as pd
import numpy as np
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.datasets import load_wine

In [12]:
# Data loading

# Fetch the wine dataset once and reuse the returned Bunch; the original
# called load_wine() three separate times to build a single frame.
wine = load_wine()
data = pd.DataFrame(data=wine.data, columns=wine.feature_names)
data['target'] = wine.target  # class label stored alongside the features

data.info()
data.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   alcohol                       178 non-null    float64
 1   malic_acid                    178 non-null    float64
 2   ash                           178 non-null    float64
 3   alcalinity_of_ash             178 non-null    float64
 4   magnesium                     178 non-null    float64
 5   total_phenols                 178 non-null    float64
 6   flavanoids                    178 non-null    float64
 7   nonflavanoid_phenols          178 non-null    float64
 8   proanthocyanins               178 non-null    float64
 9   color_intensity               178 non-null    float64
 10  hue                           178 non-null    float64
 11  od280/od315_of_diluted_wines  178 non-null    float64
 12  proline                       178 non-null    float64
 13  targe

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [28]:
# Split the frame into the label column and the remaining predictor columns.
y = data['target']
X = data.drop(columns='target')

# Standardise the predictors with StandardScaler.
# Note: the scaler here is fit on every row of X, before any train/test split.
scaler = StandardScaler()
X_Norm = scaler.fit_transform(X)

In [30]:
# Raw (unscaled) inputs: hold out 25% of the rows for testing; the fixed
# random_state keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Standardised inputs: fit the scaler on the training rows only and apply the
# same transformation to the test rows, so no test-set statistics leak into
# preprocessing. (Previously the split was taken from data scaled on all rows.)
scaler_train = StandardScaler().fit(X_train)
X_train_Norm = scaler_train.transform(X_train)
X_test_Norm = scaler_train.transform(X_test)

# The standardised split contains exactly the same rows as the raw split,
# so both share the same label vectors.
y_train_Norm, y_test_Norm = y_train, y_test


In [63]:
# Helper used to train and score each model so the results can be compared later.

def Evaluacion_Modelo(Modelo, X_train, y_train, X_test, y_test):
    """Fit ``Modelo`` on the training data and score it on the test data.

    The confusion matrix for the test predictions is printed as a side effect.

    Parameters:
        Modelo: object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted
            in place.
        X_train, y_train: inputs and labels passed to ``Modelo.fit``.
        X_test, y_test: inputs passed to ``Modelo.predict`` and the labels the
            predictions are compared against.

    Returns:
        dict with ``"Modelo"`` (``str(Modelo)``) and ``"Exactitud"`` (the value
        returned by ``accuracy_score`` on the test predictions).
    """
    Modelo.fit(X_train, y_train)
    predicciones = Modelo.predict(X_test)

    exactitud_test = accuracy_score(y_test, predicciones)
    matriz = confusion_matrix(y_test, predicciones)
    print(f"Matriz de Confusion:\n{matriz}")

    resumen = {}
    resumen["Modelo"] = str(Modelo)
    resumen["Exactitud"] = exactitud_test
    return resumen

# Two independent accumulators for the per-model result dicts:
# one for the raw inputs, one for the standardised inputs.
Resultados, Resultados_Norm = [], []

In [65]:
# Logistic regression

# On the raw features the iteration cap is raised to 8000 so the solver can
# converge properly; this costs compute time but gives a more accurate model.
log_reg = LogisticRegression(max_iter=8000)
Resultados.append(
    Evaluacion_Modelo(
        Modelo=log_reg,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )
)

print("\n")
print("Resultado Normalizado")
# On the standardised features the default settings are used.
log_reg_scaled = LogisticRegression()
Resultados_Norm.append(
    Evaluacion_Modelo(
        Modelo=log_reg_scaled,
        X_train=X_train_Norm,
        y_train=y_train_Norm,
        X_test=X_test_Norm,
        y_test=y_test_Norm,
    )
)


Matriz de Confusion:
[[15  0  0]
 [ 0 18  0]
 [ 0  0 12]]


Matriz de Confusion:
[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]


In [67]:
# K-nearest neighbours (k=3, uniform weights), evaluated on raw and then
# standardised inputs.
knn = KNeighborsClassifier(n_neighbors=3, weights='uniform')
Resultados.append(
    Evaluacion_Modelo(
        Modelo=knn,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )
)

print("\n")
print("Resultado Normalizado")
knn_scaled = KNeighborsClassifier(n_neighbors=3, weights='uniform')
Resultados_Norm.append(
    Evaluacion_Modelo(
        Modelo=knn_scaled,
        X_train=X_train_Norm,
        y_train=y_train_Norm,
        X_test=X_test_Norm,
        y_test=y_test_Norm,
    )
)


Matriz de Confusion:
[[13  0  2]
 [ 1 13  4]
 [ 1  3  8]]


Matriz de Confusion:
[[15  0  0]
 [ 1 16  1]
 [ 0  0 12]]


In [69]:
# Support vector classifier with a linear kernel, evaluated on raw and then
# standardised inputs.
svm = SVC(kernel='linear')
Resultados.append(
    Evaluacion_Modelo(
        Modelo=svm,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )
)

print("\n")
print("Resultado Normalizado")
svm_scaled = SVC(kernel='linear')
Resultados_Norm.append(
    Evaluacion_Modelo(
        Modelo=svm_scaled,
        X_train=X_train_Norm,
        y_train=y_train_Norm,
        X_test=X_test_Norm,
        y_test=y_test_Norm,
    )
)


Matriz de Confusion:
[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]


Matriz de Confusion:
[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]


In [79]:
# Model comparison: one table per input variant, tagged with a boolean
# "Normalizado" column, then stacked into a single frame.
Comparacion = pd.DataFrame(Resultados).assign(Normalizado=False)
Comparacion_Norm = pd.DataFrame(Resultados_Norm).assign(Normalizado=True)

# ignore_index=True renumbers the rows 0..n-1; without it the stacked frame
# keeps each part's own index and ends up with duplicate labels (0,1,2,0,1,2).
Resultado_Final = pd.concat([Comparacion, Comparacion_Norm], ignore_index=True)

In [81]:
# Show the combined table: model, accuracy ("Exactitud") and whether the
# inputs were normalised ("Normalizado").
Resultado_Final

Unnamed: 0,Modelo,Exactitud,Normalizado
0,LogisticRegression(max_iter=8000),1.0,False
1,KNeighborsClassifier(n_neighbors=3),0.755556,False
2,SVC(kernel='linear'),0.977778,False
0,LogisticRegression(),0.977778,True
1,KNeighborsClassifier(n_neighbors=3),0.955556,True
2,SVC(kernel='linear'),0.977778,True
