In [332]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.dummy import DummyClassifier
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import joblib
import json
from datetime import date
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import (mean_squared_error, mean_absolute_error, r2_score,
                             accuracy_score, precision_score, recall_score,
                             f1_score, confusion_matrix, roc_auc_score, ConfusionMatrixDisplay)

In [333]:
vino = load_wine()

In [334]:
#EDA

In [335]:
X_train, X_test, y_train, y_test = train_test_split(vino.data, vino.target, test_size=0.9, random_state=42)

In [336]:
dummy = DummyClassifier()
dummy.fit(X_train, y_train)
y_pred_dummy = dummy.predict(X_test)

f1_dummy = f1_score(y_test, y_pred_dummy, average="weighted")

print(f"f1 score Base: {f1_dummy: .4f}")

cm_dummy = confusion_matrix(y_test, y_pred_dummy)

print("Matriz de confusión (Dummy Classifier):")
print(cm_dummy)

f1 score Base:  0.2082
Matriz de confusión (Dummy Classifier):
[[ 0 56  0]
 [ 0 61  0]
 [ 0 44  0]]


In [337]:
forest = RandomForestClassifier(random_state=42)
forest.fit(X_train, y_train)
y_pred_forest = forest.predict(X_test)

f1_forest = f1_score(y_test, y_pred_forest, average="weighted")

print(f"f1 score Base: {f1_forest: .4f}")

cm_forest = confusion_matrix(y_test, y_pred_forest)

print("Matriz de confusión (RandomForest):")
print(cm_forest)

f1 score Base:  0.8175
Matriz de confusión (RandomForest):
[[31 25  0]
 [ 1 58  2]
 [ 0  0 44]]


In [338]:
suppVM = SVC(random_state=42)
suppVM.fit(X_train, y_train)
y_pred_suppVM = suppVM.predict(X_test)

f1_suppVM = f1_score(y_test, y_pred_suppVM, average="weighted")

print(f"f1 score Base: {f1_suppVM: .4f}")

cm_suppVM = confusion_matrix(y_test, y_pred_suppVM)

print("Matriz de confusión (RandomsuppVM):")
print(cm_suppVM)

f1 score Base:  0.5599
Matriz de confusión (RandomsuppVM):
[[44 12  0]
 [ 1 60  0]
 [ 0 44  0]]


In [339]:
param_grid = {
    "svc__C": [0.1, 1, 10, 100],
    "svc__gamma": [0.001, 0.01, 0.1, 1],
    "svc__kernel": ["linear", "rbf", "poly"]
}

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svc", SVC())
])

grid = GridSearchCV(pipeline, param_grid, cv=5, scoring="f1_weighted")
grid.fit(X_train, y_train)

print("Mejores parámetros:", grid.best_params_)
print("Mejor F1 en validación cruzada:", grid.best_score_)



Mejores parámetros: {'svc__C': 0.1, 'svc__gamma': 0.001, 'svc__kernel': 'linear'}
Mejor F1 en validación cruzada: 1.0


In [340]:
best_model = grid.best_estimator_
y_pred_best = best_model.predict(X_test)


f1_best = f1_score(y_test, y_pred_best, average="weighted")
print(f"F1 Score con SVC optimizado: {f1_best:.4f}")

cm_Gr = confusion_matrix(y_test, y_pred_best)

print("Matriz de confusión (Random Gr):")
print(cm_Gr)

F1 Score con SVC optimizado: 0.9003
Matriz de confusión (Random Gr):
[[45 11  0]
 [ 3 57  1]
 [ 0  1 43]]


In [341]:
pipe_SuppVMO= Pipeline([
    ("scaler", StandardScaler()),
    ("svc", SVC(kernel="linear",gamma=0.001,C=0.1,random_state=42))
])

pipe_SuppVMO.fit(X_train, y_train)
y_pred_pipe_SuppVMO = pipe_SuppVMO.predict(X_test)

f1_pipe_SuppVMO = f1_score(y_test, y_pred_pipe_SuppVMO, average="weighted")

print(f"f1 score Base: {f1_pipe_SuppVMO: .4f}")

cm_pipe_SuppVMO = confusion_matrix(y_test, y_pred_pipe_SuppVMO)

print("Matriz de confusión (Random pipe_SuppVMO):")
print(cm_pipe_SuppVMO)

f1 score Base:  0.9003
Matriz de confusión (Random pipe_SuppVMO):
[[45 11  0]
 [ 3 57  1]
 [ 0  1 43]]


In [342]:
print(f"f1 score Base: {f1_dummy: .4f}")
print(f"f1 score RandomForest: {f1_forest: .4f}")
print(f"f1 score SVC: {f1_suppVM: .4f}")
print(f"F1 Score SVC Grilla {f1_best:.4f}")

f1 score Base:  0.2082
f1 score RandomForest:  0.8175
f1 score SVC:  0.5599
F1 Score SVC Grilla 0.9003


In [343]:
rf = pipe_SuppVMO.fit(X_train, y_train)
joblib.dump(rf, "ClasificadorVinos.joblib")

['ClasificadorVinos.joblib']

In [344]:
X = pd.DataFrame(vino.data, columns=vino.feature_names)
metadata = {
      "model_name": "SVC",
      "date":str(date.today()) ,
      "f1_score": "weighted" ,
      "features": list(X.columns),
      "random_state": 42,
}

with open("metadata.json", "w") as f:
  json.dump(metadata, f, indent = 4)