In [1]:
# libs
import warnings
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import mlflow

warnings.filterwarnings('ignore')

In [2]:
# Carregando a base de dados
red = pd.read_csv('winequality-red.csv',sep=';')

In [3]:
# Visualizando as 5 primeiras linhas 
red.head(5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [4]:
red['good_wine'] = [1 if quality >= 6 else 0 for quality in red['quality']]

In [5]:
red.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,good_wine
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0


In [6]:
#limpando espaços no nomes das colunas

red.columns =  red.columns.str.replace(' ','_')

In [7]:
 red.columns.values

array(['fixed_acidity', 'volatile_acidity', 'citric_acid',
       'residual_sugar', 'chlorides', 'free_sulfur_dioxide',
       'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol',
       'quality', 'good_wine'], dtype=object)

In [8]:
X = red.drop(['good_wine', 'quality'], axis = 1)
y = red['good_wine']

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size = 0.25, 
                                                    random_state = 42,
                                                    stratify = y)

In [9]:
X_train.shape, X_test.shape

((1199, 11), (400, 11))

In [10]:
DB_URI = 'sqlite:///mlrunsdb.db'
mlflow.set_tracking_uri(DB_URI)

tags = {
    "Módulo":"Modelos Produtivos 1",
    "Integrantes do grupo":['Josimar','Vitor'],
    "objeto":'wines'
}

#--------------------

experiment = mlflow.set_experiment(experiment_name='wine_quality')
mlflow.set_experiment_tags(tags=tags)

2022/11/15 12:56:03 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2022/11/15 12:56:04 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

In [11]:
mlflow.set_experiment_tags(tags=tags)

In [12]:
# Get Experiment Details
print("Nome: {}".format(experiment.name))
print("Experiment_id: {}".format(experiment.experiment_id))
print("Artifact Location: {}".format(experiment.artifact_location))
print("Tags: {}".format(experiment.tags))
print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

Nome: wine_quality
Experiment_id: 1
Artifact Location: ./mlruns/1
Tags: {}
Lifecycle_stage: active


In [13]:
def get_metrics(y_test:list, y_pred:list) -> list:
    ac = accuracy_score(y_test, y_pred)
    pr = precision_score(y_test, y_pred, average='weighted')
    rc = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    return [ac, pr, rc, f1]

In [14]:
with mlflow.start_run(
    run_name='API',
    description='Wine quality',
    tags={"version":"v1","env":"dev"}
) as model_run:


   
    svm = SVC()
    model = Pipeline([('scaler',StandardScaler()),('svm',svm)])
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # mc = confusion_matrix(y_test.values.ravel(), y_pred)
    ac, pr, rc, f1 = get_metrics(y_test, y_pred)


    # mlflow.log_param("k",k)
    # mlflow.log_param("size_train_dataset", len(X_train))
    # mlflow.log_param("size_test_dataset", len(X_test))

    params = {
        "size_train_dataset": len(X_train),
        "size_test_dataset": len(X_test),
    }

    metrics = {
        "acuracia":ac,
        "precision":pr,
        "recall":rc,
        "f1":f1
    }

    mlflow.log_params(params=params)
    mlflow.log_metrics(metrics=metrics)

    mlflow.sklearn.log_model(model, "model")