In [1]:
import pandas as pd
import mlflow


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/alura-cursos/2104-mlflow/aula1/casas.csv')
df.head()



Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [3]:
df.columns

Index(['tamanho', 'ano', 'garagem', 'preco'], dtype='object')

## Preparacao dos Dados

In [3]:
y = df[['preco']].copy()
X = df.drop('preco', axis=1)

In [4]:
X.head()

Unnamed: 0,tamanho,ano,garagem
0,159.0,2003,2
1,117.0,1976,2
2,166.0,2001,2
3,160.0,1915,3
4,204.0,2000,3


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [6]:
scaler_stand = StandardScaler()
scaler_min_max = MinMaxScaler()

In [7]:
a = [[1,4,5,6,3,5]]
scaler_min_max.fit(a)

In [8]:
def pipeline_ml(model, kind=0):
    if kind==0:
        return make_pipeline(scaler_stand, model)
    elif kind==1:
        return make_pipeline(scaler_min_max, model)


## Inicio dos Treinamentos

In [9]:
mlflow.set_experiment('houses-prices-ml-project')
mlflow.sklearn.autolog()

with mlflow.start_run(run_name='Regressao-Linear'):

    linear_reg = LinearRegression()
    linear_reg.fit(X_train, y_train)

    y_predict = linear_reg.predict(X_test)

    mse = mean_squared_error(y_true=y_test, y_pred=y_predict)
    r2 = r2_score(y_true=y_test, y_pred=y_predict)


    mlflow.log_metrics({
            'mse': mse,
            "r2": r2
        })







2023/04/12 23:45:26 INFO mlflow.tracking.fluent: Experiment with name 'houses-prices-ml-project' does not exist. Creating a new experiment.


In [10]:
with mlflow.start_run(run_name='svm-regressior'):

    svm_reg = SVR()
    svm_reg.fit(X_train, y_train)

    y_predict = svm_reg.predict(X_test)

    mse = mean_squared_error(y_true=y_test, y_pred=y_predict)
    r2 = r2_score(y_true=y_test, y_pred=y_predict)


    mlflow.log_metrics({
            'mse': mse,
            "r2": r2
        })


  y = column_or_1d(y, warn=True)


In [11]:
with mlflow.start_run(run_name='random-forest-regressior'):

    rf_reg = RandomForestRegressor(max_depth=7)
    rf_reg.fit(X_train, y_train)

    y_predict = rf_reg.predict(X_test)

    mse = mean_squared_error(y_true=y_test, y_pred=y_predict)
    r2 = r2_score(y_true=y_test, y_pred=y_predict)


    mlflow.log_metrics({
            'mse': mse,
            "r2": r2
        })



In [20]:
scaler_stand.fit(X_train)
X_train_scaler = scaler_stand.transform(X_train)
X_test_scaler = scaler_stand.transform(X_test)

In [39]:
import numpy as np
X_train.reset_index(inplace=True)
scaler_stand.fit_transform(np.array(X_train))


array([[-0.26347343, -0.89978359, -0.26050401,  0.29309491],
       [-0.26815196, -0.77630822,  1.1709873 , -2.43687485],
       [ 1.34594062,  0.17033624,  1.10591951,  1.6580798 ],
       ...,
       [ 1.32488724, -1.27020969, -0.52077515,  0.29309491],
       [ 0.3096464 , -0.20008985, -1.72452921, -1.07188997],
       [ 0.93189079,  0.04686088,  1.1709873 ,  1.6580798 ]])

In [37]:
X_train.isnull()


Unnamed: 0,tamanho,ano,garagem
615,False,False,False
613,False,False,False
1303,False,False,False
486,False,False,False
561,False,False,False
...,...,...,...
1095,False,False,False
1130,False,False,False
1294,False,False,False
860,False,False,False


In [None]:
with mlflow.start_run(run_name='Regressao-Linear-scaler'):

    linear_reg = LinearRegression()
    pipeline_lr = pipeline_ml(linear_reg)


    linear_reg.fit(X_train, y_train)

    y_predict = linear_reg.predict(X_test)

    mse = mean_squared_error(y_true=y_test, y_pred=y_predict)
    r2 = r2_score(y_true=y_test, y_pred=y_predict)


    mlflow.log_metrics({
            'mse': mse,
            "r2": r2
        })