In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('../data/processed/casas.csv')

In [3]:
df.head()

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [4]:
X = df.drop('preco', axis=1)
y = df['preco'].copy()

In [5]:
X.head()

Unnamed: 0,tamanho,ano,garagem
0,159.0,2003,2
1,117.0,1976,2
2,166.0,2001,2
3,160.0,1915,3
4,204.0,2000,3


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
X_train.shape

(1022, 3)

In [8]:
X_test.shape

(438, 3)

In [9]:
import mlflow

In [10]:
mlflow.create_experiment('house-prices-eda')

'1'

# Regressao Linear

In [11]:
mlflow.start_run(experiment_id='1')

<ActiveRun: >

In [12]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [13]:
mlflow.sklearn.log_model(lr, 'lr')

In [14]:
lr_pred = lr.predict(X_test)

In [15]:
X_test.iloc[0]

tamanho      99.0
ano        1963.0
garagem       1.0
Name: 892, dtype: float64

In [16]:
list(y_test)[0]

154500

In [17]:
from sklearn.metrics import mean_squared_error, r2_score

In [18]:
import math

mse = mean_squared_error(y_test, lr_pred) # erro quadrado médio
rmse = math.sqrt(mse) # raiz qudrada erro medio
r2 = r2_score(y_test, lr_pred) # r quadrado

mlflow.log_metric('mse', mse)
mlflow.log_metric('rmse', rmse)
mlflow.log_metric('r2', r2)

In [19]:
rmse

45592.39978251848

In [20]:
r2

0.7021153642898048

In [21]:
mlflow.end_run()

# XGB Regressor

In [22]:
from xgboost import XGBRFRegressor, XGBRegressor

In [23]:
xgb_params = {
    'learning_rate': 0.2,
    'n_estimators': 50,
    'random_state': 42
}

with mlflow.start_run(experiment_id='1'):
    xgb = XGBRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    mlflow.xgboost.log_model(xgb, 'xgboost')
    
    xgb_pred = xgb.predict(X_test)
    
    mse = mean_squared_error(y_test, xgb_pred) # erro quadrado médio
    rmse = math.sqrt(mse) # raiz qudrada erro medio
    r2 = r2_score(y_test, xgb_pred)
    
    mlflow.log_metrics({'mse': mse, 'rmse': rmse, 'r2': r2})

In [24]:
xgb_pred = xgb.predict(X_test)

In [25]:
mse = mean_squared_error(y_test, xgb_pred) # erro quadrado médio
rmse = math.sqrt(mse) # raiz qudrada erro medio
r2 = r2_score(y_test, xgb_pred)

In [26]:
mse

1386727460.1346002

In [27]:
rmse

37238.789724353286

In [28]:
r2

0.8012741720529797

In [29]:
mlflow.get_experiment('1')

<Experiment: artifact_location='file:///home/kyle/Projetos/house-prices-mlflow/mlflow/notebooks/mlruns/1', experiment_id='1', lifecycle_stage='active', name='house-prices-eda', tags={}>

In [30]:
mlflow.get_experiment_by_name('house-prices-eda')

<Experiment: artifact_location='file:///home/kyle/Projetos/house-prices-mlflow/mlflow/notebooks/mlruns/1', experiment_id='1', lifecycle_stage='active', name='house-prices-eda', tags={}>

In [31]:
mlflow.list_run_infos('1')

[<RunInfo: artifact_uri='file:///home/kyle/Projetos/house-prices-mlflow/mlflow/notebooks/mlruns/1/ac601cfe750542e59e29c37c8c5ab29a/artifacts', end_time=1622043929531, experiment_id='1', lifecycle_stage='active', run_id='ac601cfe750542e59e29c37c8c5ab29a', run_uuid='ac601cfe750542e59e29c37c8c5ab29a', start_time=1622043929321, status='FINISHED', user_id='kyle'>,
 <RunInfo: artifact_uri='file:///home/kyle/Projetos/house-prices-mlflow/mlflow/notebooks/mlruns/1/48a0dd20b3f44343b644efc2bf14e83c/artifacts', end_time=1622043924321, experiment_id='1', lifecycle_stage='active', run_id='48a0dd20b3f44343b644efc2bf14e83c', run_uuid='48a0dd20b3f44343b644efc2bf14e83c', start_time=1622043911491, status='FINISHED', user_id='kyle'>]

In [33]:
mlflow.get_run('ac601cfe750542e59e29c37c8c5ab29a')

<Run: data=<RunData: metrics={'mse': 1386727460.1346002,
 'r2': 0.8012741720529797,
 'rmse': 37238.789724353286}, params={}, tags={'mlflow.log-model.history': '[{"run_id": "ac601cfe750542e59e29c37c8c5ab29a", '
                             '"artifact_path": "xgboost", "utc_time_created": '
                             '"2021-05-26 15:45:29.504939", "flavors": '
                             '{"python_function": {"loader_module": '
                             '"mlflow.xgboost", "python_version": "3.8.5", '
                             '"data": "model.xgb", "env": "conda.yaml"}, '
                             '"xgboost": {"xgb_version": "1.4.2", "data": '
                             '"model.xgb"}}}]',
 'mlflow.source.git.commit': '5fbe2e6ccbe63c62cb8b87fa690f3dc5ca350e2f',
 'mlflow.source.name': '/home/kyle/Projetos/house-prices-mlflow/.venv/lib/python3.8/site-packages/ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'kyle'}>, info=<RunInfo: artifact_uri='file:///h

In [34]:
[mlflow.get_run(run.run_id).data.metrics for run in mlflow.list_run_infos('1')]

[{'mse': 1386727460.1346002,
  'r2': 0.8012741720529797,
  'rmse': 37238.789724353286},
 {'mse': 2078666917.9289908,
  'r2': 0.7021153642898048,
  'rmse': 45592.39978251848}]