In [1]:
import pandas as pd
import mlflow
import mlflow.sklearn
# warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
df = pd.read_csv('../data/processed/casas.csv')

In [3]:
df.head()

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [4]:
X = df.drop('preco',axis=1)
y = df['preco'].copy()

In [5]:
X.head()

Unnamed: 0,tamanho,ano,garagem
0,159.0,2003,2
1,117.0,1976,2
2,166.0,2001,2
3,160.0,1915,3
4,204.0,2000,3


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
X_train.shape

(1022, 3)

In [8]:
X_test.shape

(438, 3)

In [9]:
mlflow.set_experiment('house-prices-eda')

INFO: 'house-prices-eda' does not exist. Creating a new experiment


In [10]:
mlflow.start_run()

<ActiveRun: >

In [11]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [12]:
mlflow.sklearn.log_model(lr,'lr')

In [13]:
lr_predicted = lr.predict(X_test)

In [14]:
len(lr_predicted)

438

In [15]:
X_test.iloc[0]

tamanho      99.0
ano        1963.0
garagem       1.0
Name: 892, dtype: float64

In [16]:
y_test

892     154500
1105    325000
413     115000
522     159000
1036    315500
         ...  
331     139000
323     126175
650     205950
439     110000
798     485000
Name: preco, Length: 438, dtype: int64

In [17]:
from sklearn.metrics import mean_squared_error, r2_score

In [18]:
import math

mse = mean_squared_error(y_test, lr_predicted)
rmse =  math.sqrt(mse)
r2 = r2_score(y_test, lr_predicted)
mlflow.log_metric('mse',mse)
mlflow.log_metric('rmse',rmse)
mlflow.log_metric('r2',r2)

In [19]:
mse

2078666917.9289908

In [20]:
rmse

45592.39978251848

In [21]:
r2

0.7021153642898048

In [22]:
mlflow.end_run()

In [23]:
from xgboost import XGBRFRegressor, XGBRegressor

In [24]:
xgb_params = {
    'learning_rate':0.2,
    'n_estimators': 50,
    'random_state':42
}

with mlflow.start_run():
    xgb = XGBRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    mlflow.xgboost.log_model(xgb,'xgboost')
    xgb_predicted = xgb.predict(X_test)
    mse = mean_squared_error(y_test, xgb_predicted)
    rmse =  math.sqrt(mse)
    r2 = r2_score(y_test, xgb_predicted)
    mlflow.log_metric('mse',mse)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('r2',r2)



In [25]:
mse

1386727460.1346002

In [26]:
rmse

37238.789724353286

In [27]:
r2

0.8012741720529797

In [28]:
mlflow.get_experiment_by_name('house-prices-eda')

<Experiment: artifact_location='file:///mnt/d/Onderive/Particular/onedrive/Projetos/python/mlflow2/mlflow2/notebooks/mlruns/1', experiment_id='1', lifecycle_stage='active', name='house-prices-eda', tags={}>

In [29]:
mlflow.list_run_infos('1')

[<RunInfo: artifact_uri='file:///mnt/d/Onderive/Particular/onedrive/Projetos/python/mlflow2/mlflow2/notebooks/mlruns/1/12edd5db73b84b5aa8f44248470599fa/artifacts', end_time=1619730171566, experiment_id='1', lifecycle_stage='active', run_id='12edd5db73b84b5aa8f44248470599fa', run_uuid='12edd5db73b84b5aa8f44248470599fa', start_time=1619730168231, status='FINISHED', user_id='dramos'>,
 <RunInfo: artifact_uri='file:///mnt/d/Onderive/Particular/onedrive/Projetos/python/mlflow2/mlflow2/notebooks/mlruns/1/4e0a4923dfee4914b43f0f648a016b01/artifacts', end_time=1619730164965, experiment_id='1', lifecycle_stage='active', run_id='4e0a4923dfee4914b43f0f648a016b01', run_uuid='4e0a4923dfee4914b43f0f648a016b01', start_time=1619730140286, status='FINISHED', user_id='dramos'>]

In [30]:
mlflow.get_run('12edd5db73b84b5aa8f44248470599fa')

<Run: data=<RunData: metrics={'mse': 1386727460.1346002,
 'r2': 0.8012741720529797,
 'rmse': 37238.789724353286}, params={}, tags={'mlflow.log-model.history': '[{"run_id": "12edd5db73b84b5aa8f44248470599fa", '
                             '"artifact_path": "xgboost", "utc_time_created": '
                             '"2021-04-29 21:02:51.019341", "flavors": '
                             '{"python_function": {"loader_module": '
                             '"mlflow.xgboost", "python_version": "3.8.8", '
                             '"data": "model.xgb", "env": "conda.yaml"}, '
                             '"xgboost": {"xgb_version": "1.4.1", "data": '
                             '"model.xgb"}}}]',
 'mlflow.source.name': '/home/dramos/anaconda3/envs/mlflow2/lib/python3.8/site-packages/ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'dramos'}>, info=<RunInfo: artifact_uri='file:///mnt/d/Onderive/Particular/onedrive/Projetos/python/mlflow2/mlflow2/notebooks/mlrun