In [10]:
import pandas as pd
import os
# Work from the project root so the relative data path used when loading the
# CSV resolves. The location can be overridden with the
# HOUSE_PRICE_PROJECT_ROOT environment variable; when it is unset the original
# hard-coded path is used, so existing setups behave exactly as before.
os.chdir(os.environ.get('HOUSE_PRICE_PROJECT_ROOT', '/opt/projects/mlflow'))

In [11]:
# Load the processed 'casas' dataset; the path is relative to the current
# working directory.
casas_csv = './data/processed/casas.csv'
df = pd.read_csv(casas_csv)

In [12]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [18]:
# Features are every column except the target 'preco'.
# 'Unnamed: 0' looks like the row index that was saved along with the CSV
# (values 0, 1, 2, ... in the preview), not a property of a house. Keeping it
# in X would let the models fit on row order, so it is removed as well.
# errors='ignore' applies only to that column, so the cell keeps working if
# the file is later loaded without it, while a missing 'preco' still raises.
X = df.drop(columns=['preco']).drop(columns=['Unnamed: 0'], errors='ignore')

# Independent copy of the target so later changes to y never touch df.
y = df['preco'].copy()

In [19]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,tamanho,ano,garagem
0,159.0,2003,2
1,117.0,1976,2
2,166.0,2001,2
3,160.0,1915,3
4,204.0,2000,3


In [20]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Linear Regression

In [34]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Baseline: a linear regression fitted on the training split and scored on the
# held-out split.
lr = LinearRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)

mse = mean_squared_error(y_test, lr_pred)
rmse = mse ** 0.5  # square root of the MSE
r2 = r2_score(y_test, lr_pred)

# Report the three metrics, one per line.
for label, value in (('MSE: ', mse), ('RMSE: ', rmse), ('R2: ', r2)):
    print(label, value)

MSE:  2078666917.9289908
RMSE:  45592.39978251848
R2:  0.7021153642898048


In [36]:
from xgboost import XGBRegressor

# XGBoost regressor with library-default hyperparameters, fitted on the
# training split and scored on the held-out split.
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)

mse = mean_squared_error(y_test, xgb_pred)
rmse = mse ** 0.5  # square root of the MSE
r2 = r2_score(y_test, xgb_pred)

# Report the three metrics, one per line.
for label, value in {'MSE: ': mse, 'RMSE: ': rmse, 'R2: ': r2}.items():
    print(label, value)

MSE:  1572136399.5585918
RMSE:  39650.17527777894
R2:  0.7747039794921875


# MLflow

In [37]:
import mlflow

In [42]:
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor, XGBRFRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Select (or create) the MLflow experiment that the run below is recorded in.
mlflow.set_experiment('house_price-eda')

# Gradient-boosting hyperparameters: 50 boosting rounds, each scaled by a
# learning rate of 0.2, with a fixed seed for reproducibility.
xgb_params = {
    'learning_rate': 0.2,
    'n_estimators': 50,
    'random_state': 42
}

with mlflow.start_run():
    # Enable XGBoost autologging for the training done inside this run.
    mlflow.xgboost.autolog()

    # These are boosting parameters, so they go to XGBRegressor.
    # XGBRFRegressor trains a random forest in a single boosting round and the
    # XGBoost docs require learning_rate=1 there; with 0.2 the forest's output
    # would be shrunk to a fifth and the logged metrics would be meaningless.
    model = XGBRegressor(**xgb_params)

    model.fit(X_train, y_train)
    model_pred = model.predict(X_test)

    # Score on the held-out split.
    mse = mean_squared_error(y_test, model_pred)
    rmse = mse ** 0.5  # square root of the MSE
    r2 = r2_score(y_test, model_pred)

    # Test-set metrics are logged explicitly under these names.
    mlflow.log_metrics({'MSE': mse, 'RMSE': rmse, 'R2': r2})

2024/09/25 23:37:41 INFO mlflow.tracking._tracking_service.client: 🏃 View run stately-ape-119 at: http://mlflow_server:5000/#/experiments/1/runs/23d050c9ec1747ea83b554886b32b09c.
2024/09/25 23:37:41 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow_server:5000/#/experiments/1.
