In [2]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.tree import export_graphviz


In [3]:
# Location of the abt.csv dataset. The default is the original machine-specific
# path; set the ABT_CSV_PATH environment variable to run this notebook on another
# machine without editing the cell.
ABT_CSV_PATH = os.environ.get(
    "ABT_CSV_PATH",
    "/Users/lucas/Documents/Lucas /projetos/cars/data/abt.csv",
)
df = pd.read_csv(ABT_CSV_PATH)

In [4]:
# Feature matrix: every column of df except the target ('valuation') and the
# columns listed below.
# NOTE(review): 'selling_price' is presumably excluded to avoid target leakage,
# and 'chassis_number' stays in as a feature — confirm both choices are intended.
X = df.drop(
    columns=[
        'valuation',
        'selling_price',
        'day',
        'state',
        'month',
        'color',
        'interior',
        'week_day',
        'transmission',
        'year',
    ]
)

# Regression target.
y = df['valuation']

In [6]:
import pickle

def evaluate_model(X, y, *, test_size=0.2, random_state=42, n_estimators=100,
                   model_file='modelo_rf.pkl', tree_file='tree.dot'):
    """Train a random forest regressor on a hold-out split and report its quality.

    The data is split into train/test parts, a ``RandomForestRegressor`` is
    fitted on the train part, and error metrics are computed on the test part.
    The metrics and a feature-importance table (sorted by descending
    importance) are printed.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix. Must expose ``.columns``; the column names label the
        exported tree and the importance table.
    y : array-like
        Target values aligned with the rows of ``X``.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int or None, default 42
        Seed used for both the split and the forest.
    n_estimators : int, default 100
        Number of trees in the forest.
    model_file : str or None, default 'modelo_rf.pkl'
        Path the fitted model is pickled to. ``None`` skips saving.
    tree_file : str or None, default 'tree.dot'
        Path the first tree of the forest is exported to in Graphviz DOT
        format. ``None`` skips the export.

    Returns
    -------
    tuple
        ``(mse, rmse, mae, r2)`` computed on the test part.

    Notes
    -----
    A failure while saving the model is reported with ``print`` and does not
    abort the evaluation. A failure while exporting the tree is not caught.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model_rf = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model_rf.fit(X_train, y_train)

    y_pred = model_rf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("Mean Squared Error (MSE):", mse)
    print("Root Mean Squared Error (RMSE):", rmse)
    print("Mean Absolute Error (MAE):", mae)
    print("R-squared (R²):", r2)

    if model_file is not None:
        # Saving is best-effort: the metrics are still returned if the write
        # fails. Only unpickle this file from a trusted location, since loading
        # a pickle can execute arbitrary code.
        try:
            with open(model_file, 'wb') as f:
                pickle.dump(model_rf, f)
            print(f"O modelo foi salvo com sucesso em {model_file}.")
        except Exception as e:
            print(f"Erro ao salvar o modelo: {e}")

    # One plain list of names, taken from the frame the model was fitted on,
    # serves both the tree export and the importance table. A list does not
    # depend on how a given scikit-learn release validates array-likes.
    feature_names = list(X_train.columns)

    if tree_file is not None:
        # Only the first tree of the ensemble is exported, as a sample.
        tree = model_rf.estimators_[0]
        export_graphviz(tree, out_file=tree_file, feature_names=feature_names,
                        rounded=True, filled=True)

    feature_importance_df = pd.DataFrame({
        'Feature': feature_names,
        'Importance': model_rf.feature_importances_
    }).sort_values(by='Importance', ascending=False)

    print(feature_importance_df)

    return mse, rmse, mae, r2

# Run evaluate_model once on the full feature matrix and target, keeping the four
# returned hold-out metrics (MSE, RMSE, MAE, R²) as notebook-level variables.
mse, rmse, mae, r2 = evaluate_model(X, y)


Mean Squared Error (MSE): 4.3624106287045227e-05
Root Mean Squared Error (RMSE): 0.006604854751396523
Mean Absolute Error (MAE): 0.003314782861927842
R-squared (R²): 0.9840229801850969
O modelo foi salvo com sucesso em modelo_rf.pkl.
          Feature  Importance
7        odometer    0.340398
5  chassis_number    0.194413
4            body    0.139125
0        car_year    0.120288
2           model    0.091146
1    manufacturer    0.054333
3         version    0.054137
8          seller    0.003659
6       condition    0.002500
