In [19]:
import warnings
# Suppress every warning for the rest of the session (no category or module
# filter is given, so this also hides deprecation and convergence warnings).
warnings.filterwarnings(action='ignore')

import pandas as pd
import numpy as np

import joblib

from sklearn.metrics import mean_squared_error

In [39]:
# Read the training and test tables. The dtype mapping requests a boolean
# dtype for a column named 'Class'; all other columns use pandas' inference.
X_train, X_test = (
    pd.read_csv(csv_path, dtype={'Class': bool})
    for csv_path in ('data/train.csv', 'data/test.csv')
)

In [40]:
# Columns passed to the models, in the order used to slice X_train / X_test.
features = [
    'ship_mode', 'segment', 'region',
    'category', 'sub_category',
    'sales', 'quantity', 'discount',
    'state',
]

# Name of the column holding the value to predict.
target = 'profit'

In [41]:
# Convert every string/categorical column to a pandas Categorical.
#
# The category list for the test frame starts with the training categories, in
# the same order, so a label shared by both frames gets the same integer code
# in train and test. Building the test categories from the test set's own
# order of appearance (as before) can give one label two different codes.
# Labels that only occur in the test frame are appended afterwards so their
# values are kept rather than turned into NaN.
# NOTE(review): assumes the models were fit with categories in training order
# of appearance -- confirm against the training notebook.
for feature in X_train.select_dtypes(include=['object', 'category']).columns.tolist():
    # dropna(): pd.Categorical refuses null entries in `categories`.
    train_categories = X_train[feature].dropna().unique().tolist()
    known_labels = set(train_categories)
    test_only_categories = [
        label
        for label in X_test[feature].dropna().unique().tolist()
        if label not in known_labels
    ]

    X_train[feature] = pd.Categorical(X_train[feature], categories=train_categories)
    X_test[feature] = pd.Categorical(
        X_test[feature],
        categories=train_categories + test_only_categories,
    )

In [42]:
# Separate the target column from the training frame, then keep only the
# model input columns (in the order given by `features`).
y_train = X_train.loc[:, target]
X_train = X_train.drop(columns=[target]).loc[:, features]

In [43]:
# Load the five persisted estimators from models/.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files from a trusted source.
model_et, model_gbr, model_knr, model_lgb, model_rf = (
    joblib.load(pkl_path)
    for pkl_path in (
        'models/best_model_et.pkl',
        'models/best_model_gbr.pkl',
        'models/best_model_knr.pkl',
        'models/best_model_lightgbm.pkl',
        'models/best_model_randomforest.pkl',
    )
)

In [44]:
# Score each loaded model on the training frame and print its RMSE.
# NOTE(review): the error is measured on data/train.csv itself. If the models
# were fit on this same data these are in-sample errors, which favour models
# that memorise the training set -- confirm, and prefer a held-out split
# before using these numbers to choose a model.
for (model, model_name) in [
    (model_et, 'Extra Trees'),
    (model_gbr, 'GB Regressor'),
    (model_knr, 'KN Regressor'),
    (model_lgb, 'Light GBM Regressor'),
    (model_rf, 'RF Regressor'),
]:
    y_pred = model.predict(X_train)

    # The square root of the MSE is the root mean squared error, so the
    # variable and the printed label say RMSE rather than MSE.
    rmse = np.sqrt(mean_squared_error(y_train, y_pred))

    print(f"Raiz do Erro Quadrático Médio (RMSE) de {model_name}:", rmse)

Erro Quadrático Médio de Extra Trees: 50.743084997961994
Erro Quadrático Médio de GB Regressor: 19.82340761372613
Erro Quadrático Médio de KN Regressor: 1.034017196584682
Erro Quadrático Médio de Light GBM Regressor: 28.41843877059997
Erro Quadrático Médio de RF Regressor: 55.572287165679036


In [51]:
# Predict with the K-Neighbors model and store the result in X_test['profit'].
X_test['profit'] = model_knr.predict(X_test.loc[:, features])
# Write only the id and the predicted profit, without the index column.
X_test.loc[:, ['id', 'profit']].to_csv('data/submission_knn.csv', index=False)

In [52]:
# Predict with the gradient boosting model; this overwrites any 'profit'
# column already present on X_test.
X_test['profit'] = model_gbr.predict(X_test.loc[:, features])
# Write only the id and the predicted profit, without the index column.
X_test.loc[:, ['id', 'profit']].to_csv('data/submission_gbr.csv', index=False)

In [53]:
# Predict with the LightGBM model; this overwrites any 'profit' column
# already present on X_test.
X_test['profit'] = model_lgb.predict(X_test.loc[:, features])
# Write only the id and the predicted profit, without the index column.
X_test.loc[:, ['id', 'profit']].to_csv('data/submission_lgbm.csv', index=False)