In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime

In [16]:
# Carregar o dataset
data = pd.read_csv('..//data//Walmart.csv')

# Pré-processamento
# Verificar valores nulos
print("Valores nulos:\n", data.isnull().sum())

Valores nulos:
 invoice_id         0
Branch             0
City               0
category           0
unit_price        31
quantity          31
date               0
time               0
payment_method     0
rating             0
profit_margin      0
dtype: int64


In [17]:
# Extrair características de data e hora
data['date'] = pd.to_datetime(data['date'], format='%d/%m/%y')
data['month'] = data['date'].dt.month
data['day_of_week'] = data['date'].dt.dayofweek
data['time'] = pd.to_datetime(data['time'], format='%H:%M:%S')
data['hour'] = data['time'].dt.hour


In [18]:
# Codificar variáveis categóricas
le = LabelEncoder()
data['Branch'] = le.fit_transform(data['Branch'])
data['City'] = le.fit_transform(data['City'])
data['category'] = le.fit_transform(data['category'])
data['payment_method'] = le.fit_transform(data['payment_method'])

In [19]:
# Selecionar features e alvo
features = ['Branch', 'City', 'category', 'unit_price', 'quantity', 'month', 'day_of_week', 'hour', 'rating', 'profit_margin']
X = data[features]
y = data['payment_method']

In [20]:
# Dividir em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=27)

In [21]:
# Modelo 1: Árvore de Decisão
dt = DecisionTreeClassifier(random_state=27)

# Grid Search para otimizar hiperparâmetros
param_grid_dt = {
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_dt = GridSearchCV(dt, param_grid_dt, cv=5, scoring='accuracy')
grid_dt.fit(X_train, y_train)

# Melhor modelo de árvore
best_dt = grid_dt.best_estimator_
y_pred_dt = best_dt.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print("Melhor Acurácia Árvore de Decisão:", accuracy_dt)
print("Melhores Hiperparâmetros:", grid_dt.best_params_)
print("Relatório de Classificação Árvore de Decisão:\n", classification_report(y_test, y_pred_dt))

ValueError: 
All the 180 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
36 fits failed with the following error:
Traceback (most recent call last):
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\base.py", line 1363, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\tree\_classes.py", line 1024, in fit
    super()._fit(
    ~~~~~~~~~~~~^
        X,
        ^^
    ...<2 lines>...
        check_input=check_input,
        ^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\tree\_classes.py", line 252, in _fit
    X, y = validate_data(
           ~~~~~~~~~~~~~^
        self, X, y, validate_separately=(check_X_params, check_y_params)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\utils\validation.py", line 2966, in validate_data
    X = check_array(X, input_name="X", **check_X_params)
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\utils\validation.py", line 1053, in check_array
    array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\utils\_array_api.py", line 757, in _asarray_with_order
    array = numpy.asarray(array, order=order, dtype=dtype)
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\pandas\core\generic.py", line 2168, in __array__
    arr = np.asarray(values, dtype=dtype)
ValueError: could not convert string to float: '$67'

--------------------------------------------------------------------------------
144 fits failed with the following error:
Traceback (most recent call last):
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\base.py", line 1363, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\tree\_classes.py", line 1024, in fit
    super()._fit(
    ~~~~~~~~~~~~^
        X,
        ^^
    ...<2 lines>...
        check_input=check_input,
        ^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\tree\_classes.py", line 252, in _fit
    X, y = validate_data(
           ~~~~~~~~~~~~~^
        self, X, y, validate_separately=(check_X_params, check_y_params)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\utils\validation.py", line 2966, in validate_data
    X = check_array(X, input_name="X", **check_X_params)
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\utils\validation.py", line 1053, in check_array
    array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\sklearn\utils\_array_api.py", line 757, in _asarray_with_order
    array = numpy.asarray(array, order=order, dtype=dtype)
  File "d:\GitHub Desktop\IAClass_ML_Algorithms\dsvenv\Lib\site-packages\pandas\core\generic.py", line 2168, in __array__
    arr = np.asarray(values, dtype=dtype)
ValueError: could not convert string to float: '$48'
