## 1. Importação de bibliotecas

In [1]:
import pandas as pd
import numpy as np

import warnings
from sklearn.exceptions import ConvergenceWarning

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import (
    StandardScaler,
    MinMaxScaler,
    LabelEncoder,
    OrdinalEncoder,
    OneHotEncoder,
)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

import os

# Silence scikit-learn's ConvergenceWarning for the whole notebook session.
# NOTE(review): presumably aimed at the MLP fits run by the grid search — confirm.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

def salvar_melhores(df, nome_base="ranking_melhores", pasta="."):
    """Save ``df`` as the next numbered ``<nome_base>_<n>.csv`` file inside ``pasta``.

    Only files named exactly ``<nome_base>_<integer>.csv`` take part in the
    numbering; the new file gets the highest number found plus one (``1`` when
    there is none). Files that merely share the prefix, such as
    ``<nome_base>.csv`` or ``<nome_base>_final.csv``, are ignored instead of
    aborting the save with a ``ValueError``.

    Args:
        df: Object exposing ``to_csv`` (a pandas DataFrame in this notebook).
        nome_base: File-name prefix placed before the sequence number.
        pasta: Directory to scan and write into; created when it does not exist.

    Returns:
        None. The path of the written file is printed.
    """
    # Creating the folder up front lets a first run into a fresh output
    # directory succeed instead of failing on os.listdir.
    os.makedirs(pasta, exist_ok=True)

    prefixo = f"{nome_base}_"
    sufixo = ".csv"

    nums = []
    for nome in os.listdir(pasta):
        if not (nome.startswith(prefixo) and nome.endswith(sufixo)):
            continue
        miolo = nome[len(prefixo):-len(sufixo)]
        # str.isdecimal() is False for "" and for anything int() would reject,
        # so stray files with the same prefix are skipped safely.
        if miolo.isdecimal():
            nums.append(int(miolo))

    novo_num = max(nums, default=0) + 1

    nome_arquivo = os.path.join(pasta, f"{nome_base}_{novo_num}.csv")
    df.to_csv(nome_arquivo, index=False)
    print(f"Arquivo salvo em: {nome_arquivo}")


## 2. Carregando o dataset (`base_limpa_1.csv`)

In [2]:
# Load the cleaned dataset from the working directory.
df = pd.read_csv("base_limpa_1.csv")

# Every column except the last one is a feature; the last column is the label.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Labels held as Python objects (e.g. strings) are replaced by integer codes.
if y.dtype == "object":
    y = LabelEncoder().fit_transform(y)

df.head()

Unnamed: 0,GpsProvider,Market/Regular,Origin_Location,Destination_Location,Org_lat_lon,Des_lat_lon,Planned_ETA,Current_Location,DestinationLocation,Curr_lat,Curr_lon,trip_start_date,TRANSPORTATION_DISTANCE_IN_KM,vehicleType,Minimum_kms_to_be_covered_in_a_day,Material Shipped,target
0,CONSENT TRACK,Market,"TVSLSL-PUZHAL-HUB,CHENNAI,TAMIL NADU","ASHOK LEYLAND PLANT 1- HOSUR,HOSUR,KARNATAKA","13.1550,80.1960","12.7400,77.8200",2020-08-21 18:59:01,"Vaniyambadi Rd, Valayambattu, Tamil Nadu 63575...","ASHOK LEYLAND PLANT 1- HOSUR,HOSUR,KARNATAKA",12.6635,78.64987,2020-08-17 14:59:01,320.0,32 FT Single-Axle 7MT - HCV,250.239362,BRACKET / GRAB HANDLE,0
1,VAMOSYS,Regular,"DAIMLER INDIA COMMERCIAL VEHICLES,KANCHIPURAM,...","DAIMLER INDIA COMMERCIAL VEHICLES,KANCHIPURAM,...","12.8390,79.9540","12.8390,79.9540",2020-08-31 20:22:22.827000,"Unnamed Road, Oragadam Industrial Corridor, Va...","DAIMLER INDIA COMMERCIAL VEHICLES,KANCHIPURAM,...",12.836757,79.954428,2020-08-27 16:21:52,103.0,32 FT Multi-Axle 14MT - HCV,250.239362,ZB MODEL PLATE / 3143,1
2,CONSENT TRACK,Regular,"LUCAS TVS LTD-PONDY,PONDY,PONDICHERRY","LUCAS TVS LTD-PONDY,PONDY,PONDICHERRY","11.8710,79.7390","11.8710,79.7390",2020-08-31 21:59:24.987000,"570, National Hwy 48, Shenoy Nagar, Chennai, T...","LUCAS TVS LTD-PONDY,PONDY,PONDICHERRY",13.073956,80.22578,2020-08-27 17:57:04,300.0,1 MT Tata Ace (Open Body),250.239362,LETTERING / FUSO,1
3,VAMOSYS,Regular,"DAIMLER INDIA COMMERCIAL VEHICLES,KANCHIPURAM,...","DAIMLER INDIA COMMERCIAL VEHICLES,KANCHIPURAM,...","12.8390,79.9540","12.8390,79.9540",2020-09-01 04:48:24.503000,"Singaperumal Koil - Sriperumbudur Rd, Oragadam...","DAIMLER INDIA COMMERCIAL VEHICLES,KANCHIPURAM,...",12.836686,79.95056,2020-08-28 00:47:45,61.0,32 FT Multi-Axle 14MT - HCV,250.239362,LU STRUT RA / RADIUS ROD,1
4,VAMOSYS,Regular,"LUCAS TVS LTD-PONDY,PONDY,PONDICHERRY","LUCAS TVS LTD-PONDY,PONDY,PONDICHERRY","11.8720,79.6320","11.8720,79.6320",2020-09-01 05:23:19.243000,"Melmaruvathur, Tamil Nadu 603319, India","LUCAS TVS LTD-PONDY,PONDY,PONDICHERRY",12.429501,79.831556,2020-08-28 01:13:48,240.0,32 FT Multi-Axle 14MT - HCV,250.239362,WISHBONE / V ROD/HDT,1


## 3. Definindo Encoders e Scalers

In [3]:
# Categorical-encoding strategies compared by the test loop, keyed by the label
# that ends up in the "Encoder" column of the results.
encoders = {
    "OrdinalEncoder": OrdinalEncoder(),
    "OneHotEncoder": OneHotEncoder(drop="first", sparse_output=False),
    # No estimator here: the test loop special-cases this key and calls
    # pd.get_dummies(X, drop_first=True) instead.
    "GetDummies": None,
}

# Feature scalers, keyed by the label that ends up in the "Scaler" column of the results.
scalers = {
    "StandardScaler": StandardScaler(),
    "MinMaxScaler": MinMaxScaler()
}

## 4. Definindo Modelos e Hiperparâmetros

In [4]:
# Registry consumed by the test loop: display name -> (estimator, GridSearchCV param grid).
# The stochastic estimators (tree, forest, MLP) get a fixed random_state — the same
# seed used by the train/test split — so repeated runs produce the same ranking.
# NOTE(review): the single-model cells further down each rebind `modelos`; under
# "Restart & Run All" the last one executed replaces this full registry.
modelos = {
    "Decision Tree": (
        DecisionTreeClassifier(class_weight="balanced", random_state=42),
        {
            "criterion": ["gini", "entropy"],
            "splitter": ["best", "random"],
            "max_depth": [None, 5, 10],
            "min_samples_split": [2, 5],
            "min_samples_leaf": [1, 2],
            "max_features": [None, "sqrt"],
        },
    ),
    "Random Forest": (
        RandomForestClassifier(class_weight="balanced", random_state=42),
        {
            "n_estimators": [100, 200],
            "criterion": ["gini"],
            "max_depth": [None, 10],
            "min_samples_split": [2, 5],
            "min_samples_leaf": [1, 2],
            "max_features": ["sqrt"],
            "bootstrap": [True],
        },
    ),
    "SVM": (
        SVC(),
        {
            "C": [0.1, 1, 10],
            "kernel": ["linear", "rbf"],
            "gamma": ["scale"],
        },
    ),
    "KNN": (
        KNeighborsClassifier(),
        {
            "n_neighbors": [3, 5, 7],
            "weights": ["uniform"],
            "p": [2],
        },
    ),
    "MLP Neural Net": (
        # early_stopping holds out a random validation subset, so the seed matters here too.
        MLPClassifier(max_iter=2000, early_stopping=True, random_state=42),
        {
            "hidden_layer_sizes": [(50,), (100,)],
            "activation": ["relu", "tanh"],
            "solver": ["adam"],
            "alpha": [0.0001, 0.001],
            "learning_rate": ["constant", "adaptive"],
        },
    ),
}


In [None]:
## Decision Tree only
# NOTE(review): this cell rebinds `modelos`; whichever model cell runs last decides
# what the test loop evaluates.
# random_state is fixed (same seed as the train/test split) so the tree's random
# choices — the "random" splitter and feature sub-sampling — are reproducible.

modelos = {
    "Decision Tree": (
        DecisionTreeClassifier(class_weight="balanced", random_state=42),
        {
            "criterion": ["gini", "entropy"],
            "splitter": ["best", "random"],
            "max_depth": [None, 5, 10],
            "min_samples_split": [2, 5],
            "min_samples_leaf": [1, 2],
            "max_features": [None, "sqrt"],
        },
    ),
}

In [None]:
## Random Forest only
# NOTE(review): this cell rebinds `modelos`; whichever model cell runs last decides
# what the test loop evaluates.
# random_state is fixed (same seed as the train/test split) so bootstrap sampling
# and feature sub-sampling are reproducible between runs.

modelos = {
    "Random Forest": (
        RandomForestClassifier(class_weight="balanced", random_state=42),
        {
            "n_estimators": [100, 200],
            "criterion": ["gini"],
            "max_depth": [None, 10],
            "min_samples_split": [2, 5],
            "min_samples_leaf": [1, 2],
            "max_features": ["sqrt"],
            "bootstrap": [True],
        },
    ),
}

In [None]:
## SVM only
# NOTE(review): this cell rebinds `modelos`; whichever model cell runs last decides
# what the test loop evaluates.

modelos = {
    "SVM": (
        SVC(),
        dict(C=[0.1, 1, 10], kernel=["linear", "rbf"], gamma=["scale"]),
    ),
}

In [None]:
## KNN only
# NOTE(review): this cell rebinds `modelos`; whichever model cell runs last decides
# what the test loop evaluates.

modelos = {
    "KNN": (
        KNeighborsClassifier(),
        dict(n_neighbors=[3, 5, 7], weights=["uniform"], p=[2]),
    ),
}

In [None]:
## MLP Neural Net only
# NOTE(review): this cell rebinds `modelos`; whichever model cell runs last decides
# what the test loop evaluates.
# random_state is fixed (same seed as the train/test split) so weight initialisation
# and the early-stopping validation hold-out are reproducible between runs.

modelos = {
    "MLP Neural Net": (
        MLPClassifier(max_iter=2000, early_stopping=True, random_state=42),
        {
            "hidden_layer_sizes": [(50,), (100,)],
            "activation": ["relu", "tanh"],
            "solver": ["adam"],
            "alpha": [0.0001, 0.001],
            "learning_rate": ["constant", "adaptive"],
        },
    ),
}

## 5. Loop de Testes com GridSearchCV

In [None]:
# One record per (encoder, scaler, model) combination, successful or not.
resultados = []

# NOTE(review): each encoder is fitted on the full X before the train/test split, so
# category information from the test rows reaches the training features. Fitting on
# the training split only would need `handle_unknown` configured on the encoders —
# left unchanged here; confirm whether that is acceptable for this comparison.
for enc_name, encoder in encoders.items():
    if enc_name == "GetDummies":
        X_enc = pd.get_dummies(X, drop_first=True)
    else:
        X_enc = encoder.fit_transform(X)
        if isinstance(X_enc, np.ndarray):
            X_enc = pd.DataFrame(X_enc)

    # The split depends only on the encoded features and the fixed seed, so it is
    # computed once per encoder instead of once per scaler (same rows either way).
    X_train, X_test, y_train, y_test = train_test_split(
        X_enc, y, test_size=0.3, random_state=42
    )

    for sc_name, scaler in scalers.items():
        # Scaler statistics come from the training split only; the scaled arrays get
        # their own names so the unscaled split stays available for the next scaler.
        X_train_sc = scaler.fit_transform(X_train)
        X_test_sc = scaler.transform(X_test)

        for model_name, (modelo, param_grid) in modelos.items():
            registro = {
                "Encoder": enc_name,
                "Scaler": sc_name,
                "Modelo": model_name,
            }
            try:
                grid = GridSearchCV(
                    modelo, param_grid, cv=5, scoring="f1_weighted", n_jobs=-1
                )
                grid.fit(X_train_sc, y_train)
                y_pred = grid.predict(X_test_sc)

                registro.update({
                    "Melhores Params": grid.best_params_,
                    "Accuracy": accuracy_score(y_test, y_pred),
                    "Precision": precision_score(y_test, y_pred, average="weighted"),
                    "Recall": recall_score(y_test, y_pred, average="weighted"),
                    "F1-Score": f1_score(y_test, y_pred, average="weighted"),
                })

            except Exception as e:
                # Best effort: a failing combination is recorded with its error message
                # so the remaining combinations still run.
                registro.update({
                    "Melhores Params": None,
                    "Accuracy": None,
                    "Precision": None,
                    "Recall": None,
                    "F1-Score": None,
                    "Erro": str(e),
                })

            resultados.append(registro)

KeyboardInterrupt: 

## 6. Ranking Final dos Modelos

In [None]:
resultados_df = pd.DataFrame(resultados)

# An empty results list (e.g. the test loop was interrupted before its first record)
# yields a frame without the "F1-Score" column; fail with a clear message instead of
# the KeyError that sort_values would raise.
if resultados_df.empty:
    raise ValueError(
        "Nenhum resultado para ranquear: execute o loop de testes (seção 5) primeiro."
    )

# Best weighted F1 first; combinations that failed have a missing F1-Score and are
# placed last by sort_values' default NaN handling.
melhores = resultados_df.sort_values(by="F1-Score", ascending=False)

print("Ranking final:")
display(melhores)

In [None]:
# Persist the ranking using the helper's defaults: a new numbered
# ranking_melhores_<n>.csv in the current working directory.
salvar_melhores(melhores)