In [110]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegressionCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV

In [111]:
# Registry of candidate classifiers for the grid search.
# Each entry maps a display name to:
#   "clf"    - an unfitted estimator instance, and
#   "params" - the parameter grid handed to GridSearchCV for that estimator.
model_params = {
    "logistic_regression": {
        "clf": LogisticRegressionCV(multi_class="multinomial", random_state=42),
        "params": {
            "max_iter": [100, 500],
            "solver": ["sag", "saga"],
        },
    },
    "svm": {
        "clf": SVC(random_state=42),
        "params": {
            "C": [10],
            # The grid previously declared "kernel" twice; in a dict literal the
            # last duplicate key wins, so only ["rbf"] was ever searched. The
            # dead ["rbf", "poly"] entry is removed to make that explicit.
            # Add "poly" back here if it should actually be evaluated.
            "kernel": ["rbf"],
        },
    },
    "knn": {
        "clf": KNeighborsClassifier(),
        "params": {
            "n_neighbors": [5, 10],
        },
    },
    "decision_tree": {
        "clf": DecisionTreeClassifier(criterion="entropy", random_state=42),
        "params": {
            "max_depth": [10],
        },
    },
}

In [112]:
def train_test_validation_split(
    x: pd.DataFrame,
    y: pd.Series,
    train_size: float = 0.7,
    test_size: float = 0.15,
    validation_size: float = 0.15,
    random_state: int = 42,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.Series]:
    """Split data into reproducible train, test and validation subsets.

    The split is done in two stages: first the training set is separated from
    a combined hold-out set, then the hold-out set is divided into the test
    and validation subsets.

    Args:
        x: Feature matrix.
        y: Target values aligned with ``x``.
        train_size: Fraction of the data assigned to the training subset.
        test_size: Fraction of the data assigned to the test subset.
        validation_size: Fraction of the data assigned to the validation subset.
        random_state: Seed used for both shuffles, so repeated calls with the
            same inputs return the same subsets. Defaults to 42.

    Returns:
        ``(x_train, x_test, x_validation, y_train, y_test, y_validation)``.

    Raises:
        ValueError: Propagated from ``train_test_split`` when the requested
            fractions are not valid for the given data.
    """
    holdout_size = test_size + validation_size

    x_train, x_holdout, y_train, y_holdout = train_test_split(
        x,
        y,
        train_size=train_size,
        test_size=holdout_size,
        random_state=random_state,
    )

    # Only the validation share is passed for the second split; the test subset
    # receives the exact complement. Passing two separately rounded float
    # fractions can make them sum to slightly more or less than 1, which may
    # raise an error or silently leave one hold-out sample unassigned.
    x_test, x_validation, y_test, y_validation = train_test_split(
        x_holdout,
        y_holdout,
        test_size=validation_size / holdout_size,
        random_state=random_state,
    )
    return x_train, x_test, x_validation, y_train, y_test, y_validation

In [113]:
# Read the feature table from disk. The "Etiqueta" column is used as the
# target (cast to int); every remaining column is used as a feature.
df: pd.DataFrame = pd.read_csv("Vectores_Caracteristicos_Mariposas.csv")
y: pd.Series = df.loc[:, "Etiqueta"].astype(int)
x: pd.DataFrame = df.drop(columns=["Etiqueta"])

# Partition into train / test / validation using the helper's default ratios.
x_train, x_test, x_validation, y_train, y_test, y_validation = (
    train_test_validation_split(x, y)
)

In [114]:
# Run a cross-validated grid search for every configured model on the training
# subset, recording each model's best CV score and the parameters behind it.
scores = []

for name, model in model_params.items():
    gs = GridSearchCV(estimator=model["clf"], param_grid=model["params"], verbose=True)
    gs.fit(x_train, y_train)
    scores += [
        {"name": name, "best_score": gs.best_score_, "best params": gs.best_params_}
    ]
scores

Fitting 5 folds for each of 2 candidates, totalling 10 fits


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Fitting 5 folds for each of 6 candidates, totalling 30 fits


Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py", line 705, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
                             ^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/neighbors/_classification.py", line 246, in predict
    if self._fit_method == "brute" and ArgKminClassMode.is_usable_for(
                                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/metrics/_pairwise_distances_reduction/_dispatcher

[{'name': 'logistic_regression',
  'best_score': 0.3092101385204834,
  'best params': {'max_iter': 500}},
 {'name': 'svm',
  'best_score': 0.3693486590038314,
  'best params': {'C': 10, 'kernel': 'rbf'}},
 {'name': 'knn', 'best_score': nan, 'best params': {'n_neighbors': 5}},
 {'name': 'decision_tree',
  'best_score': 0.2732242852932508,
  'best params': {'max_depth': 10}}]