In [12]:
import sys
sys.path.append("./my-staff")
from database import load_ds
from globals import TEST_SIZE

In [13]:
from sklearn.model_selection import  train_test_split


In [14]:
X, y, features_names, class_names = load_ds("all")
X.shape

(293, 25)

In [16]:
def split(X, y, seed):
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                                y,
                                                                test_size=TEST_SIZE,
                                                                random_state=seed)
    return  X_train, X_test, y_train, y_test

In [9]:
from sklearn.naive_bayes import GaussianNB


In [17]:
result = []
for seed in range(1000):
    X_train, X_test, y_train, y_test = split(X,y , seed)
    model = GaussianNB()

    model.fit(X_train, y_train)

    score = model.score(X_test, y_test)
    result.append([seed, score])


In [18]:
result

[[0, 0.8068181818181818],
 [1, 0.7840909090909091],
 [2, 0.7954545454545454],
 [3, 0.7840909090909091],
 [4, 0.8068181818181818],
 [5, 0.8068181818181818],
 [6, 0.7272727272727273],
 [7, 0.7840909090909091],
 [8, 0.8636363636363636],
 [9, 0.7613636363636364],
 [10, 0.7954545454545454],
 [11, 0.8522727272727273],
 [12, 0.7159090909090909],
 [13, 0.7159090909090909],
 [14, 0.7272727272727273],
 [15, 0.7954545454545454],
 [16, 0.75],
 [17, 0.7386363636363636],
 [18, 0.7272727272727273],
 [19, 0.8068181818181818],
 [20, 0.7159090909090909],
 [21, 0.7840909090909091],
 [22, 0.8295454545454546],
 [23, 0.7840909090909091],
 [24, 0.6818181818181818],
 [25, 0.7727272727272727],
 [26, 0.75],
 [27, 0.8068181818181818],
 [28, 0.7840909090909091],
 [29, 0.8181818181818182],
 [30, 0.75],
 [31, 0.7840909090909091],
 [32, 0.7386363636363636],
 [33, 0.7840909090909091],
 [34, 0.7840909090909091],
 [35, 0.8181818181818182],
 [36, 0.7613636363636364],
 [37, 0.7840909090909091],
 [38, 0.8295454545454546],

In [19]:
import pandas as pd

In [20]:
df = pd.DataFrame(result, columns=["Seed", "Score"])

In [21]:
df.head()

Unnamed: 0,Seed,Score
0,0,0.806818
1,1,0.784091
2,2,0.795455
3,3,0.784091
4,4,0.806818


In [22]:
df.to_csv("prueba.csv", )

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L2
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import GridSearchCV

# Establecer una semilla aleatoria para reproducibilidad
np.random.seed(42)
tf.random.set_seed(42)

class KerasGridSearchWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, hidden_neurons=[32], lambda_regu=0.01, n_classes=10, learning_rate=0.01, epochs=64, batch_size=128):
        self.hidden_neurons = hidden_neurons
        self.lambda_regu = lambda_regu
        self.n_classes = n_classes
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = self._create_model()

    def _create_model(self):
        model = Sequential()
        for neurons in self.hidden_neurons:
            model.add(Dense(units=neurons, activation="relu", kernel_regularizer=L2(self.lambda_regu)))
        model.add(Dense(units=self.n_classes, activation="linear", kernel_regularizer=L2(self.lambda_regu)))
        model.compile(loss=SparseCategoricalCrossentropy(from_logits=True),
                      optimizer=Adam(learning_rate=self.learning_rate),
                      metrics='accuracy')
        return model

    def fit(self, X, y):
        self.model.fit(X, y, batch_size=self.batch_size,
                        epochs=self.epochs, verbose=0)
        return self

    def score(self, X, y):
        return self.model.evaluate(X, y, verbose=0)[1]

# Definir los hiperparámetros a ajustar
param_grid = {
    'hidden_neurons': [[8], [16], [32]],
    'lambda_regu': [0.01, 0.001],
    'n_classes': [10],
    "epochs" : [64, 128]
}

# Crear el objeto GridSearchCV
model = KerasGridSearchWrapper()
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)

# Datos de entrada y salida
X = np.random.rand(100, 8)  # 100 muestras, 8 características cada una
y = np.random.randint(0, 10, size=(100,))  # 100 etiquetas para un problema de 10 clases

# Realizar la búsqueda de hiperparámetros
grid_result = grid.fit(X, y)

# Imprimir los resultados
print("Mejor: %f usando %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) con: %r" % (mean, stdev, param))


Mejor: 0.139929 usando {'epochs': 64, 'hidden_neurons': [16], 'lambda_regu': 0.01, 'n_classes': 10}
0.119727 (0.041844) con: {'epochs': 64, 'hidden_neurons': [8], 'lambda_regu': 0.01, 'n_classes': 10}
0.129828 (0.050963) con: {'epochs': 64, 'hidden_neurons': [8], 'lambda_regu': 0.001, 'n_classes': 10}
0.139929 (0.037454) con: {'epochs': 64, 'hidden_neurons': [16], 'lambda_regu': 0.01, 'n_classes': 10}
0.099525 (0.050072) con: {'epochs': 64, 'hidden_neurons': [16], 'lambda_regu': 0.001, 'n_classes': 10}
0.109626 (0.056119) con: {'epochs': 64, 'hidden_neurons': [32], 'lambda_regu': 0.01, 'n_classes': 10}
0.119430 (0.063840) con: {'epochs': 64, 'hidden_neurons': [32], 'lambda_regu': 0.001, 'n_classes': 10}
0.119727 (0.041844) con: {'epochs': 128, 'hidden_neurons': [8], 'lambda_regu': 0.01, 'n_classes': 10}
0.119727 (0.041844) con: {'epochs': 128, 'hidden_neurons': [8], 'lambda_regu': 0.001, 'n_classes': 10}
0.119727 (0.041844) con: {'epochs': 128, 'hidden_neurons': [16], 'lambda_regu': 0.

# Hipers

In [1]:
import json

In [2]:
with open("data/hipers.json") as f:
    hipers = json.load(f)

In [3]:
hipers

{'sklearnMLP': {'hidden_layer_sizes': [[50], [100], [50, 50]],
  'activation': ['logistic', 'tanh', 'relu'],
  'solver': ['adam'],
  'alpha': [0.0001, 0.001, 0.01],
  'learning_rate': ['constant'],
  'max_iter': [300, 400]},
 'KerasMLP': {'epochs': [500, 750, 1000],
  'batch_size': [64, 128],
  'learning_rate': [0.001],
  'lambda_regu': [0.01, 0.1],
  'hidden_neurons': [[50], [100], [50, 50]]},
 'RandomForestClassifier': {'n_estimators': [50, 70, 90],
  'max_features': ['auto', 'sqrt', 'log2'],
  'max_depth': [4, 8, 12],
  'min_samples_split': [2, 5],
  'min_samples_leaf': [1, 2, 3],
  'bootstrap': [True, False]},
 'AdaBoostClassifier': {'n_estimators': [50, 70, 90],
  'learning_rate': [0.01, 0.1],
  'algorithm': ['SAMME', 'SAMME.R']},
 'BaggingClassifier': {'n_estimators': [10, 50, 100],
  'max_samples': [0.5, 1.0],
  'max_features': [0.5, 1.0],
  'base_estimator__max_depth': [3, 5, 10]},
 'DecisionTreeClassifier': {'max_depth': [None, 4, 8, 10],
  'min_samples_split': [2, 8, 16, 32],

In [5]:
latex_code = "\\begin{table}[!h]\n"
latex_code += "    \\centering\n"
latex_code += "    \\caption{Hyperparameter Grid}\n"
latex_code += "    \\resizebox{\\textwidth}{!}{"
latex_code += "    \\begin{tabular}{|l|p{4cm}|p{4cm}|}\n"
latex_code += "        \\hline\n"
latex_code += "        \\textbf{Model} & \\textbf{Hyperparameter} & \\textbf{Values} \\\\\n"

previous_model = ""
for model, params in hipers.items():
    for param, values in params.items():
        # Solo añadir el nombre del modelo si es diferente al anterior

        if model != previous_model:
            model_name = model
            latex_code += "        \\hline\n"
        else:
            model_name = ""

        previous_model = model
        param = "\\textit{" + param.replace("-", " ").replace("_", " ") + "}"
        latex_code += f"   {model_name} & {param} & {', '.join(map(str, values))} \\\\  \n"  # Eliminar valor de ejemplo

latex_code += "        \\hline\n"
latex_code += "    \\end{tabular}}\n"
latex_code += "\\end{table}"

print(latex_code)

\begin{table}[!h]
    \centering
    \caption{Hyperparameter Grid}
    \resizebox{\textwidth}{!}{    \begin{tabular}{|l|p{4cm}|p{4cm}|}
        \hline
        \textbf{Model} & \textbf{Hyperparameter} & \textbf{Values} \\
        \hline
   sklearnMLP & \textit{hidden layer sizes} & [50], [100], [50, 50] \\  
    & \textit{activation} & logistic, tanh, relu \\  
    & \textit{solver} & adam \\  
    & \textit{alpha} & 0.0001, 0.001, 0.01 \\  
    & \textit{learning rate} & constant \\  
    & \textit{max iter} & 300, 400 \\  
        \hline
   KerasMLP & \textit{epochs} & 500, 750, 1000 \\  
    & \textit{batch size} & 64, 128 \\  
    & \textit{learning rate} & 0.001 \\  
    & \textit{lambda regu} & 0.01, 0.1 \\  
    & \textit{hidden neurons} & [50], [100], [50, 50] \\  
        \hline
   RandomForestClassifier & \textit{n estimators} & 50, 70, 90 \\  
    & \textit{max features} & auto, sqrt, log2 \\  
    & \textit{max depth} & 4, 8, 12 \\  
    & \textit{min samples split} & 2, 5 \

# Brier Score


In [5]:
import numpy as np

# Example predictions and actual outcomes
predictions = np.array([[0.7, 0.3], [0.2, 0.8], [0.9, 0.1]])  # Probabilities for each class
actuals = np.array([[1, 0], [0, 1], [1, 0]])  # Actual class labels in one-hot encoding

# Calculating the Brier score
brier_scores = np.mean(np.sum((predictions - actuals)**2, axis=1))

In [6]:
brier_scores

0.09333333333333334

In [17]:
model = GaussianNB()

X_train, X_test, y_train, y_test = split(X,y , 8)
model.fit(X_train, y_train)

score = model.score(X_test, y_test)

In [19]:
probas = model.predict_proba(X_test)

In [20]:
probas

array([[9.99992819e-001, 7.18074409e-006],
       [1.40741234e-007, 9.99999859e-001],
       [9.99994137e-001, 5.86268764e-006],
       [4.37411642e-025, 1.00000000e+000],
       [9.99999838e-001, 1.62253494e-007],
       [1.43832941e-006, 9.99998562e-001],
       [1.13944979e-009, 9.99999999e-001],
       [4.37861634e-009, 9.99999996e-001],
       [9.76565910e-005, 9.99902343e-001],
       [1.10254389e-017, 1.00000000e+000],
       [6.88722637e-004, 9.99311277e-001],
       [9.99999920e-001, 8.03194225e-008],
       [1.94331699e-026, 1.00000000e+000],
       [1.98214266e-008, 9.99999980e-001],
       [1.74945314e-083, 1.00000000e+000],
       [6.82381575e-099, 1.00000000e+000],
       [9.99948083e-001, 5.19170646e-005],
       [9.99999960e-001, 4.04228568e-008],
       [9.99999733e-001, 2.66782008e-007],
       [1.93519133e-002, 9.80648087e-001],
       [5.87512879e-003, 9.94124871e-001],
       [9.99994856e-001, 5.14382623e-006],
       [9.98376348e-001, 1.62365248e-003],
       [1.2

In [21]:
probas[:, 1]

array([7.18074409e-06, 9.99999859e-01, 5.86268764e-06, 1.00000000e+00,
       1.62253494e-07, 9.99998562e-01, 9.99999999e-01, 9.99999996e-01,
       9.99902343e-01, 1.00000000e+00, 9.99311277e-01, 8.03194225e-08,
       1.00000000e+00, 9.99999980e-01, 1.00000000e+00, 1.00000000e+00,
       5.19170646e-05, 4.04228568e-08, 2.66782008e-07, 9.80648087e-01,
       9.94124871e-01, 5.14382623e-06, 1.62365248e-03, 1.00000000e+00,
       5.93964005e-06, 9.60732170e-04, 1.00000000e+00, 7.06735316e-07,
       6.87206219e-01, 8.02662290e-05, 1.09128532e-07, 2.31376913e-05,
       3.39866944e-03, 1.00000000e+00, 2.34271106e-09, 2.04645240e-06,
       6.32642253e-04, 4.18096670e-06, 1.91607097e-03, 1.00000000e+00,
       7.28981338e-04, 2.39276924e-07, 5.10533884e-01, 1.00000000e+00,
       1.00000000e+00, 1.00000000e+00, 9.99674729e-01, 2.74197458e-07,
       1.00000000e+00, 1.00000000e+00, 4.00710125e-08, 5.35122864e-06,
       1.94709076e-07, 4.47685901e-02, 1.00000000e+00, 5.06847460e-04,
      

In [22]:
from sklearn.metrics import brier_score_loss


In [24]:
# Calculate the Brier score
brier_score = brier_score_loss(y_test, probas[:, 1])

print("Brier score:", brier_score)

Brier score: 0.13880496219137098
