# Código para entender las funcionalidades de *IngeoML*

In [10]:
# Install the libraries needed to run IngeoML.
# The commands below are commented out; uncomment them to install on a fresh
# environment.
#!pip install --upgrade "jax[cpu]"
#!pip install optax
#!pip install IngeoML

In [11]:
# Check which Python version the kernel is running.
import sys
print(sys.version)

3.11.7 (main, Dec 15 2023, 12:09:04) [Clang 14.0.6 ]


In [12]:
# Report how many CPU cores are available on this machine.
import os
n_cores = os.cpu_count()
print(f"Número de núcleos de CPU disponibles: {n_cores}")


Número de núcleos de CPU disponibles: 4


In [13]:
from IngeoML import StatisticSamples
from sklearn.metrics import accuracy_score,recall_score
import numpy as np

In [14]:
# Obtain the sampling distribution of the mean and of the standard deviation
# from 10 bootstrap samples of a five-element population.
statistic = StatisticSamples(num_samples=10, statistic=np.mean)
empirical_distribution = np.r_[[3, 4, 5, 2, 4]]
# Calling the object on the data yields one statistic value per bootstrap sample.
print(statistic(empirical_distribution))
# `_samples` is an internal attribute of the object. In the recorded output it
# is a 10x5 integer array with values in 0..4, consistent with resampling
# indices into `empirical_distribution` - TODO confirm against IngeoML.
print("Muestras de statistic (mean) \n",statistic._samples)
statistic2 = StatisticSamples(num_samples=10, statistic=np.std)
print(statistic2(empirical_distribution))
print("Muestras de statistic2 (std) \n",statistic2._samples)
# Two separate callable objects are used here: "statistic" and "statistic2".

[3.2 3.8 4.2 3.8 3.6 3.8 2.2 4.  4.  3.6]
Muestras de statistic (mean) 
 [[0 4 3 3 2]
 [4 1 3 4 2]
 [2 1 2 0 1]
 [3 2 4 1 1]
 [1 0 3 2 4]
 [1 2 3 0 2]
 [3 3 0 3 3]
 [2 2 1 4 3]
 [1 0 2 1 4]
 [0 4 1 3 2]]
[0.         0.9797959  0.74833148 0.4        1.0198039  0.48989795
 0.8        1.16619038 0.9797959  1.16619038]
Muestras de statistic2 (std) 
 [[1 1 4 4 4]
 [4 2 1 3 4]
 [4 4 3 0 0]
 [4 2 1 1 4]
 [0 2 1 3 0]
 [4 0 0 1 4]
 [4 1 4 0 3]
 [0 2 3 3 1]
 [3 3 4 4 4]
 [2 4 3 3 0]]


Como se puede ver, las muestras son diferentes en cada llamado; eso dificulta hacer bootstrap sobre las mismas muestras para comparar diferentes estadísticos y/o métricas.

In [15]:

# Repeat the experiment, this time with two separate objects that both
# compute np.mean over 10 bootstrap samples.
statistic = StatisticSamples(num_samples=10, statistic=np.mean)
empirical_distribution = np.r_[[3, 4, 5, 2, 4]]
print(statistic(empirical_distribution))
print("Muestras de statistic (mean) \n",statistic._samples)
# Second object, same statistic and same input data as the first one.
statistic2 = StatisticSamples(num_samples=10, statistic=np.mean)
print(statistic2(empirical_distribution))
print("Muestras de statistic2 (mean2) \n",statistic2._samples)
# Both objects were applied to the same distribution, "empirical_distribution".

[4.4 4.  3.6 3.8 3.8 3.2 4.4 3.6 3.8 3.2]
Muestras de statistic (mean) 
 [[2 0 2 4 2]
 [4 1 0 1 2]
 [0 1 0 4 4]
 [1 2 3 0 2]
 [4 0 3 2 2]
 [3 2 3 0 4]
 [4 2 2 0 2]
 [4 0 0 0 2]
 [4 2 3 0 2]
 [4 3 1 4 3]]
[3.8 3.6 3.8 2.4 4.  4.  4.  3.6 3.6 3.4]
Muestras de statistic2 (mean2) 
 [[0 1 0 2 4]
 [4 4 4 3 1]
 [4 4 1 3 2]
 [3 3 3 3 4]
 [2 0 4 2 0]
 [2 1 1 2 3]
 [2 0 4 4 1]
 [0 4 3 1 2]
 [4 0 4 0 1]
 [1 1 3 0 1]]


In [16]:

# Repeat the experiment, re-creating the object under the same variable name.
statistic = StatisticSamples(num_samples=10, statistic=np.mean)
empirical_distribution = np.r_[[3, 4, 5, 2, 4]]
print(statistic(empirical_distribution))
print("Muestras de statistic (mean) \n",statistic._samples)
# Rebinding `statistic` creates a brand-new object; the first one is discarded.
statistic = StatisticSamples(num_samples=10, statistic=np.mean)
print(statistic(empirical_distribution))
# NOTE(review): the label below says "statistic2 (mean2)", but the object being
# printed is the re-created `statistic` - confirm whether the label is intended.
print("Muestras de statistic2 (mean2) \n",statistic._samples)
# Here the same distribution "empirical_distribution" and the same object name
# "statistic" were used, and the samples still differ between the two runs.

[2.8 3.4 4.4 2.  4.  3.  3.6 2.4 4.6 3.6]
Muestras de statistic (mean) 
 [[3 0 3 0 4]
 [4 0 1 0 0]
 [2 0 2 2 4]
 [3 3 3 3 3]
 [2 2 1 3 1]
 [3 4 4 0 3]
 [0 1 3 2 1]
 [3 3 0 3 0]
 [2 2 4 2 1]
 [1 3 4 1 4]]
[3.6 4.  2.6 4.  3.8 3.4 4.  3.4 3.4 3.4]
Muestras de statistic2 (mean2) 
 [[2 3 1 2 3]
 [4 2 0 4 1]
 [0 3 0 3 0]
 [4 1 1 4 1]
 [4 0 4 0 2]
 [0 1 1 4 3]
 [4 1 2 2 3]
 [1 0 0 3 2]
 [2 0 4 0 3]
 [3 2 4 3 4]]


In [17]:
# Repeat the example with performance metrics (accuracy and recall); once
# again the samples drawn by each object are different.
labels = np.r_[[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]]
pred   = np.r_[[0, 0, 1, 0, 0, 1, 1, 1, 0, 1]]
# Point estimate of accuracy on the full set of 10 labelled examples.
print(accuracy_score(labels, pred))
acc = StatisticSamples(num_samples=10, statistic=accuracy_score)
# The return value is discarded; only the samples are inspected afterwards.
# Presumably this call is what populates `acc._samples` - TODO confirm.
acc(labels, pred)
print("Muestras de acc \n",acc._samples)
# NOTE(review): zero_division=0.0 is passed only to this direct call; the
# bootstrap object below receives plain recall_score without that argument.
print(recall_score(labels, pred,zero_division =0.0))
rec = StatisticSamples(num_samples=10, statistic=recall_score)
# As with `acc`, the returned bootstrap values are not printed.
rec(labels, pred)
print("Muestras de recall \n",rec._samples)

0.8
Muestras de acc 
 [[7 5 5 0 9 2 8 6 6 8]
 [0 0 8 0 4 8 1 4 7 9]
 [6 7 9 8 5 3 9 7 8 2]
 [9 0 8 8 9 5 2 7 9 7]
 [0 2 5 7 4 3 8 9 7 7]
 [9 9 8 4 6 5 5 4 9 2]
 [9 2 5 1 5 7 7 6 4 5]
 [3 2 1 3 4 2 5 7 4 6]
 [1 9 4 4 5 8 0 1 3 0]
 [8 3 6 0 3 6 5 0 2 7]]
0.8
Muestras de recall 
 [[8 7 0 6 1 8 0 6 5 7]
 [0 1 9 0 8 7 2 7 8 0]
 [1 3 0 1 8 8 9 0 1 7]
 [5 7 4 9 4 8 2 7 1 0]
 [5 2 1 0 1 7 7 7 9 5]
 [2 5 2 8 1 9 2 6 0 6]
 [6 8 4 6 9 0 8 1 1 3]
 [7 3 3 0 7 6 4 9 6 1]
 [7 8 9 4 1 5 6 4 0 5]
 [8 0 1 6 7 4 0 4 3 2]]
