## Estandarización

In [1]:
from sklearn.preprocessing import StandardScaler

### Data de entrenamiento

In [None]:
# Toy training set: four samples (rows) with two features (columns) each.
train_data = [[0, 0], [0, 0], [2, 3], [6, 4]]

# Standardization transformer; passing with_mean=False would skip the centering step.
scaler = StandardScaler()

# Learn the per-feature mean and standard deviation from the training data.
scaler.fit(train_data)

In [None]:
# Show the per-feature statistics stored on the fitted scaler (mean_ and scale_).
print(f'Mean: {scaler.mean_}, Std: {scaler.scale_}')

Mean: [2.   1.75], Std: [2.44948974 1.78535711]


In [None]:
# Standardize the training data with the fitted statistics: (x - mean) / std.
train_data_scaled = scaler.transform(train_data)

In [None]:
# Optional sanity check (left disabled): standard deviation of the scaled data.
#import numpy as np
#print(np.std(train_data_scaled))
# Display the standardized training samples.
print(train_data_scaled)

[[-0.81649658 -0.98019606]
 [-0.81649658 -0.98019606]
 [ 0.          0.70014004]
 [ 1.63299316  1.26025208]]


In [None]:
# fit_transform() is shorthand for scaler.fit(train_data) followed by
# scaler.transform(train_data). Note that `scaler` is rebound to a new
# StandardScaler instance here.
scaler = StandardScaler()
train_data_scaler = scaler.fit_transform(train_data)
print(train_data_scaler)

[[-0.81649658 -0.98019606]
 [-0.81649658 -0.98019606]
 [ 0.          0.70014004]
 [ 1.63299316  1.26025208]]


### Data de validación

In [None]:
# Validation samples are only transformed, never fitted: transform() reuses
# the statistics the scaler already holds from its earlier fit.
val_data = [[2, 4], [3, 5]]
val_data_scaler = scaler.transform(val_data)
print(val_data_scaler)

[[0.         1.26025208]
 [0.40824829 1.82036411]]


In [None]:
# Print the scaler's stored statistics again, after transforming the validation data.
print(f'Mean: {scaler.mean_}, Std: {scaler.scale_}')

Mean: [2.   1.75], Std: [2.44948974 1.78535711]


In [None]:
import numpy as np
# Column-wise (axis=0) mean of the scaled validation data.
np.mean(val_data_scaler,axis=0)

array([0.20412415, 1.54030809])

# Ejercicios




## Preprocesamiento

El siguiente enlace contiene información sobre los diferentes módulos de preprocesamiento que se pueden aplicar a un conjunto de datos: [Preprocessing sklearn](https://scikit-learn.org/stable/api/sklearn.preprocessing.html). Con base en ello, resuelva lo siguiente:

### 1. Transformación de datos numéricos

- Investigar y ejecutar ejemplos de [Min-Max Scaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html#sklearn.preprocessing.MinMaxScaler)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Four samples with two features; each column is rescaled independently.
data = [
    [-15, 1],
    [-0.5, 6],
    [0, 15],
    [10, 1],
]

# Map every feature onto the interval [3, 6].
scaler = MinMaxScaler(feature_range=(3, 6))

# fit() returns the fitted estimator, so this prints its repr.
print(scaler.fit(data))
# Other fitted attributes that can be inspected (left disabled):
#print(scaler.data_max_)
#print(scaler.scale_)

# Rescale the samples used for fitting, then one new sample.
print(scaler.transform(data))
print(scaler.transform([[2, 6]]))

MinMaxScaler(feature_range=(3, 6))
[[3.         3.        ]
 [4.74       4.07142857]
 [4.8        6.        ]
 [6.         3.        ]]
[[5.04       4.07142857]]


In [None]:
# Fit the scaler on `data` again; as the last expression of the cell, the
# fitted estimator is also the cell's displayed value.
scaler.fit(data)

In [None]:
# For MinMaxScaler, data_min_ is the per-feature minimum seen during fit and
# scale_ is the per-feature multiplier applied by transform(); they are not a
# mean and a standard deviation, so label them accordingly.
print(f'Min: {scaler.data_min_}, Scale: {scaler.scale_}')

Mean: [-15.   1.], Std: [0.12       0.21428571]


### 2. Transformación de datos categóricos

- Investigar y ejecutar ejemplos de [Ordinal Encoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html#sklearn.preprocessing.OrdinalEncoder)

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Three samples with two categorical features each.
X = [
    ['Male', 1],
    ['Female', 3],
    ['Female', 2],
]

# fit() returns the estimator itself, so construction and fitting are chained.
enc = OrdinalEncoder().fit(X)

# Categories discovered for each feature.
print(enc.categories_)

# Each value is replaced by its index within that feature's categories.
enc.transform([['Female', 3], ['Male', 1]])

[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]


array([[0., 2.],
       [1., 0.]])

- Investigar y ejecutar ejemplos de [One-hot Encoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html#sklearn.preprocessing.OneHotEncoder)

In [6]:
from sklearn.preprocessing import OneHotEncoder

# Training samples: a single categorical feature with three distinct values.
# Alternative two-feature dataset (left disabled):
#X = [['Male', 1], ['Female', 3], ['Female', 2]]
X = [['Male'], ['Female'], ['Female'], ['Otro']]

# handle_unknown='ignore' makes categories unseen during fit encode as an
# all-zero row instead of raising an error.
enc = OneHotEncoder(handle_unknown='ignore').fit(X)
print(enc.categories_)

# Encode known categories plus one unknown value ('Unknown').
#enc.transform([['Female', 1], ['Male', 4]]).toarray()
enc.transform([['Female'], ['Male'], ['Otro'], ['Female'], ['Unknown']]).toarray()
# Further calls to try with the two-feature dataset (left disabled):
#enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])
#enc.get_feature_names_out(['gender', 'group'])

[array(['Female', 'Male', 'Otro'], dtype=object)]


array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 0.]])

### 3. Transformación de etiquetas


- Investigar y ejecutar ejemplos de [Label Encoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html#sklearn.preprocessing.LabelEncoder)

In [None]:
# Import LabelEncoder in this cell: it is the first cell that uses it, so
# without the import a fresh kernel (Restart & Run All) fails with NameError.
from sklearn.preprocessing import LabelEncoder

# Target labels to encode; repeated values map to the same integer.
labels = ['red', 'brown', 'green', 'blue', 'red', 'green']

# Learn the set of distinct labels.
le = LabelEncoder()
le.fit(labels)

In [None]:
# Replace each label with its integer code (its index in le.classes_).
transformed_labels = le.transform(labels)
print(transformed_labels)


[3 1 2 0 3 2]


In [None]:
# Show the learned classes, then map the integer codes back to the original labels.
print("Categorías detectadas:", le.classes_)
original_labels = le.inverse_transform(transformed_labels)
print("Etiquetas originales:", original_labels)


Categorías detectadas: ['blue' 'brown' 'green' 'red']
Etiquetas originales: ['red' 'brown' 'green' 'blue' 'red' 'green']


In [None]:
from sklearn.preprocessing import LabelEncoder

# LabelEncoder also accepts numeric labels.
le = LabelEncoder()
le.fit([1, 6, 2, 2, 6])

# A bare expression in the middle of a cell is evaluated and then discarded,
# so print the learned classes explicitly to make them visible.
print(le.classes_)

# Encode labels as their index within le.classes_ (cell's displayed value).
le.transform([1, 6, 2, 1])
# Reverse mapping to try (left disabled):
#le.inverse_transform([0, 0, 1, 2])

array([0, 2, 1, 0])

In [None]:
# Fit a fresh encoder on string labels.
le = LabelEncoder()
le.fit(["paris", "bruselas", "tokyo", "amsterdam"])

# A bare expression in the middle of a cell is evaluated and then discarded,
# so print the learned classes explicitly to make them visible.
print(list(le.classes_))

# Encode labels as their index within le.classes_ (cell's displayed value).
le.transform(["tokyo", "bruselas", "amsterdam"])
# Reverse mapping to try (left disabled):
#list(le.inverse_transform([0, 2, 1]))

array([3, 1, 0])

## (Opcional) KNN

Investigar y probar [KNeighborsClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# One-feature toy problem: four samples and their class labels.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]

# fit() returns the estimator, so the classifier is built and trained in one step.
neigh = KNeighborsClassifier(n_neighbors=3).fit(X, y)

# Predict the class of a single query point.
print(neigh.predict([[3]]))
# Class probabilities for another query point (left disabled):
#print(neigh.predict_proba([[1.8]]))

[1]


In [None]:
# Libraries needed for this example.
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the Iris dataset and pull out its feature matrix and target vector.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; random_state is fixed to 42.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build and train a KNN classifier that uses 3 neighbours.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict the held-out samples and score the predictions against the true labels.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión del modelo: {accuracy}")

# Classify one new sample given as four feature values.
new_sample = [[5.1, 3.5, 1.4, 0.2]]
prediction = knn.predict(new_sample)
print(f"Predicción para la nueva muestra: {iris.target_names[prediction]}")


Precisión del modelo: 1.0
Predicción para la nueva muestra: ['setosa']
