In [None]:
import os
from skimage.io import imread
from skimage.transform import resize
from skimage import exposure, restoration, filters
import numpy as np
from sklearn.preprocessing import LabelEncoder

DATASET_PATH = os.path.join('..', 'Input')

# Build the feature list X (one flattened, enhanced 128x128 image per entry) and
# the label list y, where the name of each sub-directory of DATASET_PATH is the label.
X, y = [], []

# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# (and anything that depends on it, such as "first image of each class")
# reproducible across runs and machines.
for label in sorted(os.listdir(DATASET_PATH)):

    label_path = os.path.join(DATASET_PATH, label)

    # Only directories are treated as classes; stray files are ignored.
    if not os.path.isdir(label_path):
        continue

    for file in sorted(os.listdir(label_path)):

        # Resolved before the try so the error message below can always use it.
        image_path = os.path.join(label_path, file)

        try:
            img = imread(image_path, as_gray=True)

            # Adaptive histogram equalisation followed by bilateral denoising.
            img = exposure.equalize_adapthist(img, clip_limit=0.03, kernel_size=32)
            img = restoration.denoise_bilateral(img, sigma_color=0.04, sigma_spatial=1.4, channel_axis=None)

            # Add a fraction of the Sobel edge magnitude and keep values in [0, 1].
            edges = filters.sobel(img)
            img_enhanced = np.clip(img + 0.24 * edges, 0, 1)

            # Contrast stretch between the 1st and 98th percentile of the non-zero
            # pixels only (presumably to exclude the black background - confirm).
            brain_pixels = img_enhanced[img_enhanced > 0]
            if brain_pixels.size > 0:
                p1, p98 = np.percentile(brain_pixels, (1, 98))
                img_enhanced = exposure.rescale_intensity(img_enhanced, in_range=(p1, p98)) # type: ignore

            img_resized = resize(img_enhanced, (128, 128), anti_aliasing=True, preserve_range=True)

        except Exception as e:
            # Best effort: unreadable or non-image files are reported and skipped.
            print(f"Error en {image_path}, omitiendo... Error: {e}")
            continue

        # Appended only after the whole preprocessing succeeded, so X and y stay aligned.
        X.append(img_resized.flatten())
        y.append(label)


X = np.array(X)

# Encode the directory names as integer class ids; `encoder` maps them back.
encoder = LabelEncoder()
y = encoder.fit_transform(y)

In [None]:
import matplotlib.pyplot as plt

# Quick visual check: draw the first stored sample of every class side by side.
unique_classes = np.unique(y)
n_classes = len(unique_classes)

fig = plt.figure(figsize=(12, 6))

for position, class_id in enumerate(unique_classes, start=1):

    # Position in X of the first sample carrying this encoded label.
    first_match = np.where(y == class_id)[0][0]

    ax = fig.add_subplot(1, n_classes, position)
    # Rows of X are flattened 128x128 images, so restore the 2-D shape for display.
    ax.imshow(X[first_match].reshape(128, 128), cmap='gray')
    ax.set_title(encoder.inverse_transform([class_id])[0])
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import os
from skimage.io import imread
from skimage.transform import resize
from skimage import exposure, restoration, filters
import numpy as np
from sklearn.preprocessing import LabelEncoder
from skimage.transform import rotate

DATASET_PATH = os.path.join('..', 'Input')
ROTATION_ANGLES = [-8, -4, 4, 8]      # degrees, one rotated copy per angle
BRIGHTNESS_FACTORS = [0.85, 1.15]     # multiplicative factors, one copy per factor

# Build the feature list X and label list y. Every source image contributes,
# in this order: the resized original, one rotation per ROTATION_ANGLES entry,
# one horizontal flip and one brightness variant per BRIGHTNESS_FACTORS entry.
X, y = [], []

# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# (and therefore the seeded steps that follow) reproducible across machines.
for label in sorted(os.listdir(DATASET_PATH)):

    label_path = os.path.join(DATASET_PATH, label)

    # Only directories are treated as classes; stray files are ignored.
    if not os.path.isdir(label_path):
        continue

    for file in sorted(os.listdir(label_path)):

        # Resolved before the try so the error message below can always use it.
        image_path = os.path.join(label_path, file)

        try:
            img = imread(image_path, as_gray=True)

            # as_gray only converts colour images (to floats in [0, 1]); files that
            # are already grayscale keep their integer dtype and range. Normalise
            # those too, otherwise features end up on mixed scales and the
            # brightness clip to [0, 1] below would saturate the whole image.
            if np.issubdtype(img.dtype, np.integer):
                img = img.astype(np.float64) / np.iinfo(img.dtype).max

            img_resized = resize(img, (128, 128), anti_aliasing=True, preserve_range=True)

            samples = [img_resized]
            samples.extend(
                rotate(img_resized, angle, resize=False, preserve_range=True)
                for angle in ROTATION_ANGLES
            )
            samples.append(np.fliplr(img_resized))
            samples.extend(
                np.clip(img_resized * factor, 0, 1)
                for factor in BRIGHTNESS_FACTORS
            )

        except Exception as e:
            # Best effort: unreadable or non-image files are reported and skipped.
            print(f"Error en {image_path}, omitiendo... Error: {e}")
            continue

        # Appended only once every variant exists, so each source image always
        # contributes a complete, contiguous run of samples.
        for sample in samples:
            X.append(sample.flatten())
            y.append(label)


X = np.array(X)

# Encode the directory names as integer class ids; `encoder` maps them back.
encoder = LabelEncoder()
y = encoder.fit_transform(y)

In [None]:
# Author's recorded run: 6,056 images loaded, each represented by 16,384 features
# (pixels, i.e. 128 x 128). The first number depends on the dataset on disk.

X.shape

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedGroupKFold

# X holds, for every source image, the original followed by its augmented
# variants (one per ROTATION_ANGLES entry, one horizontal flip, one per
# BRIGHTNESS_FACTORS entry), stored contiguously. A plain random split would
# scatter variants of the same source image across train and test, leaking
# near-duplicates into the test set. Splitting by source image avoids that.
SAMPLES_PER_IMAGE = 1 + len(ROTATION_ANGLES) + 1 + len(BRIGHTNESS_FACTORS)

if len(X) % SAMPLES_PER_IMAGE != 0:
    raise ValueError(
        f"X has {len(X)} rows, which is not a multiple of {SAMPLES_PER_IMAGE} "
        "samples per source image; cannot derive source-image groups."
    )

# Group id = index of the source image each row was derived from.
groups = np.arange(len(X)) // SAMPLES_PER_IMAGE

# Five stratified, group-aware folds; taking the first one yields roughly the
# same 80/20 class-balanced split as before, but with whole groups kept together.
splitter = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=666)
train_idx, test_idx = next(splitter.split(X, y, groups=groups))

# The splitter returns sorted indices; shuffle them so downstream code that
# takes the first rows of X_train still sees a mix of classes.
rng = np.random.RandomState(666)
train_idx = rng.permutation(train_idx)
test_idx = rng.permutation(test_idx)

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

In [None]:
import umap
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Three-stage model: UMAP dimensionality reduction, standardisation of the
# embedding, then a multi-layer perceptron classifier. The step names are
# referenced elsewhere through the `<step>__<param>` convention, so keep them.
reducer = umap.UMAP(random_state=666)

# NOTE(review): per the scikit-learn docs, `learning_rate` is only used when
# solver='sgd', so 'adaptive' has no effect with 'adam' - confirm intent.
classifier = MLPClassifier(
    solver='adam',
    early_stopping=True,
    learning_rate='adaptive',
    random_state=666,
    max_iter=700,
    validation_fraction=0.2,
    shuffle=True,
)

pipe = Pipeline(
    steps=[
        ('reducer', reducer),
        ('scaler', StandardScaler()),
        ('classifier', classifier),
    ]
)

pipe.fit(X_train, y_train)

In [None]:
import matplotlib.pyplot as plt

# Project the training images with the fitted UMAP step and colour the
# first two embedding coordinates by class.
X_umap = pipe.named_steps['reducer'].transform(X_train)

classes = encoder.classes_

fig, ax = plt.subplots(figsize=(8, 6))

for class_id in np.unique(y_train):

    # Boolean mask selecting the training samples of this class.
    members = y_train == class_id
    ax.scatter(X_umap[members, 0], X_umap[members, 1], s=10, label=classes[class_id])

ax.set_title("Representación UMAP de las imágenes")
ax.set_xlabel("Componente 1")
ax.set_ylabel("Componente 2")
ax.legend(title='Clase original')
plt.show()

In [None]:
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

def imscatter(x, y, images, ax=None, zoom=0.3, image_shape=(128, 128)):
    """Scatter plot that draws a grayscale thumbnail at every (x, y) position.

    Parameters
    ----------
    x, y : sequences of float
        Coordinates of each point, in data units.
    images : iterable of array-like
        One image per point. Each is reshaped to ``image_shape`` before drawing,
        so flattened rows are accepted.
    ax : matplotlib Axes, optional
        Axes to draw on; the current axes (``plt.gca()``) when omitted.
    zoom : float, default 0.3
        Scale factor passed to ``OffsetImage``.
    image_shape : tuple of int, default (128, 128)
        Shape each image is reshaped to.

    Returns
    -------
    list
        The artists added to the axes, one per point, in input order.
    """
    if ax is None:
        ax = plt.gca()

    artists = []
    for x0, y0, img in zip(x, y, images):
        imagebox = OffsetImage(img.reshape(image_shape), zoom=zoom, cmap='gray')
        ab = AnnotationBbox(imagebox, (x0, y0), frameon=False)
        artists.append(ax.add_artist(ab))

    # Artists added via add_artist do not affect the data limits, so register
    # the point coordinates explicitly before autoscaling.
    ax.update_datalim(np.column_stack([x, y]))
    ax.autoscale()
    return artists

# Draw only the first 50 training samples so the figure does not get cluttered.
n = 50
X_umap_small, images_small = X_umap[:n], X_train[:n]

# First two embedding coordinates give the thumbnail positions.
thumb_x = X_umap_small[:, 0]
thumb_y = X_umap_small[:, 1]

fig, ax = plt.subplots(figsize=(10, 8))
imscatter(thumb_x, thumb_y, images_small, ax=ax, zoom=0.5)
plt.show()

In [None]:
from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score, f1_score

# Evaluate the baseline pipeline on both splits.
y_pred_train = pipe.predict(X_train)
y_pred_test = pipe.predict(X_test)

# (split name, true labels, predicted labels), reported in this order.
evaluation_sets = (
    ('train', y_train, y_pred_train),
    ('test', y_test, y_pred_test),
)

for split_name, y_true, y_hat in evaluation_sets:
    print(f'balanced accuracy {split_name}:', balanced_accuracy_score(y_true, y_hat))

# NOTE(review): with single-label multiclass targets, micro-averaged precision,
# recall and F1 all equal plain accuracy, so these three lines print the same
# value per split - confirm whether 'macro' or 'weighted' was intended.
micro_metrics = (
    ('precision score', precision_score),
    ('recall score', recall_score),
    ('f1 score', f1_score),
)

for metric_label, metric_fn in micro_metrics:
    for split_name, y_true, y_hat in evaluation_sets:
        print(f'{metric_label} {split_name}:', metric_fn(y_true, y_hat, average='micro'))

In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd

def matriz(yt, yp):
    """Return the confusion matrix of ``yp`` against ``yt`` as a labelled DataFrame.

    Parameters
    ----------
    yt : array-like
        True class labels.
    yp : array-like
        Predicted class labels, aligned with ``yt``.

    Returns
    -------
    pandas.DataFrame
        Rows are true classes and columns are predicted classes, both in
        sorted label order; cell (i, j) counts the samples of true class i
        predicted as class j.
    """
    # Use every label seen in either vector: a class that is predicted but
    # absent from yt would otherwise be dropped and its errors would vanish.
    labels = np.union1d(yt, yp)
    matrix = confusion_matrix(y_true=yt, y_pred=yp, labels=labels)

    index = [f"{label} (Clase Real)" for label in labels]
    columns = [f"{label} (Predicción)" for label in labels]

    return pd.DataFrame(matrix, index=index, columns=columns)


# Confusion matrices of the baseline pipeline, train split first, then test.
train_predictions = pipe.predict(X_train)
matrix_train = matriz(y_train, train_predictions)

test_predictions = pipe.predict(X_test)
matrix_test = matriz(y_test, test_predictions)

display(matrix_train, matrix_test)

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid, keyed as `<pipeline step>__<parameter>`. Only
# n_components, hidden_layer_sizes and batch_size take more than one value,
# so the search evaluates 2 x 2 x 2 = 8 candidates.
param_grid = dict(
    reducer__n_neighbors=[8],
    reducer__n_components=[25, 30],
    reducer__min_dist=[0.04],
    reducer__metric=['manhattan'],
    reducer__spread=[1.2],
    reducer__local_connectivity=[0.7],
    classifier__hidden_layer_sizes=[(75, 25), (90, 30)],
    classifier__alpha=[0.03],
    classifier__batch_size=[32, 64],
    classifier__tol=[1e-8],
    classifier__n_iter_no_change=[20],
)

# NOTE(review): X_train contains augmented variants of the same source image,
# and plain 2-fold CV may place them in different folds, which would inflate
# the cross-validated score - confirm whether group-aware folds are needed.
search_settings = dict(
    cv=2,
    scoring='balanced_accuracy',
    n_jobs=-1,
    refit=True,
    verbose=1,
)

model = GridSearchCV(pipe, param_grid, **search_settings)

model.fit(X_train, y_train)

In [None]:
# Evaluate the refitted best estimator found by the grid search on both splits.
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('mejores parámetros encontrados:', model.best_params_)
print()

# (split name, features, true labels, predicted labels), reported in this order.
evaluation_sets = (
    ('train', X_train, y_train, y_pred_train),
    ('test', X_test, y_test, y_pred_test),
)

for split_name, _, y_true, y_hat in evaluation_sets:
    print(f'balanced accuracy {split_name}:', balanced_accuracy_score(y_true, y_hat))

# NOTE(review): with single-label multiclass targets, micro-averaged precision,
# recall and F1 all equal plain accuracy - confirm the intended averaging.
micro_metrics = (
    ('precision score', precision_score),
    ('recall score', recall_score),
    ('f1 score', f1_score),
)

for metric_label, metric_fn in micro_metrics:
    for split_name, _, y_true, y_hat in evaluation_sets:
        print(f'{metric_label} {split_name}:', metric_fn(y_true, y_hat, average='micro'))

print()

# model.score uses the scorer the search was configured with.
for split_name, features, y_true, _ in evaluation_sets:
    print(f'score {split_name}:', model.score(features, y_true))

In [None]:
# Confusion matrices of the tuned model, train split first, then test.
matrix_train, matrix_test = (
    matriz(y_true, model.predict(features))
    for features, y_true in ((X_train, y_train), (X_test, y_test))
)

display(matrix_train, matrix_test)

In [None]:
# Columns kept in the summary: the three parameters that vary in the grid
# plus the cross-validation score statistics.
summary_columns = [
    'param_reducer__n_components',
    'param_classifier__hidden_layer_sizes',
    'param_classifier__batch_size',
    'mean_test_score',
    'std_test_score',
    'rank_test_score',
]

# Both sorts of the original are kept so the resulting row order is unchanged:
# first by rank, then by mean test score from best to worst.
results = (
    pd.DataFrame(model.cv_results_)
    .sort_values('rank_test_score')
    .reset_index(drop=True)
    .loc[:, summary_columns]
    .sort_values('mean_test_score', ascending=False)
    .reset_index(drop=True)
)

results

In [None]:
# Disabled cell: when uncommented, it pickles the fitted grid-search object
# (`model`) to ../Output/models/model_txt.pkl, creating the directory if needed.
# import pickle

# os.makedirs('../Output/models', exist_ok=True)

# with open('../Output/models/model_txt.pkl', 'wb') as file:
#     pickle.dump(model, file)