# AstroLens – 01_EDA_preprocessing_and_CNN
Clasificación multiclase con **Galaxy10 DECaLS**.

In [ ]:
# In Colab, uncomment the line below and run this cell once:
# !pip -q install astroNN tensorflow numpy pandas matplotlib scikit-learn

In [ ]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow import keras
from tensorflow.keras import layers
from astroNN.datasets import galaxy10
from astroNN.datasets.galaxy10 import GALAXY10_LABELS
# Seed NumPy (used below for the random sample grids) and, through Keras,
# the Python / NumPy / backend generators, so that weight initialisation,
# dropout and batch shuffling are repeatable between runs as well.
np.random.seed(42)
keras.utils.set_random_seed(42)

In [ ]:
# Load the dataset; per the original note the expected shapes are
# (N, 256, 256, 3) for the images and (N,) for the labels.
images, labels = galaxy10.load_data()
class_names = GALAXY10_LABELS

# Scale pixel values to [0, 1] as float32. The division is done in place so
# that only one float32 copy of the image array exists at any time;
# `astype(...) / 255.0` would allocate a second full-size temporary.
images = images.astype('float32')
images /= 255.0

print(images.shape, labels.shape)
print(class_names)

In [ ]:
# Class balance: count how many samples carry each label value and plot the
# counts as a bar chart labelled with the class names.
unique, counts = np.unique(labels, return_counts=True)
bar_labels = [class_names[k] for k in unique]
plt.figure()
plt.bar(bar_labels, counts)
plt.xticks(rotation=45, ha='right')
plt.title('Distribución de clases')
plt.tight_layout()
plt.show()

# Visual sanity check: nine distinct random images in a 3x3 grid, each titled
# with the name of its class.
idx = np.random.choice(len(images), 9, replace=False)
plt.figure(figsize=(6, 6))
for cell, ix in enumerate(idx, start=1):
    plt.subplot(3, 3, cell)
    plt.imshow(images[ix])
    plt.axis('off')
    plt.title(class_names[labels[ix]], fontsize=8)
plt.tight_layout()
plt.show()

In [ ]:
# Stratified 70 / 15 / 15 split: first hold out 30% of the data, then cut that
# hold-out in half to obtain the validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    images,
    labels,
    test_size=0.3,
    random_state=42,
    stratify=labels,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

# One-hot encode the integer labels of every split for the categorical loss.
num_classes = len(class_names)
y_train_cat, y_val_cat, y_test_cat = (
    keras.utils.to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_val, y_test)
)

# Per-sample shape (everything after the batch axis) fed to the network.
input_shape = X_train.shape[1:]

In [ ]:
# Small convolutional baseline: three Conv2D + MaxPooling2D stages with 32, 64
# and 128 filters, global average pooling, a 128-unit dense layer with dropout,
# and a softmax output with one unit per class.
model = keras.Sequential([
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape=` to the first Conv2D; newer Keras releases warn against
    # the latter for Sequential models.
    keras.Input(shape=input_shape),
    layers.Conv2D(32, 3, activation='relu', padding='same'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu', padding='same'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, activation='relu', padding='same'),
    layers.MaxPooling2D(),
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax'),
])
# The targets are one-hot encoded, hence the (non-sparse) categorical loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [ ]:
# Stop when validation accuracy has not improved for 3 consecutive epochs and
# roll the model back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=3,
    restore_best_weights=True,
)
callbacks = [early_stopping]

# Train for at most 10 epochs, validating on the held-out validation split.
history = model.fit(
    X_train,
    y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=10,
    batch_size=64,
    callbacks=callbacks,
    verbose=1,
)

In [ ]:
# Overall loss / accuracy on the untouched test split.
test_loss, test_acc = model.evaluate(X_test, y_test_cat, verbose=0)
print('Test accuracy:', round(float(test_acc), 4))

# Predicted class = index of the largest softmax output for each test image.
class_probabilities = model.predict(X_test, verbose=0)
y_pred = class_probabilities.argmax(axis=1)

# Per-class metrics followed by the raw confusion matrix.
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)
print(confusion_matrix(y_test, y_pred))

In [ ]:
import json
import os

# Show nine distinct random test images in a 3x3 grid, each titled with the
# predicted and the true class name.
idx = np.random.choice(len(X_test), 9, replace=False)
plt.figure(figsize=(6, 6))
for cell, ix in enumerate(idx, start=1):
    predicted_name = class_names[y_pred[ix]]
    true_name = class_names[y_test[ix]]
    plt.subplot(3, 3, cell)
    plt.imshow(X_test[ix])
    plt.axis('off')
    plt.title(f"Pred:{predicted_name}\nTrue:{true_name}", fontsize=8)
plt.tight_layout()
plt.show()

# Persist the trained model under models/ (created if missing).
os.makedirs('models', exist_ok=True)
model.save('models/astron_cnn.keras')

# Store the class names next to the notebook. The context manager guarantees
# the file is flushed and closed; the bare `open(...)` used previously left
# the handle open until garbage collection.
with open('class_names.json', 'w', encoding='utf-8') as names_file:
    json.dump(class_names, names_file)