# Galaxy Type Classification using Galaxy10 DECals Dataset
This Jupyter Notebook demonstrates the process of building a convolutional neural network to classify types of galaxies using the Galaxy10 DECals dataset from the Hugging Face datasets.

In [3]:
# Install required libraries
%pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Collecting tensorflow
  Using cached tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
Collecting keras>=3.0.0
  Downloading keras-3.3.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting wrapt>=1.11.0
  Downloading wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (80 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.3/80.3 KB[0m [31m444.5 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting h5py>=3.10.0
  Downloading h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
Collecting termcolor>=1.1.0
  Dow

## Step 1: Load the Dataset

In [4]:
import h5py
import numpy as np
from huggingface_hub import hf_hub_download

# Download and load the dataset
file_path = hf_hub_download(repo_id="matthieulel/galaxy10_decals", filename="galaxy10.h5")
with h5py.File(file_path, 'r') as f:
    images = np.array(f['images'])
    labels = np.array(f['ans'])

# Normalize the images
images = images / 255.0

  from .autonotebook import tqdm as notebook_tqdm


RepositoryNotFoundError: 401 Client Error. (Request ID: Root=1-6669c0a1-633201066efcaa4a3b4063f0;ab0b5d37-2007-4363-8ef4-1f8934d89853)

Repository Not Found for url: https://huggingface.co/matthieulel/galaxy10_decals/resolve/main/galaxy10.h5.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated.
Invalid username or password.

## Step 2: Preprocess the Data

In [None]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.2, random_state=42)

## Step 3: Build the Model

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Build the convolutional neural network model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(69, 69, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

## Step 4: Train the Model

In [None]:
# Train the model
history = model.fit(train_images, train_labels, epochs=10, validation_data=(test_images, test_labels))

## Step 5: Evaluate the Model

In [None]:
# Evaluate the model
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"Test Accuracy: {test_acc*100:.2f}%")