<a href="https://colab.research.google.com/github/LayanAlrashoud/Space-Categorization/blob/main/Space.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import kagglehub
import os
import shutil
from sklearn.model_selection import train_test_split

# 1. تحميل البيانات من Kaggle
path = kagglehub.dataset_download("abhikalpsrivastava15/space-images-category")

# طباعة المسار إلى الملفات
print("Path to dataset files:", path)

# 2. المسار إلى مجلد "space images"
space_images_path = os.path.join(path, "space images")

# التحقق من المجلد الرئيسي
if os.path.exists(space_images_path):
    categories = [folder_name for folder_name in os.listdir(space_images_path)
                  if os.path.isdir(os.path.join(space_images_path, folder_name)) and not folder_name in ["train", "val", "test"]]
    print("Actual folder names in 'space images':", categories)
else:
    print("'space images' folder does not exist. Please check the dataset structure.")

# 3. إنشاء مسارات `train`, `val`, `test`
output_dirs = {
    "train": os.path.join(path, "train"),
    "val": os.path.join(path, "val"),
    "test": os.path.join(path, "test"),
}

for output_dir in output_dirs.values():
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

# 4. تقسيم الصور بين `train`, `val`, `test`
def split_data(category_path, category_name, output_dirs):
    # جمع الصور
    valid_extensions = ('.png', '.jpg', '.jpeg')
    images = [os.path.join(category_path, img) for img in os.listdir(category_path) if img.endswith(valid_extensions)]

    # تحقق من وجود الصور
    if len(images) == 0:
        print(f"No valid images found in {category_path}")
        return

    # تقسيم الصور
    train_images, test_images = train_test_split(images, test_size=0.3, random_state=42)
    val_images, test_images = train_test_split(test_images, test_size=0.33, random_state=42)

    # نسخ الصور إلى المجلدات المناسبة
    for img in train_images:
        shutil.copy(img, os.path.join(output_dirs["train"], category_name))
    for img in val_images:
        shutil.copy(img, os.path.join(output_dirs["val"], category_name))
    for img in test_images:
        shutil.copy(img, os.path.join(output_dirs["test"], category_name))

# 5. تنفيذ التقسيم
for category in categories:
    category_path = os.path.join(space_images_path, category)
    category_cleaned = category.replace(" - Google Search", "")
    for output_dir in output_dirs.values():
        category_folder = os.path.join(output_dir, category_cleaned)
        if not os.path.exists(category_folder):
            os.makedirs(category_folder)
    split_data(category_path, category_cleaned, output_dirs)

print("Data has been split into train, val, and test folders successfully!")


Downloading from https://www.kaggle.com/api/v1/datasets/download/abhikalpsrivastava15/space-images-category?dataset_version_number=1...


100%|██████████| 464M/464M [00:07<00:00, 65.0MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/abhikalpsrivastava15/space-images-category/versions/1
Actual folder names in 'space images': ['constellation - Google Search', 'nebula - Google Search', 'planets - Google Search', 'stars - Google Search', 'galaxies - Google Search', 'cosmos space - Google Search']
Data has been split into train, val, and test folders successfully!


In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# إعداد تحسين البيانات
data_gen_args = {
    "rescale": 1.0 / 255,
    "rotation_range": 20,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "zoom_range": 0.1,
    "horizontal_flip": True,
    "fill_mode": "nearest",
}

train_datagen = ImageDataGenerator(**data_gen_args)
val_test_datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = train_datagen.flow_from_directory(
    output_dirs["train"],
    target_size=(150, 150),
    batch_size=32,
    class_mode="categorical"
)

val_generator = val_test_datagen.flow_from_directory(
    output_dirs["val"],
    target_size=(150, 150),
    batch_size=32,
    class_mode="categorical"
)

test_generator = val_test_datagen.flow_from_directory(
    output_dirs["test"],
    target_size=(150, 150),
    batch_size=32,
    class_mode="categorical"
)

# تحميل VGG16
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(150, 150, 3))
base_model.trainable = True

# إلغاء تجميد آخر 8 طبقات فقط
for layer in base_model.layers[:-6]:
    layer.trainable = False

# بناء النموذج
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])

# تجميع النموذج
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# إعداد الكولباك
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7)

# تدريب النموذج
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=[early_stopping, reduce_lr]
)

# تقييم النموذج
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

# حفظ النموذج
model.save("space_classification_model_vgg16_improved.keras")
print("Model has been saved successfully!")


Found 773 images belonging to 6 classes.
Found 221 images belonging to 6 classes.
Found 113 images belonging to 6 classes.
Epoch 1/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m362s[0m 14s/step - accuracy: 0.1743 - loss: 1.8792 - val_accuracy: 0.4072 - val_loss: 1.6359 - learning_rate: 1.0000e-05
Epoch 2/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m380s[0m 14s/step - accuracy: 0.2623 - loss: 1.7135 - val_accuracy: 0.5113 - val_loss: 1.4808 - learning_rate: 1.0000e-05
Epoch 3/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 14s/step - accuracy: 0.3539 - loss: 1.5882 - val_accuracy: 0.5339 - val_loss: 1.3244 - learning_rate: 1.0000e-05
Epoch 4/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m401s[0m 15s/step - accuracy: 0.4282 - loss: 1.4316 - val_accuracy: 0.5430 - val_loss: 1.2181 - learning_rate: 1.0000e-05
Epoch 5/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m362s[0m 14s/step - accuracy: 0.5134 - loss: 1.33