In [1]:
import os
import numpy as np
import pandas as pd
from pathlib import Path

# --- Configuration -----------------------------------------------------------
CSV_FILE = 'sdss_data.csv'  # catalogue table read by pd.read_csv below
IMG_DIR = 'sdss_images'     # name of the image directory
IMG_SIZE = 128              # edge length (pixels) images are resized to when loaded

# --- Load the catalogue ------------------------------------------------------
# Lines starting with '#' in the CSV are treated as comments and skipped.
df = pd.read_csv(filepath_or_buffer=CSV_FILE, comment='#')
print(f'Successfully loaded {len(df)} galaxy coordinates.')

# Show the first five rows as the cell output.
df.head(5)

Successfully loaded 1000 galaxy coordinates.


Unnamed: 0,objID,ra,dec,modelMag_u,modelMag_g,modelMag_r,modelMag_i,modelMag_z,petroRad_r,petroRad_g,z,zErr
0,1237648704585662641,203.042941,0.014639,20.69178,18.76838,17.68217,17.18957,16.84236,3.183995,3.634754,0.152338,3.5e-05
1,1237648704585662865,202.931765,0.114071,23.79132,21.99652,20.40593,19.48305,19.25461,3.342154,2.969919,0.566904,0.000171
2,1237648704585662884,202.940071,0.204344,23.20472,22.23272,20.48755,19.57254,18.90709,1.88121,1.649263,0.504841,0.000155
3,1237648704585663201,203.047676,0.031271,24.53938,22.03534,20.36415,19.34721,18.97953,2.700954,2.969919,0.499408,0.000177
4,1237648704585663335,202.961607,0.062155,25.04012,23.03566,21.02226,19.88334,19.45707,2.969387,2.969919,0.616243,0.000292


In [13]:
from keras.utils import load_img, img_to_array
from PIL import Image

def load_images(df: pd.DataFrame, image_dir: Path, strict: bool = False) -> np.ndarray:
    """Load one image per row of ``df`` and stack them into a single array.

    For every index label in ``df`` the file ``image_<label:04d>.jpg`` inside
    ``image_dir`` is read with ``load_img`` (resized to
    ``(IMG_SIZE, IMG_SIZE)``), converted with ``img_to_array`` and divided by
    255.0.

    Args:
        df: Frame whose index labels select the image files. Only the index is
            used; the labels must support the ``04d`` format spec.
        image_dir: Directory that contains the ``image_XXXX.jpg`` files.
        strict: If ``True``, re-raise the first loading error instead of
            skipping the image. Defaults to ``False`` (report and skip).

    Returns:
        ``np.ndarray`` built from the successfully loaded images, in index
        order. If any image was skipped the array has fewer entries than
        ``df`` has rows, so it no longer lines up row-for-row with columns
        taken from ``df``; a summary warning is printed in that case.

    Raises:
        Exception: Whatever the image loading raised, only when ``strict`` is
            ``True``.
    """
    images = []
    skipped = []

    # Only the index labels are needed, so iterate them directly instead of
    # materialising a Series per row with iterrows().
    for index in df.index:
        img_path = os.path.join(image_dir, f'image_{index:04d}.jpg')
        try:
            img = load_img(img_path, target_size = (IMG_SIZE, IMG_SIZE))
            images.append(img_to_array(img) / 255.0)
        except Exception as e:
            if strict:
                raise
            print(f'Could not load image path {img_path}: {e}')
            skipped.append(index)

    if skipped:
        # Make the row/image mismatch impossible to miss: callers that pair
        # the result with per-row labels must drop these rows as well.
        print(
            f'Warning: skipped {len(skipped)} of {len(df)} images; '
            f'the returned array is not aligned with the DataFrame rows. '
            f'Skipped index labels: {skipped}'
        )

    return np.array(images)

In [14]:
# Reuse the IMG_DIR constant instead of repeating the directory name.
images = load_images(df, Path(IMG_DIR))
print(images.shape)

# Regression targets: the 'z' column of the catalogue.
labels = df['z'].values

# load_images skips files it cannot read, which would shift images against
# labels; fail loudly here rather than train on mismatched pairs.
assert len(images) == len(labels), (
    f'{len(images)} images loaded for {len(labels)} labels; '
    'images and labels are misaligned'
)

(1000, 128, 128, 3)


In [26]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the samples for validation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size = 0.25,
    random_state = 396,
)

In [31]:
import tensorflow as tf
from keras import layers
from keras.models import Sequential

def build_redshift_cnn(input_shape: tuple) -> tf.keras.Model:
    """Build and compile a small CNN that regresses one scalar per image.

    Architecture: three blocks of 3x3 convolution (32, 64 and 128 filters,
    ReLU) each followed by 2x2 max pooling, then flatten, a 32-unit ReLU
    dense layer, 50% dropout and a single linear output unit.

    Args:
        input_shape: Shape of one input sample, e.g. ``(128, 128, 3)``.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, mean squared error
        loss, mean absolute error tracked as a metric).
    """
    model = Sequential(
        [
            # Declare the input once, up front. Passing input_shape to every
            # Conv2D is meaningless for the non-first layers.
            layers.Input(shape = input_shape),

            # Three conv blocks, each a 3x3 convolution plus 2x2 max pooling.
            layers.Conv2D(32, (3, 3), activation = 'relu', name = 'conv1'),
            layers.MaxPooling2D((2, 2), name = 'pool1', padding = 'valid'),

            layers.Conv2D(64, (3, 3), activation = 'relu', name = 'conv2'),
            layers.MaxPooling2D((2, 2), name = 'pool2', padding = 'valid'),

            layers.Conv2D(128, (3, 3), activation = 'relu', name = 'conv3'),
            layers.MaxPooling2D((2, 2), name = 'pool3', padding = 'valid'),

            layers.Flatten(name = 'flatten'), # May need to replace with GlobalAveragePooling to cut params
            layers.Dense(32, activation = 'relu', name = 'fconn1'),
            layers.Dropout(0.5, name = 'dropout'),
            # Linear activation: the output is an unbounded regression value.
            layers.Dense(1, activation = 'linear', name = 'output'),
        ]
    )
    model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mean_absolute_error'])
    return model

# Input shape follows the IMG_SIZE used when loading the images (3 colour channels).
model = build_redshift_cnn((IMG_SIZE, IMG_SIZE, 3))

# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()

None


In [32]:
# Sanity-check the split: report the shape of each train/validation array,
# one per line.
print(
    f'X_train shape: {X_train.shape}',
    f'y_train shape: {y_train.shape}',
    f'X_val shape: {X_val.shape}',
    f'y_val shape: {y_val.shape}',
    sep = '\n',
)

X_train shape: (750, 128, 128, 3)
y_train shape: (750,)
X_val shape: (250, 128, 128, 3)
y_val shape: (250,)


In [None]:
# TODO: train the model here.