#Initialization

In [1]:
import tensorflow as tf
from tensorflow.keras import layers

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from google.colab import drive

drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [2]:
#Create data frame
filePath = '/content/drive/My Drive/*PHYS177/Project/Skyserver_SQL2_18_2024 8 52 51 AM.csv'
df = pd.read_csv(filePath, low_memory = False, header = 1)

In [3]:
#Verify data is loaded into data frame
print(df.head())

                 objid          ra        dec         u         g         r  \
0  1237668332026986542  217.940001  14.608378  19.13548  18.55482  17.95603   
1  1237661971724501194  189.744793   7.650623  19.21402  17.27192  16.38074   
2  1237673706652434520  116.303083  42.455980  18.47633  17.30546  17.24116   
3  1237658423557816441  169.129790   7.152575  17.47752  16.23605  15.61560   
4  1237658423538549134  125.039334   3.642783  19.35331  18.05971  17.41365   

          i         z   run  rerun  camcol  field            specobjid  \
0  17.68272  17.63717  5322    301       3     56  6154252554903769088   
1  15.96969  15.62344  3841    301       4    166  1833117663862220800   
2  17.32780  17.37114  6573    301       6    220  9333948945297330176   
3  15.29749  15.03419  3015    301       3    367  1820656067877562368   
4  17.00762  16.73926  3015    301       3     73  1333227711975417856   

    class  redshift  plate    mjd  fiberid  
0     QSO  1.802680   5466  56033  

In [4]:
# Input DataFrame
input_columns = ['u', 'g', 'r', 'i', 'z', 'redshift']
df_input = df[input_columns]

# Output DataFrame
output_columns = ['class']
df_output = df[output_columns]
df_output_modified = df_output.copy()

df_output_modified.loc[:, 'class'] = df_output_modified['class'].apply(lambda x: 1 if x == 'QSO' else 0)

  df_output_modified.loc[:, 'class'] = df_output_modified['class'].apply(lambda x: 1 if x == 'QSO' else 0)


In [5]:
#verify data is properly transformed
print(df_input.head())
print("\n")
print(df_output.head())
print("\n")
print(df_output_modified.head())

          u         g         r         i         z  redshift
0  19.13548  18.55482  17.95603  17.68272  17.63717  1.802680
1  19.21402  17.27192  16.38074  15.96969  15.62344  0.075626
2  18.47633  17.30546  17.24116  17.32780  17.37114 -0.000093
3  17.47752  16.23605  15.61560  15.29749  15.03419  0.037549
4  19.35331  18.05971  17.41365  17.00762  16.73926  0.093936


    class
0     QSO
1  GALAXY
2    STAR
3  GALAXY
4  GALAXY


   class
0      1
1      0
2      0
3      0
4      0


In [6]:
#Converting to numpy arrays
X = df_input.to_numpy()
y = df_output_modified['class'].to_numpy()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the input features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#Constructing Model

In [7]:
model = tf.keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.5),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

#Train the model

In [8]:
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


#Model Evaluation

In [9]:
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)


625/625 - 1s - loss: 0.0245 - accuracy: 0.9942 - 1s/epoch - 2ms/step

Test accuracy: 0.9942499995231628


In [10]:
start_index = 0
n_rows = 50


end_index = start_index + n_rows  # Calculate the end index

#Bounds checking
if end_index > len(X_test):
      print("Requested range exceeds dataset bounds. Adjusting to print till the end of the dataset.")
      end_index = len(X_test)

#Print table
for i in range(start_index, end_index):
    print(f"{i}: Features: {X_test[i]}, Label: {y_test[i]}")

0: Features: [ 0.53788103 -0.4028833  -0.90498937 -1.02269677 -1.10591296 -0.02685488], Label: 0
1: Features: [ 0.83252623 -0.07219777 -0.40568328 -0.52592351 -0.61741445 -0.22173901], Label: 0
2: Features: [ 0.00265814 -0.12688651 -0.11527226 -0.06402413 -0.02410027 -0.38994148], Label: 0
3: Features: [-0.01996807  0.99071709  1.20054117  1.08791244  1.06895185  3.94856907], Label: 1
4: Features: [-0.47747639  0.70596686  0.91233257  1.06897665  1.09939879  1.91495601], Label: 1
5: Features: [ 0.62397834 -0.12121299 -0.59653613 -0.77198962 -0.87443366 -0.1346559 ], Label: 0
6: Features: [ 0.95813535  0.13053961 -0.19456817 -0.29360711 -0.37302889 -0.22213641], Label: 0
7: Features: [ 1.13643089  0.25482022 -0.20981772 -0.33794475 -0.46072449 -0.134008  ], Label: 0
8: Features: [ 0.15466452  0.01370549  0.34029378  0.54396074  0.65969795 -0.39047713], Label: 0
9: Features: [-0.26682681  0.80252825  1.18478947  1.29142465  1.26419704  1.1795099 ], Label: 1
10: Features: [ 1.08976132  0.

In [16]:
index = 9


selected_row = X_test[index].reshape(1, -1)
predicted = model.predict(selected_row)
predicted_label = (predicted > 0.5).astype("int8")
actual_label = y_test[index]

print(f"Predicted: {predicted_label.flatten()[0]}, Actual: {actual_label}")


Predicted: 1, Actual: 1
