In [1]:
# All the Libraries regarding NN and Data Frame Manipulation
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential 
from keras import Input 
from keras.layers import LSTM, Dense, Activation , Flatten, Reshape, SimpleRNN, Dropout, Conv1D,MaxPooling1D, ZeroPadding1D
from keras.callbacks import Callback,EarlyStopping, ModelCheckpoint

In [2]:
# Load the star classification CSV (relative path: resolved against the notebook's working directory)
df = pd.read_csv(filepath_or_buffer="star_classification.csv")

FileNotFoundError: ignored

In [None]:
# Preview the first five rows of the loaded frame
df.head(n=5)

In [None]:
# Summary of the dataset: column names, non-null counts and dtypes
df.info()

In [None]:
# Statistical summary, transposed so each column of the dataset becomes one row
df.describe().transpose()

In [None]:
# Number of distinct values in each column
df.nunique()

In [None]:
# Drop the columns that are not used in the rest of the notebook
df = df.drop(["obj_ID", "rerun_ID"], axis="columns")

In [None]:
# Replace the target column with integer codes, then show how many rows each code has
encode = LabelEncoder().fit(df['class'])
df['class'] = encode.transform(df['class'])
df['class'].value_counts()

In [None]:
# Count plot: number of rows per encoded class
sns.countplot(x='class', data=df).set(
    title="Distribution of Classes",
    xlabel="Class",
    ylabel="Count",
)
plt.show()


In [None]:
# Pairwise correlation between all columns (including the encoded target)
df.corr()

In [None]:
# Correlation heatmap with the coefficient printed in every cell
plt.figure(figsize=(13, 6))
dataplot = sns.heatmap(
    data=df.corr(),
    annot=True,
    cmap='YlGnBu',
)


In [None]:
# Pair plot of the selected feature columns, coloured by the encoded class
sns.pairplot(
    df,
    hue='class',
    vars=['alpha', 'delta', 'u', 'g', 'r', 'i', 'z', 'redshift'],
    height=2,
)
plt.show()


In [None]:
# Histogram of class (target variable).
# Select the column explicitly: DataFrame.plot(kind='hist', x='class') does not
# histogram 'class' -- it plots the remaining columns instead.
df['class'].plot(kind='hist')

In [None]:
# Separate the target column from the feature columns
y = df['class']
x = df.drop(columns='class')

In [None]:
# Hold out 30% of the rows as the test set (fixed seed so the split is repeatable)
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardise features: the scaler is fitted on the training rows only and then
# applied unchanged to both splits
scaler = StandardScaler().fit(x_train)
x_scl = scaler.transform(x_train)
x_test_scl = scaler.transform(x_test)

In [None]:
# One-hot encode the integer class labels (3 classes) for both splits
y_scl, y_test_scl = (
    to_categorical(labels, num_classes=3) for labels in (y_train, y_test)
)


In [None]:
# Training the MLP model
model = Sequential([
    # Input width is taken from the scaled training matrix instead of a hard-coded 15
    Dense(64, activation='relu', input_dim=x_scl.shape[1]),  # input layer
    Dropout(0.2),
    Dense(32, activation='relu'),  # hidden layer
    Dropout(0.2),
    Dense(16, activation='sigmoid'),  # hidden layer
    Dropout(0.2),
    Dense(3, activation='softmax'),  # output layer: one probability per class
])

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights;
# additionally save the weights with the best val_accuracy to disk.
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min', restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)

# Compilation and fitting of the model with early stopping & checkpoint
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Only validation_split is passed. The original call also passed
# validation_data=[x_test_scl, y_test_scl], which overrides validation_split and lets
# the test set steer early stopping / checkpointing before it is used for the final
# evaluation. Validation now comes from a 35% slice of the training data.
history = model.fit(
    x_scl,
    y_scl,
    epochs=20,
    validation_split=0.35,
    callbacks=[early_stop, checkpoint],
)



In [None]:
# Loss and accuracy of the current model on the test split
loss, accuracy = model.evaluate(x=x_test_scl, y=y_test_scl)
print(loss, accuracy)

In [None]:
# Plot training vs. validation loss per epoch
loss_train = history.history['loss']
loss_val = history.history['val_loss']
# Size the x-axis from the recorded history: EarlyStopping can end training before
# epoch 20, and a fixed range(1, 21) would then not match the curve lengths.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Plot training vs. validation accuracy per epoch
# (the loss_* names are kept from the original cell; the values are accuracies)
loss_accu = history.history['accuracy']
loss_val_accu = history.history['val_accuracy']
# Size the x-axis from the recorded history: EarlyStopping can end training before
# epoch 20, and a fixed range(1, 21) would then not match the curve lengths.
epochs = range(1, len(loss_accu) + 1)
plt.plot(epochs, loss_accu, 'g', label='Training Accuracy')
plt.plot(epochs, loss_val_accu, 'b', label='validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Reshape data to be compatible with LSTM input: (samples, 1 timestep, features)
x_scl_lstm = np.reshape(x_scl, (x_scl.shape[0], 1, x_scl.shape[1]))
x_test_scl_lstm = np.reshape(x_test_scl, (x_test_scl.shape[0], 1, x_test_scl.shape[1]))

model = Sequential([
    # First LSTM layer returns the full sequence so the second LSTM layer can consume it
    LSTM(128, input_shape=(x_scl_lstm.shape[1], x_scl_lstm.shape[2]), activation='tanh', return_sequences=True),
    Dropout(0.2),
    LSTM(64, activation='tanh'),  # hidden LSTM layer
    # softmax (not sigmoid): the targets are one-hot over 3 mutually exclusive classes
    # and the loss is categorical_crossentropy, which expects a probability distribution
    Dense(3, activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min', restore_best_weights=True)

# Only validation_split is passed. The original call also passed validation_data built
# from the test set, which overrides validation_split and lets the test set steer early
# stopping. Validation now comes from a 35% slice of the training data.
hist = model.fit(
    x_scl_lstm,
    y_scl,
    epochs=20,
    validation_split=0.35,
    batch_size=64,
    callbacks=[early_stop],
)

In [None]:
# Loss and accuracy of the LSTM model on the reshaped test split
loss, accuracy = model.evaluate(x=x_test_scl_lstm, y=y_test_scl)
print(loss, accuracy)

In [None]:
# Plot training vs. validation loss per epoch for the run stored in `hist`
loss_train = hist.history['loss']
loss_val = hist.history['val_loss']
# Size the x-axis from the recorded history: EarlyStopping can end training before
# epoch 20, and a fixed range(1, 21) would then not match the curve lengths.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Plot training vs. validation accuracy per epoch for the run stored in `hist`
# (the loss_* names are kept from the original cell; the values are accuracies)
loss_accu = hist.history['accuracy']
loss_val_accu = hist.history['val_accuracy']
# Size the x-axis from the recorded history: EarlyStopping can end training before
# epoch 20, and a fixed range(1, 21) would then not match the curve lengths.
epochs = range(1, len(loss_accu) + 1)
plt.plot(epochs, loss_accu, 'g', label='Training Accuracy')
plt.plot(epochs, loss_val_accu, 'b', label='validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Reshape data to be compatible with CNN input: (samples, features, 1 channel).
# The feature count is inferred (-1) rather than hard-coded to 15.
# x_scl / x_test_scl are reassigned here on purpose: the evaluation cell that follows
# feeds x_test_scl to this model and relies on the 3-D shape.
x_scl = x_scl.reshape(x_scl.shape[0], -1, 1)
x_test_scl = x_test_scl.reshape(x_test_scl.shape[0], -1, 1)


model = Sequential([
    Conv1D(64, kernel_size=2, activation='relu', input_shape=(x_scl.shape[1], 1)),  # 1D convolutional layer
    MaxPooling1D(pool_size=2),  # keeps the maximum of each window of 2
    Dropout(0.2),
    Conv1D(32, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(16, activation='sigmoid'),
    Dropout(0.2),
    Dense(3, activation='softmax'),  # output layer: one probability per class
])

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min', restore_best_weights=True)
# NOTE(review): same 'best_model.h5' path as the MLP cell, so this run overwrites that file.
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Only validation_split is passed. The original call also passed validation_data built
# from the test set, which overrides validation_split and lets the test set steer early
# stopping / checkpointing. Validation now comes from a 35% slice of the training data.
history = model.fit(
    x_scl,
    y_scl,
    epochs=20,
    validation_split=0.35,
    callbacks=[early_stop, checkpoint],
)


In [None]:
# Loss and accuracy of the current model on the test split
loss, accuracy = model.evaluate(x=x_test_scl, y=y_test_scl)
print(loss, accuracy)

In [None]:
# Plot training vs. validation loss per epoch for the run stored in `history`
loss_train = history.history['loss']
loss_val = history.history['val_loss']
# Size the x-axis from the recorded history: EarlyStopping can end training before
# epoch 20, and a fixed range(1, 21) would then not match the curve lengths.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Plot training vs. validation accuracy per epoch for the run stored in `history`
# (the loss_* names are kept from the original cell; the values are accuracies)
loss_accu = history.history['accuracy']
loss_val_accu = history.history['val_accuracy']
# Size the x-axis from the recorded history: EarlyStopping can end training before
# epoch 20, and a fixed range(1, 21) would then not match the curve lengths.
epochs = range(1, len(loss_accu) + 1)
plt.plot(epochs, loss_accu, 'g', label='Training Accuracy')
plt.plot(epochs, loss_val_accu, 'b', label='validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()