In [None]:
import numpy as np 
import pandas as pd 
import math
import matplotlib.pyplot as plt
import keras
from warnings import filterwarnings
filterwarnings('ignore')

In [None]:
# Load the competition CSV files.
# train/test use their first column as the index; the test index is then
# replaced by a fresh 0..n-1 range.
train = pd.read_csv("./train.csv", index_col=0)
test = pd.read_csv("./test.csv", index_col=0)
test = test.reset_index(drop=True)
submission = pd.read_csv("./sample_submission.csv")

In [None]:
# Remove rows with missing values, then rows whose dereddened g/i/z magnitude
# equals -9999 (presumably a missing-measurement marker -- TODO confirm),
# and finally renumber the surviving rows from 0.
train = train.dropna()
for flagged_column in ('dered_g', 'dered_i', 'dered_z'):
    flagged_index = train.loc[train[flagged_column] == -9999].index
    train = train.drop(flagged_index)
train = train.reset_index(drop=True)

In [None]:
# Colour indices: differences between adjacent bands (u-g, g-r, r-i, i-z),
# built once from the raw magnitudes and once from the 'dered_' magnitudes.
# Columns are appended in the same order as before (raw first, then dered),
# because later cells derive the feature order from train.columns.
adjacent_bands = [('u', 'g'), ('g', 'r'), ('r', 'i'), ('i', 'z')]

for frame in (train, test):
    for prefix in ('', 'dered_'):
        for left_band, right_band in adjacent_bands:
            column_name = f'{prefix}{left_band}-{right_band}'
            frame[column_name] = frame[f'{prefix}{left_band}'] - frame[f'{prefix}{right_band}']

In [None]:
# Interaction features: each colour index multiplied by the redshift.
# The list order fixes the order in which the 'r_*' columns are appended.
colour_columns = [
    'u-g', 'g-r', 'r-i', 'i-z',
    'dered_u-g', 'dered_g-r', 'dered_r-i', 'dered_i-z',
]

for frame in (train, test):
    for colour in colour_columns:
        frame[f'r_{colour}'] = frame['redshift'] * frame[colour]

In [None]:
# For every band name, store the row-wise max, min, (max - min) and sum taken
# across all five magnitudes u, g, r, i, z.
# Note: the statistics always span the full ugriz column set, so each band
# prefix receives the same values; only the column names differ.
ugriz = ['u', 'g', 'r', 'i', 'z']

for frame in (train, test):
    for band in ugriz:
        frame[f'{band}_max'] = frame[ugriz].max(axis=1)
        frame[f'{band}_min'] = frame[ugriz].min(axis=1)
        frame[f'{band}_diff'] = frame[f'{band}_max'] - frame[f'{band}_min']
        frame[f'{band}_sum'] = frame[ugriz].sum(axis=1)

In [None]:
# Same row-wise statistics as for the raw magnitudes, this time taken across
# the five 'dered_' columns: max, min, (max - min) and sum.
# The statistics span the whole dered_ugriz set, so every band prefix ends up
# with identical values under a different column name.
dered_ugriz = ['dered_u', 'dered_g', 'dered_r', 'dered_i', 'dered_z']

for frame in (train, test):
    for band in dered_ugriz:
        frame[f'{band}_max'] = frame[dered_ugriz].max(axis=1)
        frame[f'{band}_min'] = frame[dered_ugriz].min(axis=1)
        frame[f'{band}_diff'] = frame[f'{band}_max'] - frame[f'{band}_min']
        frame[f'{band}_sum'] = frame[dered_ugriz].sum(axis=1)

In [None]:
# https://classic.sdss.org/education/kron_ARCS.pdf
# Distance feature for the training rows: (z / (1 + z)) * 13.5e9 when the
# redshift z is positive, and the constant 10 for every other value.
distance_train = [
    (rs / (1+rs)) * 13.5 * 10**9 if rs > 0 else 10
    for rs in train['redshift']
]

train['distance'] = distance_train

In [None]:
# https://classic.sdss.org/education/kron_ARCS.pdf
# Distance feature for the test rows, using the same rule as for train:
# (z / (1 + z)) * 13.5e9 for positive redshift z, otherwise the constant 10.
distance_test = [
    (rs / (1+rs)) * 13.5 * 10**9 if rs > 0 else 10
    for rs in test['redshift']
]

test['distance'] = distance_test

In [None]:
# Handle the n* columns: difference between nObserve and nDetect.
for frame in (train, test):
    frame['nO-nD'] = frame['nObserve'] - frame['nDetect']

In [None]:
# Simple division: each magnitude divided by its 'dered_' counterpart, minus 1.
for frame in (train, test):
    for band in ('u', 'g', 'r', 'i', 'z'):
        frame[f'{band}/dered_{band}'] = frame[band] / frame[f'dered_{band}'] - 1

In [None]:
# Absolute-magnitude style features from the distance modulus
#     M = m - 5 * (log10(d) - 1)
# applied to the five raw and the five 'dered_' magnitudes, plus the row-wise
# maximum of the five dereddened results ('max_dered_M').
#
# The logarithm must be base 10: the earlier natural-log version (np.log) did
# not match the distance-modulus formula. With log10, the fallback distance of
# 10 assigned to non-positive redshifts gives a zero correction, i.e. M == m.
#
# NOTE(review): 'distance' is built from a 13.5e9 factor, which looks like
# light-years, while the distance modulus is defined for parsecs -- confirm
# the intended unit.
magnitude_columns = [
    'u', 'g', 'r', 'i', 'z',
    'dered_u', 'dered_g', 'dered_r', 'dered_i', 'dered_z',
]
dered_absolute_columns = ['M_dered_u', 'M_dered_g', 'M_dered_r', 'M_dered_i', 'M_dered_z']

for frame in (train, test):
    # The correction depends only on the distance, so compute it once per frame.
    distance_modulus = 5 * (np.log10(frame['distance']) - 1)
    for magnitude in magnitude_columns:
        frame[f'M_{magnitude}'] = frame[magnitude] - distance_modulus
    frame['max_dered_M'] = frame[dered_absolute_columns].max(axis=1)

In [None]:
# Differences between adjacent 'M_dered_' columns (u-g, g-r, r-i, i-z).
# Both operands carry the same per-row distance term, so it cancels in the
# subtraction.
for frame in (train, test):
    for left_band, right_band in (('u', 'g'), ('g', 'r'), ('r', 'i'), ('i', 'z')):
        frame[f'M_dered_{left_band}-{right_band}'] = (
            frame[f'M_dered_{left_band}'] - frame[f'M_dered_{right_band}']
        )

In [None]:
# Columns kept out of the model inputs: the max/min/diff/sum statistics for
# every band prefix except 'u' and 'dered_u', followed by the 'class' column.
bad_feature = [
    f'{band}_{stat}'
    for band in ('g', 'r', 'i', 'z', 'dered_g', 'dered_r', 'dered_i', 'dered_z')
    for stat in ('max', 'min', 'diff', 'sum')
]
bad_feature.append('class')

In [None]:
# Model inputs: every train column that is not listed in bad_feature,
# kept in the order in which the columns appear in train.
excluded_columns = set(bad_feature)
useful_columns = [column for column in train.columns if column not in excluded_columns]

In [None]:
from sklearn.model_selection import train_test_split
# keras.utils.to_categorical is the stable public path; the np_utils
# submodule is not available in every Keras release.
from keras.utils import to_categorical
from sklearn.preprocessing import RobustScaler

X = train[useful_columns]
labels = to_categorical(train['class'], num_classes=3)

# Split first, so the 25% held-out rows never influence the scaler statistics.
x_train, x_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# scale features
scaler = RobustScaler()
scaler.fit(x_train) # fit scaler to the training portion only

# Apply the same fitted transform to the training split, the held-out split
# and the competition test set. Each result is a fresh DataFrame with the
# feature columns in useful_columns order.
x_train = pd.DataFrame(scaler.transform(x_train), columns=useful_columns)
x_test = pd.DataFrame(scaler.transform(x_test), columns=useful_columns)
test = pd.DataFrame(scaler.transform(test[useful_columns]), columns=useful_columns)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.utils import plot_model
from keras.losses import categorical_crossentropy

def build_ann_model():
    """Build and compile a fully connected softmax classifier.

    The network is three ReLU dense layers (1024, 512, 256 units) followed by
    a softmax output layer. The input width is read from the notebook-level
    ``x_train`` (``x_train.shape[1]``) and the number of output units from
    ``y_train`` (``y_train.shape[1]``), so both must exist before calling.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    model.add(Dense(units=1024, activation="relu", input_dim=x_train.shape[1]))
    model.add(Dense(units=512, activation="relu"))
    model.add(Dense(units=256, activation="relu"))
    model.add(Dense(units=y_train.shape[1], activation="softmax"))

    # `learning_rate` replaces the deprecated `lr` keyword.
    optimizer = Adam(learning_rate=0.000015, beta_1=0.9, beta_2=0.999)

    model.compile(optimizer=optimizer, metrics=["accuracy"], loss=categorical_crossentropy)
    return model

In [None]:
# Instantiate the dense network and draw its layer graph with tensor shapes.
ann_model = build_ann_model()
plot_model(model=ann_model, show_shapes=True)

In [None]:
# ann_history = ann_model.fit(x_train,y_train,epochs=20,batch_size=64,validation_split = 0.2,shuffle=True)
# ypred = ann_model.predict(x_test)

In [None]:
# from sklearn.metrics import accuracy_score, confusion_matrix
# import seaborn as sns

# ann_accuracy = accuracy_score(y_test.argmax(axis=-1),ypred.argmax(axis=-1))
# #print("ANN Accuracy:",ann_accuracy)
# ann_cn = confusion_matrix(y_test.argmax(axis=-1),ypred.argmax(axis=-1))
# plt.subplots(figsize=(9,7))
# sns.heatmap(ann_cn,annot=True,fmt="1d",cbar=False,xticklabels=[0, 1, 2],yticklabels=[0, 1, 2])
# plt.title("ANN Accuracy: {}".format(ann_accuracy),fontsize=50)
# plt.xlabel("Predicted",fontsize=15)
# plt.ylabel("Actual",fontsize=15)
# plt.show()

In [None]:
# fig3, axe1 = plt.subplots(nrows=1, ncols=2, figsize=(15,5))
# axe1[0].plot(ann_history.history["accuracy"],label="accuracy",color="blue")
# axe1[1].plot(ann_history.history["loss"],label="loss",color="red")
# axe1[0].title.set_text("ANN Accuracy")
# axe1[1].title.set_text("ANN Loss")
# axe1[0].set_xlabel("Epoch")
# axe1[1].set_xlabel("Epoch")
# axe1[0].set_ylabel("Rate")
# plt.show()

In [None]:
# results = ann_model.evaluate(x_test, y_test)
# print('Test accuracy: ', results[1])

In [None]:
# ann_submit = ann_model.predict(test)
# print(ann_submit)
# ann_submit = np.argmax(ann_submit, axis=1).reshape(-1, 1)

In [None]:
# from keras.preprocessing.sequence import pad_sequences

# Preparing x for the CNN: each row of features is laid out as an
# 8 x 8 single-channel grid, so the reshape requires exactly 64 features.
def _as_grid(features):
    """Return ``features`` as an array of shape (n_rows, 8, 8, 1)."""
    values = np.array(features)
    return values.reshape((values.shape[0], 8, 8, 1))

x_train = _as_grid(x_train)
x_test = _as_grid(x_test)
test = _as_grid(test)

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)


In [None]:
from keras import optimizers
from keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D

def deep_cnn():
    """Build and compile the convolutional softmax classifier.

    The network has three stages with 64, 32 and 16 filters. Each stage is
    two 3x3 'same'-padded convolutions with ReLU activations followed by
    2x2 max pooling. The result is flattened and passed through an 8-unit
    ReLU dense layer and a 3-unit softmax layer.

    The input shape is read from the notebook-level ``x_train``
    (``x_train.shape[1:4]``), which must already be a 4-D array.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    input_shape = (x_train.shape[1], x_train.shape[2], x_train.shape[3])

    for stage, filters in enumerate((64, 32, 16)):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        model.add(Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1),
                         padding='same', **first_layer_kwargs))
        model.add(Activation('relu'))
        model.add(Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1), padding='same'))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # The prior layer must be flattened before connecting to dense layers.
    model.add(Flatten())
    # Dense layer with 8 neurons.
    model.add(Dense(8, activation='relu'))
    # Final layer with 3 neurons to classify the instances.
    model.add(Dense(3, activation='softmax'))

    # `learning_rate` replaces the deprecated `lr` keyword.
    adam = optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=adam, metrics=['accuracy'], loss='categorical_crossentropy')

    return model

cnn_model = deep_cnn()
cnn_model.summary()

In [None]:
# Train the CNN (20% of the training split is used for validation), then
# predict class probabilities for the held-out split.
cnn_history = cnn_model.fit(x_train, y_train, epochs=20, batch_size=8, validation_split=0.2, shuffle=True)
# The held-out features are named `x_test`; `X_test` was never defined.
pred = cnn_model.predict(x_test)

In [None]:
# These names were only imported in a commented-out cell, so import them here
# to keep the cell runnable on a fresh kernel.
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix

# Convert one-hot labels / predicted probabilities to class indices.
true_classes = y_test.argmax(axis=-1)
predicted_classes = pred.argmax(axis=-1)

cnn_accuracy = accuracy_score(true_classes, predicted_classes)
cnn_cn = confusion_matrix(true_classes, predicted_classes)

# Confusion-matrix heat map with the held-out accuracy in the title.
plt.subplots(figsize=(9,7))
sns.heatmap(cnn_cn,annot=True,fmt="1d",cbar=False,xticklabels=[0, 1, 2],yticklabels=[0, 1, 2])
plt.title("CNN Accuracy: {}".format(cnn_accuracy),fontsize=50)
plt.xlabel("Predicted",fontsize=15)
plt.ylabel("Actual",fontsize=15)
plt.show()

In [None]:
# Training curves: accuracy on the left panel, loss on the right panel.
fig3, axe1 = plt.subplots(nrows=1, ncols=2, figsize=(15,5))
accuracy_axis, loss_axis = axe1[0], axe1[1]

accuracy_axis.plot(cnn_history.history["accuracy"], label="accuracy", color="blue")
loss_axis.plot(cnn_history.history["loss"], label="loss", color="red")

accuracy_axis.title.set_text("CNN Accuracy")
loss_axis.title.set_text("CNN Loss")

for panel in (accuracy_axis, loss_axis):
    panel.set_xlabel("Epoch")
accuracy_axis.set_ylabel("Rate")

plt.show()

In [None]:
# Evaluate on the held-out split; index 1 of the result is the accuracy metric
# configured at compile time (index 0 is the loss).
results = cnn_model.evaluate(x_test, y_test)
held_out_accuracy = results[1]
print('Test accuracy: ', held_out_accuracy)