In [None]:
from matplotlib import pyplot as plt
import os
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Plot appearance: matplotlib's 'dark_background' style sheet and seaborn's
# "Set1" colour palette apply to every figure drawn after this cell.
plt.style.use('dark_background')
sns.set_palette("Set1")

# One shared scaler instance for the whole notebook. It is fitted on the
# training split only and then reused to transform the cv and test splits.
scaling = MinMaxScaler()

In [None]:
def mkdf(array, columns):
    """Wrap a 2-D array-like in a DataFrame with the given column labels.

    Parameters
    ----------
    array : array-like
        Row-major data; one inner sequence (or array row) per record.
    columns : sequence
        Column labels, one per column of ``array``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding ``array`` with a default integer index.
    """
    frame = pd.DataFrame(array, columns=columns)
    return frame

def featurize(df):
    """Add pairwise difference columns to ``df`` in place and return it.

    The frame must already contain the columns ``u``, ``g``, ``r``, ``i``
    and ``z``. Five new columns are appended, in this order:
    ``g-r``, ``i-z``, ``u-r``, ``i-r``, ``z-r``; each is the element-wise
    difference of the two columns named in its label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, now with the extra columns.
    """
    # (minuend, subtrahend) pairs; tuple order fixes the order in which the
    # derived columns are appended to the frame.
    difference_pairs = (
        ('g', 'r'),
        ('i', 'z'),
        ('u', 'r'),
        ('i', 'r'),
        ('z', 'r'),
    )
    for left, right in difference_pairs:
        df[left + '-' + right] = df[left] - df[right]
    return df

In [None]:
# Read the source table. The path is relative, so the CSV must sit in the
# notebook's working directory. Later cells expect it to contain the columns
# listed in `imp_cols` plus a 'class' label column.
df = pd.read_csv('SDSS_Data_F.csv')

In [None]:
# Raw columns pulled from the source table and fed to the scaler.
imp_cols = ['ra', 'dec', 'u', 'g', 'r', 'i', 'z', 'redshift']
# Columns kept for the final feature matrices: a subset of the raw columns
# ('ra', 'dec', 'i' and 'z' are dropped) plus the five difference columns
# that featurize() creates.
f_cols = ['u', 'g', 'r', 'redshift', 'g-r', 'i-z', 'u-r', 'i-r', 'z-r']

In [None]:
# Feature frame: only the raw input columns.
xdf = df[imp_cols]
# Labels as a plain array (index dropped); also used as the stratification key.
ydf = df['class'].values

In [None]:
# Stage 1: hold out 30% of all rows as the test set, stratified on the label.
(X_train,
 X_test,
 y_train,
 y_test) = train_test_split(xdf, ydf,
                            stratify=ydf, test_size=0.30, random_state=0)

# Stage 2: split the remaining 70% again 70/30 into train and cv, so the final
# proportions are roughly 49% train / 21% cv / 30% test. X_train and y_train
# are rebound here to the smaller training portion.
(X_train,
 X_cv,
 y_train,
 y_cv) = train_test_split(X_train, y_train,
                          stratify=y_train, test_size=0.30, random_state=0)

# Fit the scaler on the training portion only, then apply the same fitted
# transform to cv and test so their statistics never influence the scaling.
# NOTE: X_train / X_cv / X_test are rebound from DataFrames to the scaler's
# output; the next cell restores the column labels via mkdf().
X_train = scaling.fit_transform(X=X_train)
X_cv = scaling.transform(X=X_cv)
X_test = scaling.transform(X=X_test)

In [None]:
# Re-attach the raw column names to the scaled splits.
train_df_fea = mkdf(X_train, imp_cols)
cv_df_fea = mkdf(X_cv, imp_cols)
test_df_fea = mkdf(X_test, imp_cols)

# Append the difference columns. NOTE(review): because scaling already
# happened, these differences are taken between *scaled* values, not between
# the raw columns — confirm this ordering is intended.
train_df_fea = featurize(train_df_fea)
cv_df_fea = featurize(cv_df_fea)
test_df_fea = featurize(test_df_fea)

In [None]:
# Final feature matrices as plain arrays, restricted to (and ordered by) f_cols.
X_train_fea = train_df_fea[f_cols].values
X_cv_fea = cv_df_fea[f_cols].values
X_test_fea = test_df_fea[f_cols].values

In [None]:
def export_data(data, target_arr, filename):
    """Write a feature table together with its labels to ``./data/<filename>``.

    The labels are stored in a column named ``class`` (replacing any existing
    column of that name in the written output). The frame passed in by the
    caller is left untouched; the label column is added to a copy.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns to export.
    target_arr : array-like
        Labels, one per row of ``data``.
    filename : str
        Name of the CSV file to create inside the ``./data`` directory.

    Returns
    -------
    None
        The function only writes the file and prints a confirmation.

    Raises
    ------
    ValueError
        If ``target_arr`` does not match the number of rows in ``data``.
    OSError
        If the output directory or file cannot be created.
    """
    out_dir = './data'
    # Create the directory if needed; no error when it already exists.
    os.makedirs(out_dir, exist_ok=True)

    # assign() returns a new frame, so the caller's object (often a slice of a
    # larger frame) is not mutated and no SettingWithCopy warning is raised.
    labelled = data.assign(**{'class': target_arr})
    # index=False: the row index is not written to the file.
    labelled.to_csv(path_or_buf=os.path.join(out_dir, filename), index=False)
    print("The data is exported to '{}'.".format(filename))

In [None]:
# Write the three splits (f_cols features + 'class' label) under ./data.
export_data(data=train_df_fea[f_cols], target_arr=y_train, 
            filename='train_fea.csv')
export_data(data=cv_df_fea[f_cols], target_arr=y_cv,
            filename='cv_fea.csv')
export_data(data=test_df_fea[f_cols], target_arr=y_test,
            filename='test_fea.csv')
# Also export the test split with the original input columns (imp_cols).
# These values come from the scaled frame, so they are scaled, not raw.
export_data(data=test_df_fea[imp_cols], target_arr=y_test,
            filename='test_data.csv')