In [39]:
from matplotlib import pyplot as plt
from tabulate import tabulate
import numpy as np
import os
import pandas as pd
import pickle
import seaborn as sns
from sklearn.metrics import classification_report

# Session-wide plot styling: matplotlib's dark theme plus seaborn's "Set1" palette.
plt.style.use('dark_background')
sns.set_palette("Set1")

In [40]:
# Raw input columns: the values prompted for by fetch_data() and the column
# labels applied to the scaler output in preprocess().
features = ['ra', 'dec', 'u', 'g', 'r', 'i', 'z', 'redshift']
# Alternative model-input subset (disabled):
#fi_cols = ['redshift', 'g-r', 'i-z', 'u-r', 'i-r', 'z-r', 'g']
# Columns kept by featurize() and handed to the classifier; the hyphenated
# names are the pairwise differences that featurize() computes.
fi_cols = ['u', 'g', 'r', 'redshift', 'g-r', 'i-z', 'u-r', 'i-r', 'z-r']
# Label column name (same value as the default `target` argument of
# pipeline_for_whole_test_data).
target = 'class'

In [41]:
def fetch_data(features):
    """Prompt for one numeric value per feature and return them as a one-row frame.

    Parameters
    ----------
    features : iterable of str
        Feature names; each one is prompted for in order and becomes a column.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with one float column per feature.

    Raises
    ------
    ValueError
        If an entered value cannot be parsed by ``float``.
    """
    prompt_template = "  '{}': "
    answers = {}
    for name in features:
        typed = input(prompt_template.format(name))
        # Each value is wrapped in a list so the frame has exactly one row.
        answers[name] = [float(typed)]

    frame = pd.DataFrame(data=answers)
    print("Raw data is fetched successfully.")
    return frame

In [42]:
# Values typed at the prompts for this run: 12 for every feature except
# redshift, which was given 1.
df = fetch_data(features=features)

Raw data is fetched successfully.


In [43]:
def preprocess(df, features, scale='data/scaling.pkl'):
    """Apply the pickled scaler to ``df`` and return the scaled values as a frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing (at least) every column named in ``features``.
    features : list of str
        Columns to scale, in the order the scaler should see them. The same
        names label the output columns.
        NOTE(review): this order presumably has to match the order used when
        the scaler was fitted — confirm against the training notebook.
    scale : str, optional
        Path to the pickled scaler (any object exposing ``transform(X=...)``).
        Defaults to the location the notebook has always used.

    Returns
    -------
    pandas.DataFrame
        Scaled values with columns ``features`` and the same index as ``df``.

    Raises
    ------
    FileNotFoundError
        If ``scale`` does not exist.
    KeyError
        If ``df`` lacks one of ``features``.
    """
    # SECURITY: pickle.load runs arbitrary code embedded in the file; only
    # point `scale` at an artifact you produced or otherwise trust.
    with open(file=scale, mode='rb') as pre_pkl:
        scaling = pickle.load(file=pre_pkl)

    # Select the columns explicitly so the values handed to the scaler line up
    # with the labels attached below, whatever column order `df` arrived in.
    scaled = scaling.transform(X=df[features])

    # Keep the caller's index so the scaled rows can still be matched to the
    # rows they came from.
    return pd.DataFrame(data=scaled, columns=features, index=df.index)

In [44]:
# Rebinds `df` to the scaled frame. Re-running this cell without re-running
# the fetch cell first would feed already-scaled values back into the scaler.
df = preprocess(df=df, features=features)
display(df)

Unnamed: 0,ra,dec,u,g,r,i,z,redshift
0,0.033316,0.310674,0.015776,0.109115,0.148938,0.176819,0.15406,0.142383


In [45]:
def featurize(df, columns=None):
    """Derive the pairwise band-difference columns and return the model inputs.

    The input frame is left untouched: the difference columns are added to a
    new frame, so passing a slice of another frame does not raise pandas'
    SettingWithCopyWarning and the caller's columns do not change.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'u', 'g', 'r', 'i' and 'z', plus any other
        column named in ``columns``.
    columns : list of str, optional
        Columns to return, in order. Defaults to the module-level ``fi_cols``.

    Returns
    -------
    pandas.DataFrame
        A new frame restricted to ``columns``.

    Raises
    ------
    KeyError
        If ``df`` lacks a required band column or a requested output column.
    """
    if columns is None:
        columns = fi_cols

    differences = {
        'g-r': df['g'] - df['r'],
        'i-z': df['i'] - df['z'],
        'u-r': df['u'] - df['r'],
        'i-r': df['i'] - df['r'],
        'z-r': df['z'] - df['r'],
    }
    # The names contain '-', so they have to be passed via ** rather than as
    # keyword literals.
    return df.assign(**differences)[list(columns)]

In [46]:
# Rebinds `df` to the columns listed in `fi_cols`. The raw 'i' and 'z' columns
# are not among them, so this cell cannot be re-run on its own output.
df = featurize(df)

In [47]:
def prediction(X, model='data/model_dumps/model_stacking_classifier.pkl'):
    """Load the pickled classifier and print the predicted class of the first row of ``X``.

    Parameters
    ----------
    X : array-like or pandas.DataFrame
        Model inputs. Only the first row is reported.
    model : str, optional
        Path to the pickled classifier (an object exposing ``predict`` and
        ``predict_proba``). Defaults to the location the notebook has always
        used.

    Returns
    -------
    None
        The result is printed, not returned.

    Raises
    ------
    FileNotFoundError
        If ``model`` does not exist.
    """
    # SECURITY: pickle.load runs arbitrary code embedded in the file; only
    # point `model` at an artifact you produced or otherwise trust.
    with open(file=model, mode='rb') as m_pkl:
        clf = pickle.load(file=m_pkl)

    pred_proba = clf.predict_proba(X=X)
    # Take the maximum over the first row only, so the confidence belongs to
    # the same sample as the label below. A maximum over the whole matrix
    # would report another row's probability whenever X has several rows.
    first_row_proba = np.asarray(pred_proba)[0]
    confidence = np.round(a=np.max(first_row_proba) * 100, decimals=2)

    pred_class = clf.predict(X=X)[0]
    # Map known labels to display names; an unexpected label is shown as-is
    # rather than being silently reported as 'Star'.
    display_names = {'QSO': 'Quasar', 'STAR': 'Star'}
    pred_class = display_names.get(pred_class, str(pred_class))
    print(f"The predicted class is '{pred_class}' with a confidence of {confidence}%.")

In [48]:
# Load the saved classifier and print its prediction for the sample held in `df`.
prediction(X=df)

The predicted class is 'Quasar' with a confidence of 97.96%.




In [49]:
def ml_pipeline(features):
    """Run the interactive single-sample flow end to end.

    Prompts for the raw feature values, scales them, derives the model
    inputs, and prints the classifier's prediction.

    Parameters
    ----------
    features : list of str
        Raw feature names to prompt for and scale.
    """
    raw = fetch_data(features=features)
    scaled = preprocess(df=raw, features=features)
    model_inputs = featurize(df=scaled)
    prediction(X=model_inputs)

In [50]:
# Full interactive run. The original note "All 15" presumably means 15 was
# entered at every prompt — TODO confirm.
ml_pipeline(features=features)

Raw data is fetched successfully.
The predicted class is 'Quasar' with a confidence of 100.0%.




In [51]:
def pipeline_for_whole_test_data(features, target='class',
                                 data_path='data/test_data.csv',
                                 model='data/model_dumps/model_stacking_classifier.pkl'):
    """Score the saved classifier on a labelled CSV and print a classification report.

    Parameters
    ----------
    features : list of str
        Raw feature columns to read from the CSV and pass to ``featurize``.
    target : str, optional
        Name of the label column in the CSV.
    data_path : str, optional
        Path to the labelled test CSV. Defaults to the location the notebook
        has always used.
    model : str, optional
        Path to the pickled classifier. Defaults to the location the notebook
        has always used.

    Returns
    -------
    None
        The report is printed, not returned.

    Notes
    -----
    No scaling step is applied here, unlike the single-sample flow.
    NOTE(review): presumably the CSV already holds scaled values — confirm.
    """
    data = pd.read_csv(filepath_or_buffer=data_path)

    # Work on an independent copy: `data[features]` is a slice of `data`, and
    # adding columns to a slice triggers pandas' SettingWithCopyWarning.
    X_test = data[features].copy()
    y_test = data[target].values

    X_test = featurize(df=X_test)

    # SECURITY: pickle.load runs arbitrary code embedded in the file; only
    # point `model` at an artifact you produced or otherwise trust.
    with open(file=model, mode='rb') as m_pkl:
        clf = pickle.load(file=m_pkl)

    cm_pred = clf.predict(X=X_test)

    print(classification_report(y_true=y_test, y_pred=cm_pred))

In [52]:
# Evaluate the saved classifier on the test CSV and print the report.
pipeline_for_whole_test_data(features)



              precision    recall  f1-score   support

         QSO       1.00      1.00      1.00     10048
        STAR       1.00      1.00      1.00      9952

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

