# Required modules

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import pickle
import random

In [2]:
# Put the parent of the current working directory on the import path so that
# packages living next to this notebook's folder (presumably the `utilities`
# package imported further down) can be resolved. The membership check keeps
# the cell idempotent: re-running it never adds a duplicate entry.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [3]:
from utilities.data_downloader import train_val_test_downloader, choose_one_column
from utilities.plots import visualize_latent

In [4]:
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KernelDensity
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import KernelPCA
from sklearn.pipeline import Pipeline

In [5]:
# Seed the two global random number generators (Python's `random` module and
# NumPy's legacy global state) with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Analysis Pipeline

In [6]:
def roc_auc_scoring(estimator, X, y) -> float:
    """
    ROC AUC scorer with the ``scorer(estimator, X, y)`` call signature.

    Parameters
    ----------
    estimator
        Fitted object exposing ``score_samples(X)``.
    X
        Samples to score.
    y
        True labels, handed to ``roc_auc_score`` unchanged.

    Returns
    -------
    float
        ``roc_auc_score(y, estimator.score_samples(X))``.

    NOTE(review): in make_pipe the scored estimator ends in KernelDensity,
    whose score_samples is a log-density (larger = more typical sample).
    The AUC therefore rewards models that give the positive class *higher*
    density -- confirm this is the intended polarity for the labels in use.
    """
    return roc_auc_score(y, estimator.score_samples(X))

In [12]:
def make_pipe():
    """
    Build the unfitted analysis pipeline.

    Steps, in order:
      1. 'imputer' - SimpleImputer replacing NaN with the median.
      2. 'scaler'  - RobustScaler.
      3. 'model'   - GridSearchCV over an inner Pipeline of KernelPCA followed
                     by a Gaussian KernelDensity (bandwidth='silverman'),
                     scored with roc_auc_scoring.

    The search space is declared per kernel so that only hyper-parameters the
    kernel actually uses are varied: KernelPCA ignores `degree` for every
    kernel except 'poly' and ignores `gamma` for 'linear' and 'cosine'.
    A single Cartesian grid over kernel x degree x gamma would evaluate 45
    candidates although only 17 of them are distinct models.

    Consequence for callers: `best_params_` / `cv_results_` of the 'model'
    step only contain the parameters relevant to each candidate's kernel.

    Returns
    -------
    sklearn.pipeline.Pipeline
        A new, unfitted pipeline on every call.
    """
    gammas = (0.1, 0.3, 1.0)
    degrees = (2, 3, 4)

    # Inner estimator that the grid search clones and refits per candidate.
    density_model = Pipeline([
        ('KernelPCA', KernelPCA(random_state=42)),
        ('KDE', KernelDensity(kernel='gaussian', bandwidth='silverman')),
    ])

    param_grid = [
        # No tunable kernel coefficient: one candidate per kernel.
        {'KernelPCA__kernel': ('linear', 'cosine')},
        # gamma is used, degree is ignored.
        {'KernelPCA__kernel': ('rbf', 'sigmoid'),
         'KernelPCA__gamma': gammas},
        # Only the polynomial kernel uses both gamma and degree.
        {'KernelPCA__kernel': ('poly',),
         'KernelPCA__gamma': gammas,
         'KernelPCA__degree': degrees},
    ]

    return Pipeline(
        [('imputer', SimpleImputer(
            missing_values=np.nan, strategy='median')),
         ('scaler', RobustScaler()),
         ('model', GridSearchCV(
             density_model,
             param_grid=param_grid,
             scoring=roc_auc_scoring)),
        ]
    )

# Extracted Features Analysis

In [8]:
# Load the 'features' dataset. The helper returns four objects: the train,
# validation and test splits plus the labels table (the sizes of all four are
# printed in the cell output).
train, val, test, labels = train_val_test_downloader('features')

Datasets downloaded
 - train  : 810 entries
 - val    : 174 entries
 - test   : 174 entries
 - labels : 1158 entries


In [9]:
# Training targets: the 'FlaresFlag' column of `labels`, restricted to (and
# ordered by) the index of `train`.
# NOTE(review): these values end up in roc_auc_score via the grid-search
# scorer, so this is presumably a binary flag -- confirm.
ytrain = labels.loc[train.index, 'FlaresFlag']

In [13]:
# Fresh, unfitted pipeline: imputer -> scaler -> grid-searched KernelPCA + KDE.
pipe = make_pipe()

In [None]:
# Fit the whole pipeline on the training split. `ytrain` is forwarded to the
# GridSearchCV step, where the roc_auc_scoring scorer compares it with the
# density scores of each candidate. The grid search refits the inner
# KernelPCA + KDE pipeline for every parameter combination and CV fold, so
# this cell can take a while.
pipe.fit(train, ytrain)

# Rebinned Lightcurves Analysis

In [None]:
# Load the 'interp' dataset and pass every object the helper returns through
# choose_one_column(..., 'lgRate').
# NOTE(review): this rebinds train/val/test, replacing the 'features' frames
# loaded earlier in the notebook. Also, the 'features' call unpacked four
# values (including labels); if the 'interp' call returns four as well, this
# three-way unpacking raises ValueError -- confirm the helper's return arity.
train, val, test = tuple(
    choose_one_column(split, 'lgRate')
    for split in train_val_test_downloader('interp')
)