In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [1]:
def get_df(file_path):
    """Load one object's data and split it into per-filter arrays.

    The file is parsed once with ``read_sn`` and the result is reused for
    both the data table and the object ID.

    Parameters
    ----------
    file_path
        Path handed straight to ``read_sn``.

    Returns
    -------
    ID : str
        ``'SN'`` followed by the ``'SNID'`` entry of the parsed file.
    data_dict : dict
        One entry per distinct value of the ``FLT`` column; each value is a
        2-D array with the columns ``MJD``, ``FLUXCAL``, ``FLUXCALERR`` (in
        that order).
    X : numpy.ndarray
        100 evenly spaced points from the smallest to the largest ``MJD``.
    """
    # NOTE(review): assumes read_sn has no side effects, so one call is
    # equivalent to the two calls made previously -- confirm.
    parsed = read_sn(file_path)
    data = parsed['df']

    X = np.linspace(data.MJD.min(), data.MJD.max(), 100)
    columns = ['MJD', 'FLUXCAL', 'FLUXCALERR']
    data_dict = {band: df[columns].values for band, df in data.groupby('FLT')}
    ID = 'SN' + str(parsed['SNID'])

    return ID, data_dict, X

def get_label(file_path):
    """Return the object ID and its label as stored in the file header.

    The file is parsed once with ``read_sn`` and the result is reused for
    both values.

    Parameters
    ----------
    file_path
        Path handed straight to ``read_sn``.

    Returns
    -------
    ID : str
        ``'SN'`` followed by the ``'SNID'`` entry of the parsed file.
    label
        Element at index 3 of the ``'SIM_COMMENT'`` entry.
        NOTE(review): presumably the simulated type token of that comment --
        verify against the ``read_sn`` output format.
    """
    # NOTE(review): assumes read_sn has no side effects, so one call is
    # equivalent to the two calls made previously -- confirm.
    parsed = read_sn(file_path)
    label = parsed['SIM_COMMENT'][3]
    ID = 'SN' + str(parsed['SNID'])

    return ID, label

def get_xys(df):
    """Split a 2-D array into its first three columns.

    Parameters
    ----------
    df
        2-D array indexable as ``df[:, k]``. For arrays built by ``get_df``
        the columns are MJD, FLUXCAL and FLUXCALERR.

    Returns
    -------
    tuple
        ``(x_known, y, yerr)``: column 0, column 1 and column 2 of ``df``.
    """
    x_known, y, yerr = df[:, 0], df[:, 1], df[:, 2]
    return x_known, y, yerr

def find_object(obj, files):
    """Return the positions of every entry of ``files`` that contains ``obj``.

    Containment is tested with the ``in`` operator on each entry, so for
    strings this is a substring match. The result is a list of integer
    indices in ascending order (empty when nothing matches).
    """
    positions = []
    for position, entry in enumerate(files):
        if obj in entry:
            positions.append(position)
    return positions

def cleaning_df(df, method = '', clean_neg = True, percentage = 0.5):
    """Drop unusable rows from a ``(time, flux, flux error)`` array.

    The input array is never modified; a filtered array is returned.

    Parameters
    ----------
    df : numpy.ndarray
        2-D array whose column 1 is the flux and column 2 its error.
    method : str
        ``'std_dev'``: keep rows whose error is below the mean error plus
        one standard deviation of the errors.
        ``'percentage'``: keep rows whose error is below ``percentage``
        times the largest flux.
        Any other value: no error-based filtering.
    clean_neg : bool
        When true, keep only rows with strictly positive flux.
    percentage : float
        Fraction of the maximum flux used by the ``'percentage'`` method.

    Returns
    -------
    numpy.ndarray
        The rows of ``df`` that survive the selected filters.
    """
    if clean_neg:
        # The previous version first overwrote slightly negative fluxes
        # (within one error bar of zero) with 0 *in the caller's array* and
        # then dropped every row with flux <= 0, including those it had just
        # zeroed. The returned rows are the same with a single filter, and
        # the caller's data is no longer altered as a side effect.
        # NOTE(review): if the zeroed points were meant to be kept, this
        # filter would have to be ``>= 0`` -- confirm the intent.
        df = df[df[:, 1] > 0]

    # Nothing left to filter; also avoids max() raising on an empty array.
    if df.shape[0] == 0:
        return df

    if method == 'std_dev':
        threshold = df.mean(axis = 0)[2] + df.std(axis = 0)[2]
        return df[threshold > df[:, 2]]
    if method == 'percentage':
        threshold = df.max(axis = 0)[1] * percentage
        return df[threshold > df[:, 2]]
    return df

In [None]:
def gplc(index,x, y, yerr, npoints = 100, n_restarts_optimizer = 100):
    """Fit a Gaussian process to the log of the positive values of a curve.

    Only samples with ``y > 0`` are used. The regressor is fitted to
    ``log(y)`` with per-sample noise ``(yerr / y) ** 2`` and a
    ``ConstantKernel * RBF`` kernel, then evaluated on an even grid and
    mapped back with ``exp``.

    Parameters
    ----------
    index
        Not used by the body (kept for the callers' signature).
    x, y, yerr
        1-D arrays of sample positions, values and value errors.
    npoints : int
        Number of grid points between the smallest and largest ``x``.
    n_restarts_optimizer : int
        Forwarded to ``GaussianProcessRegressor``.

    Returns
    -------
    tuple
        ``(grid, prediction, error)``: the 1-D evaluation grid, the
        exponentiated prediction, and the square root of the diagonal of
        the propagated covariance.
    """
    times = x.reshape(-1, 1)

    # Grid limits and kernel scales are taken from the full input, before
    # the non-positive samples are discarded.
    t_lo, t_hi = times.min(), times.max()
    # NOTE(review): this is the variance of the raw values, while the fit
    # below runs on their logarithm -- confirm that is intended.
    flux_variance = y.var()

    positive = y > 0

    rel_err = yerr[positive] / y[positive]
    log_flux = np.log(y[positive])
    times = times[positive]

    # NOTE(review): the RBF length scale is tied to the largest x itself,
    # not to the x range -- confirm that is intended.
    amplitude = ConstantKernel(flux_variance, constant_value_bounds=(1e-5, 2*flux_variance))
    smooth = RBF(length_scale = .5*t_hi, length_scale_bounds=(1e-05, t_hi))

    model = GaussianProcessRegressor(kernel = amplitude * smooth, alpha = rel_err**2, n_restarts_optimizer = n_restarts_optimizer)
    model.fit(times, log_flux)

    grid = np.linspace(t_lo, t_hi, npoints).reshape(-1, 1)
    log_pred, log_cov = model.predict(grid, return_cov=True)

    # Back to linear space: scale the log-space covariance by the outer
    # product of the exponentiated prediction.
    flux_pred = np.exp(log_pred)
    flux_cov = (flux_pred * flux_pred.reshape(-1, 1)) * log_cov
    # time, result, error
    return grid.ravel(), flux_pred, np.sqrt(flux_cov.diagonal())

# Call this function only on an object that already carries the GP results (its *_GP attributes).
def _plot_band(ax, obj, band, color):
    """Draw one band's GP curve, error band and original points on ``ax``."""
    gp_result = getattr(obj, band + '_GP')
    y, yerr = gp_result[1], gp_result[2]
    yOG = getattr(obj, band)

    ax.set_title(band)
    ax.set_ylabel('Flux')
    ax.set_xlabel('Time in days')

    # Vertical limits are derived from the largest original flux value.
    peak = max(yOG[:,1])
    ax.set_ylim(ymin=peak*(-0.5), ymax=peak*2)

    ax.fill_between(obj.Xaxis, y-yerr, y+yerr, alpha=.4, color=color)
    ax.plot(obj.Xaxis, y, color = color)
    ax.errorbar(yOG[:,0], yOG[:,1],yerr = yOG[:,2],fmt='o', color=color)


def print_filters(obj):
    """Plot the four bands of one object on a 2x2 grid of axes.

    For each of ``desg``, ``desi``, ``desr`` and ``desz`` the panel shows
    the curve stored in ``obj.<band>_GP`` (element 1, with element 2 as a
    shaded +/- band) against ``obj.Xaxis``, plus the original points from
    ``obj.<band>`` (columns 0, 1, 2 as x, y, y error) as error bars.

    Parameters
    ----------
    obj
        Object exposing ``ID``, ``Xaxis`` and, for every band, the
        attributes ``<band>`` and ``<band>_GP``.

    Returns
    -------
    None
        The figure is created through ``plt.subplots`` and left to the
        active matplotlib backend.
    """
    fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

    fig.suptitle('Object : ' + obj.ID, fontsize=16)

    # Panel order (row-major) and colour of each band.
    band_colors = (('desg', 'g'), ('desi', 'y'), ('desr', 'r'), ('desz', 'b'))
    for ax, (band, color) in zip(axs.flatten(), band_colors):
        _plot_band(ax, obj, band, color)

In [None]:
def wavelets(obj, wavelet = 'sym2', mlev = 2 ):
    """Build one flat feature vector from the four bands' GP curves.

    For each of ``desg_GP``, ``desi_GP``, ``desr_GP`` and ``desz_GP`` (in
    that order), element 1 of ``obj[<key>]`` is passed to ``wt.swt`` with
    the requested wavelet and level; the result is flattened and the four
    pieces are concatenated.

    Parameters
    ----------
    obj
        Mapping-like object indexable by the four ``*_GP`` keys.
    wavelet : str
        Wavelet name handed to ``wt.Wavelet``.
    mlev : int
        Decomposition level handed to ``wt.swt``.

    Returns
    -------
    numpy.ndarray
        1-D array of all coefficients.
    """
    # NOTE(review): `wt` is presumably the PyWavelets module -- confirm the import.
    wav = wt.Wavelet(wavelet)

    per_band = []
    for key in ('desg_GP', 'desi_GP', 'desr_GP', 'desz_GP'):
        band_coeffs = wt.swt(obj[key][1], wav, level=mlev)
        per_band.append(np.array(band_coeffs).flatten())

    return np.concatenate(per_band)

In [None]:
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print and plot the confusion matrix of ``y_pred`` against ``y_true``.

    With ``normalize=True`` every row is divided by its row sum before
    printing and plotting. ``classes`` supplies the tick labels of both
    axes. When ``title`` is empty or None a default title matching the
    ``normalize`` setting is used. Returns the matplotlib axes.
    """
    banner = ('Normalized confusion matrix' if normalize
              else 'Confusion matrix, without normalization')
    if not title:
        title = banner

    # Compute the confusion matrix, optionally as per-row fractions.
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        cm = cm.astype(float) / cm.sum(axis=1, keepdims=True)
    print(banner)

    print(cm)

    fig, ax = plt.subplots()
    image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(image, ax=ax)
    n_rows, n_cols = cm.shape
    # Show every tick and label it with the matching entry of `classes`.
    ax.set(xticks=np.arange(n_cols),
           yticks=np.arange(n_rows),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the x tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; switch to white text on cells darker than half the maximum.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in np.ndindex(n_rows, n_cols):
        value = cm[i, j]
        text_color = "white" if value > thresh else "black"
        ax.text(j, i, format(value, fmt),
                ha="center", va="center",
                color=text_color)
    fig.tight_layout()
    return ax