In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing, model_selection, metrics, ensemble
import pyarrow.parquet as pq
import pickle
import itertools
import matplotlib.pyplot as plt
from collections import Counter
from imblearn.ensemble import BalancedRandomForestClassifier as RandomForestClassifier
from scipy.stats import randint as sp_randint
from sklearn.utils import class_weight
import os

import plotly.graph_objects as go
from astropy.time import Time
from time import time

In [13]:
# Directory holding the feature tables, and the CSV file names available in it.
folder = './datos_exp4/'
features_4x4x10 = 'features_4x4x10.csv'
features_5x4x5 = 'features_5x4x5.csv'
features_2x4x5 = 'features_2x4x5.csv'

# Only the 4x4x10 table is read in this cell.
path = f'{folder}{features_4x4x10}'

# Read the CSV and discard the 'Unnamed: 0' column in a single chained step
# (presumably the row index written out when the file was saved -- confirm).
df_features_tensor = pd.read_csv(path).drop(columns=['Unnamed: 0'])

print(f'total eventos: {len(df_features_tensor)}')
df_features_tensor.head()

total eventos: 6909


Unnamed: 0,oid,Amplitude_1,Amplitude_2,AndersonDarling_1,AndersonDarling_2,Autocor_length_1,Autocor_length_2,Beyond1Std_1,Beyond1Std_2,Con_1,...,sgscore1,W1-W2,W2-W3,r-W3,r-W2,g-W3,g-W2,delta_period_1,delta_period_2,Tensor
0,ZTF17aaaemke,0.319945,0.266913,0.999845,0.999997,1.0,1.0,0.361111,0.375,0.0,...,0.99875,-0.025,1.044,3.225166,2.181166,4.193614,3.149614,1.885743e-06,4.405883e-08,[ 1.65146666e+01 -1.00000000e+03 1.64708068e+...
1,ZTF17aaafyya,0.376849,0.347666,1.0,1.0,1.0,1.0,0.421875,0.388889,0.0,...,0.99625,-0.046,0.244,2.550194,2.306194,3.454954,3.210954,2.254299e-06,3.267149e-08,[1.54776381e+01 1.55176913e+01 1.56471603e+01 ...
2,ZTF17aaageae,0.218249,0.327319,1.0,0.999997,1.0,1.0,0.4,0.115385,0.0,...,1.0,0.023,0.629,1.937573,1.308573,2.133469,1.504469,6.688907e-05,6.721974e-05,[14.6432357 14.40605199 14.5821878 14.627640...
3,ZTF17aaaivsr,0.344626,0.30527,0.999993,0.821926,1.0,1.0,0.384615,0.4,0.0,...,0.992012,-0.06,1.885,4.006276,2.121276,4.565984,2.680984,1.78289e-06,1.78289e-06,[1.68730163e+01 1.66658520e+01 1.69132795e+01 ...
4,ZTF17aaaizej,0.660175,0.404425,1.0,1.0,1.0,1.0,0.317073,0.311111,0.0,...,1.0,0.009,2.871,4.185692,1.314692,4.359793,1.488793,4.163987e-08,6.98218e-05,[16.59538119 16.52297844 16.78657733 16.866158...


In [16]:
# Build a separate two-column frame with just the 'oid' and 'Tensor' columns.
# Wrapping the selection in pd.DataFrame(...) yields a fresh DataFrame object.
df_tensor = pd.DataFrame(df_features_tensor.loc[:, ['oid', 'Tensor']])

# Sanity checks: object type and number of rows.
print(type(df_tensor))
print(f'total: {len(df_tensor)}')

df_tensor.head()

<class 'pandas.core.frame.DataFrame'>
total: 6909


Unnamed: 0,oid,Tensor
0,ZTF17aaaemke,[ 1.65146666e+01 -1.00000000e+03 1.64708068e+...
1,ZTF17aaafyya,[1.54776381e+01 1.55176913e+01 1.56471603e+01 ...
2,ZTF17aaageae,[14.6432357 14.40605199 14.5821878 14.627640...
3,ZTF17aaaivsr,[1.68730163e+01 1.66658520e+01 1.69132795e+01 ...
4,ZTF17aaaizej,[16.59538119 16.52297844 16.78657733 16.866158...


 ### Defining functions to plot the confusion matrix and the feature importances

In [None]:
def plot_confusion_matrix(cm, classes, plot_name,
                          normalize=True,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and save it as an annotated heat-map image.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Normalization divides each row by its own total,
        so rows are expected to correspond to the true labels (the y-axis
        is labelled 'True label').
    classes : sequence
        Tick labels, used on both axes.
    plot_name : str or path-like
        Destination handed to ``savefig``.
    normalize : bool, default True
        When true, every row is converted to whole-number percentages of
        its row total. A row whose total is zero is reported as all zeros
        instead of NaN (NaN cannot be formatted with ``%d`` and would make
        the annotation loop raise).
    title : str or None
        Title passed to the axes.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None
        The matrix is printed and the figure is saved; the figure is not
        closed here.
    """
    cm = np.asarray(cm)  # also accept nested lists

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guarded division: entries of empty rows keep the 0.0 they were
        # initialised with, so no NaN reaches the "%d" formatting below.
        fractions = np.divide(cm, row_totals,
                              out=np.zeros(cm.shape, dtype=float),
                              where=row_totals != 0)
        cm = np.round(fractions * 100)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots(figsize=(12, 10))
    image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    # Register the image as pyplot's "current image", as plt.imshow would,
    # so a later plt.colorbar() call by the caller keeps working.
    plt.sci(image)
    ax.set_title(title)

    tick_marks = np.arange(len(classes))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(classes, rotation=45, fontsize=17)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(classes, fontsize=17)

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for (i, j), value in np.ndenumerate(cm):
        ax.text(j, i, "%d" % value,
                horizontalalignment="center",
                color="white" if value > thresh else "black",
                fontsize=16)

    ax.set_ylabel('True label', fontsize=18)
    ax.set_xlabel('Predicted label', fontsize=18)
    # Run the layout pass only after the axis labels exist so that they are
    # taken into account.
    fig.tight_layout()
    fig.savefig(plot_name, bbox_inches='tight')
    


def plot_feature_importances(model, feature_names, feature_importances_name,
                             n_plot=60, n_print=30):
    """
    Print the top-ranked feature importances and save them as a bar chart.

    Parameters
    ----------
    model : object
        Fitted estimator exposing a ``feature_importances_`` array.
    feature_names : sequence
        Feature names, indexable by integer position in the same order as
        ``model.feature_importances_``.
    feature_importances_name : str or path-like
        Destination handed to ``savefig``.
    n_plot : int, default 60
        Number of highest-ranked features drawn in the bar chart.
    n_print : int, default 30
        Number of highest-ranked features printed as ``name & value`` rows.
        Printing is restricted to the plotted features, so at most
        ``n_plot`` rows are printed.

    Returns
    -------
    None
        The figure is saved and is not closed here.
    """
    # Read the attribute once: on some estimators (e.g. tree ensembles) it
    # may be recomputed on every access -- the original read it per row.
    importances = np.asarray(model.feature_importances_)

    # Indices of the features sorted by decreasing importance.
    ranked = np.argsort(importances)[::-1][:n_plot]

    for idx in ranked[:n_print]:
        print(feature_names[idx], "& %.3f" % importances[idx])

    fig, ax = plt.subplots(figsize=(16, 5), tight_layout=True)
    x_plot = np.arange(len(ranked))
    ax.set_xticks(x_plot)
    ax.set_xticklabels([feature_names[idx] for idx in ranked],
                       rotation='vertical')
    ax.bar(x_plot, height=importances[ranked])
    fig.savefig(feature_importances_name, bbox_inches='tight')