In [1]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import functions
import models
import embedder
import training_functions
from torch.utils import data
import glob
import dataset
from preprocessing import linear_interpolation_collate_fn
import time
import samplers
import frontier
import pandas as pd
from scipy.special import softmax

# Select the compute device: the first GPU when CUDA is available, otherwise
# the CPU. The device must not be forced to "cuda:0" unconditionally, or every
# later `.to(device)` call fails on a CPU-only host.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device in use : '+str(device))

# Shared run configuration; more entries are added further down the notebook.
parameters = {
    'device': device,
    'tmps_form_last_step': time.time(),
}

# Every DataLoader setting is gathered in this dictionary.
# TODO: revisit these settings for the tweet data.
dataloader_params = {
    'dataset': None,  # placeholder, replaced once the dataset is built
    'batch_size': 2,
    'shuffle': False,
    'batch_sampler': samplers.OppositeSameSizeTwoSentenceBatchSampler,
    'sampler': None,
    'num_workers': 0,
    'collate_fn': linear_interpolation_collate_fn,
    'pin_memory': False,
    'drop_last': False,
    'timeout': 0,
    'worker_init_fn': None,
    'divide_by': [1, 2, 5, 20],
    'divide_at': [0, 20, 30, 50],
}

# Every setting of the word-embedding layer is gathered in this dictionary.
embedder_params = {
    'path': './data/model_embedding/fine_tune_W2V.model',
    'padding_idx': None,
    'max_norm': None,
    'norm_type': 2.0,
    'scale_grad_by_freq': False,
    'sparse': False,
    '_weight': None,
}

# Build the embedder and move it to the selected device.
parameters['embedder'] = (
    embedder.W2VCustomEmbedding(**embedder_params)
    .to(parameters['device'])
)

# Dataset read from the '20review_binary' csv file under ../Data/Yelp/.
# (Earlier source: /home/alexis/Project/Data/NLP_Dataset/all_setences_en_processed.tsv)
dataloader_params['dataset'] = dataset.YelpTweetDataset(
    path='../Data/Yelp/',
    file_name='20review_binary',
    file_type='csv',
    text_column='text',
    label_column='target',
    return_id=True,
    device=parameters['device'],
)

# Hand the shared parameters (which carry the embedder) to the dataset.
dataloader_params['dataset'].set_embedder(parameters)

# Vocabulary index of the '<pad>' token.
parameters['pad_token'] = parameters['embedder'].word2index['<pad>']

# Should set all parameters of model in this dictionary
# NOTE(review): the triple-quoted block below is a bare string literal, not live
# code: it is never assigned or evaluated. It looks like a disabled
# configuration for a Transformer-style model — confirm before removing it.
'''model_params = dict(
    ntoken=len(parameters['embedder'].word2index),  # len(TEXT.vocab.stoi), # the size of vocabulary
    ninp=parameters['embedder'].embedding_dim,  # embedding dimension
    nhid=512,  # the dimension of the feedforward network model in nn.TransformerEncoder
    nlayers=6,  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder 10-16
    nhead=10,  # the number of heads in the multi_head_attention models
    dropout=0.1,
    device=parameters['device']
)'''

# Report the largest value in the dataset's `size` collection; the same value
# is reused below as the encoder's max_length.
print('Longer sentence in data : '+str(max(dataloader_params['dataset'].size)))

# Every setting of the attention auto-encoder is gathered in this dictionary.
encoder_params = {
    'embedder': parameters['embedder'],
    'dropout_p': 0.1,
    'device': parameters['device'],
    'teacher_forcing_ratio': 0,  # not training
    'num_layers': 2,
    'bidirectional': False,
    'encode_size': 512,
    'max_length': max(dataloader_params['dataset'].size),
}

# Disabled settings of a separate classifier model, kept for reference:
# classifier_params = dict(
#     embedder=parameters['embedder'],
#     dropout=0.5,
#     layer_dropout=0.3,
#     device=parameters['device'],  # decide whether to keep this
#     n_layers=2,
#     bidirectional=False,
#     n_hidden=512,
#     n_out=2  # should be derived from the number of classes in the dataset
# )

# Number of target classes, taken from the dataset.
model_params = {'num_class': dataloader_params['dataset'].num_class}

# Build the attention auto-encoder, then wrap it (together with the embedder and
# the number of classes) into the encoder/classifier/decoder model.
parameters['encoder_model'] = models.AttnAutoEncoderRNN(**encoder_params).to(parameters['device'])
parameters['model'] = models.EncoderClassifierDecoder(
    parameters['encoder_model'], parameters['embedder'], model_params['num_class'], device
).to(parameters['device'])

# Expose the sub-modules of the wrapped model under their own keys.
parameters['encoder_model'] = parameters['model'].encoder
parameters['classifier_model'] = parameters['model'].classifier

name_execution = 'FromGPU4_EncoderUnique'  # TODO: change for another run

# Restore the trained weights. glob returns an empty list when the checkpoint is
# missing, in which case nothing is loaded; say so explicitly, because every
# later cell analyses the classifier weights.
checkpoint_pattern = "./executions/" + str(name_execution) + "/models/Model_Epoch_5.pt"
checkpoint_paths = glob.glob(checkpoint_pattern)
if not checkpoint_paths:
    print('WARNING: no checkpoint matched ' + checkpoint_pattern
          + ' ; the model keeps its freshly initialised weights')
for f in checkpoint_paths:
    print('model import : '+str(f))
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    parameters['model'].load_state_dict(torch.load(str(f), map_location=device))

# Switch to evaluation mode; as the last expression this also displays the
# model architecture.
parameters['model'].eval()

Device in use : cuda:0
sizes
104364
104364
Longer sentence in data : 20
cuda:0
model import : ./executions/FromGPU4_EncoderUnique/models/Model_Epoch_5.pt


EncoderClassifierDecoder(
  (encoder): AttnAutoEncoderRNN(
    (embedder): W2VCustomEmbedding(192192, 300)
    (encoder): EncoderRNN(
      (embedding): W2VCustomEmbedding(192192, 300)
      (gru): GRU(300, 512, num_layers=2)
    )
    (decoder): AttnDecoderRNN(
      (embedding): W2VCustomEmbedding(192192, 300)
      (attn): Linear(in_features=812, out_features=21, bias=True)
      (attn_combine): Linear(in_features=812, out_features=300, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
      (gru): GRU(300, 512, num_layers=2)
      (out): Linear(in_features=512, out_features=192193, bias=True)
    )
  )
  (embedder): W2VCustomEmbedding(192192, 300)
  (classifier): Linear(in_features=1024, out_features=2, bias=True)
  (sig_out): Softmax(dim=1)
)

In [2]:
# Copy the trainable parameters of the classification layer to numpy:
# the one whose name contains "weight" goes to `weight`, any other to `biais`.
for name, param in parameters['model'].named_parameters():
    if not (param.requires_grad and "classifier" in name):
        continue
    values = param.data.cpu().numpy()
    if "weight" in name:
        weight = values
    else:
        biais = values
    print(name, param.data.shape)

classifier.weight torch.Size([2, 1024])
classifier.bias torch.Size([2])


In [3]:
import pandas as pd
import numpy as np

In [4]:
weight

array([[-0.09971751, -0.07739043, -0.3053918 , ...,  0.01417913,
         0.17426673, -0.05969966],
       [ 0.02784912,  0.01768062,  0.13319671, ..., -0.0218176 ,
        -0.03291409,  0.1052188 ]], dtype=float32)

In [5]:
biais

array([ 0.00771963, -0.02887552], dtype=float32)

In [6]:
# Empty table with one column per weight (labelled 0..1023) plus a 'biais' column.
plans = pd.DataFrame(columns=[*range(1024), 'biais'])

In [7]:
plans.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1015,1016,1017,1018,1019,1020,1021,1022,1023,biais


In [8]:
# One row per output unit of the classifier: its weight vector followed by its
# bias. The table is built in one call because DataFrame.append was removed in
# pandas 2.0; this also makes the cell idempotent, so re-running it no longer
# stacks duplicate rows. Values are cast to float64 so the downstream projection
# is computed in double precision. Rows are labelled 0 and 1.
plans = pd.DataFrame(
    np.column_stack([weight, biais]).astype(np.float64),
    columns=plans.columns,
)

In [9]:
plans.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1015,1016,1017,1018,1019,1020,1021,1022,1023,biais
0,-0.099718,-0.07739,-0.305392,0.256532,-0.033397,0.220542,0.105602,-0.051412,-0.292714,0.01257,...,0.048517,-0.018064,-0.039292,0.071533,-0.090711,-0.501262,0.014179,0.174267,-0.0597,0.00772
0,0.027849,0.017681,0.133197,-0.098239,0.030657,-0.089705,0.001464,0.136552,0.187572,-0.009032,...,-0.00874,0.002646,0.013864,-0.100081,0.069355,0.321744,-0.021818,-0.032914,0.105219,-0.028876


In [10]:
import pickle as pkl

In [11]:
# Load the stored points and discard the index column written by a previous
# to_csv call.
base_data = (
    pd.read_csv('./executions/FRONTIER/nn_frontier_V3_provisoire_10000.csv')
    .drop(columns='Unnamed: 0')
)

In [12]:
base_data.head()

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,1022,1023,pred_class_0,pred_class_1,class,good_predict,frontier,nearest,value_0,value_1
0,31239_87918,-0.643436,-0.850516,-0.430191,-0.573974,0.999807,0.394134,0.831877,0.506907,-0.216235,...,-0.587946,-0.574902,0.495721,0.5042793,frontier,,True,,-0.130172,-0.113055
1,31239,0.253644,-0.321084,-0.589449,-0.970542,0.998823,0.319632,0.981234,-0.799514,-0.240576,...,-0.52576,-0.922458,1.0,1.740372e-25,0,True,False,87918.0,35.009411,-22.001118
2,49277_50417,-0.712807,-0.918899,-0.552527,-0.561892,0.999309,0.409713,-0.743229,0.854451,-0.015443,...,-0.560251,-0.478924,0.50625,0.4937504,frontier,,True,,0.066668,0.041668
3,49277,-0.569401,-0.987312,-0.599012,-0.194117,0.999915,0.700822,-0.892934,0.797688,0.506043,...,0.289179,-0.564293,1.0,8.848381000000001e-27,0,True,False,50417.0,35.874855,-24.114708
4,1942_19126,-0.730936,-0.921459,-0.147092,-0.221093,0.991928,0.619277,0.963386,0.098669,-0.202306,...,0.881488,-0.656407,0.494795,0.5052046,frontier,,True,,-0.617754,-0.596934


In [13]:
# Keep only the rows that are not flagged as frontier points.
base_data = base_data.loc[base_data['frontier'].eq(False)]

In [14]:
base_data.head()

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,1022,1023,pred_class_0,pred_class_1,class,good_predict,frontier,nearest,value_0,value_1
1,31239,0.253644,-0.321084,-0.589449,-0.970542,0.998823,0.319632,0.981234,-0.799514,-0.240576,...,-0.52576,-0.922458,1.0,1.740372e-25,0,True,False,87918.0,35.009411,-22.001118
3,49277,-0.569401,-0.987312,-0.599012,-0.194117,0.999915,0.700822,-0.892934,0.797688,0.506043,...,0.289179,-0.564293,1.0,8.848381000000001e-27,0,True,False,50417.0,35.874855,-24.114708
5,1942,-0.09962,-0.986444,-0.361907,0.04022,0.960831,0.166444,0.99353,0.699363,-0.028185,...,0.806135,-0.645377,1.0,2.634413e-26,0,True,False,19126.0,35.680145,-23.218409
7,20118,0.808946,-0.716721,-0.590019,-0.362874,0.999995,0.036835,0.978763,0.854471,-0.250788,...,-0.989469,-0.224594,1.077525e-11,1.0,1,True,False,104041.0,-15.793471,9.460297
9,60344,-0.784158,-0.942368,-0.52778,-0.306043,-0.835234,0.623065,0.975667,-0.918574,0.009565,...,0.206828,-0.823972,1.0,4.594579e-27,0,True,False,58133.0,36.932259,-23.712664


In [15]:
len(base_data)

10000

In [16]:
# Split each row of `plans` into its 1024 weights and its bias.
# The bias is read with a scalar positional lookup: the former
# `plans.iloc[r, 1024:1025][0]` relied on an integer key falling back to a
# position on a label-indexed Series, which pandas has deprecated.
params_1 = plans.iloc[0, :1024]
bias_1 = plans.iloc[0, 1024]
params_2 = plans.iloc[1, :1024]
bias_2 = plans.iloc[1, 1024]

In [17]:
bias_1

0.007719630375504494

In [18]:
def frontierd(idx):
    """Return the identifier given to the projection of sample ``idx``.

    The identifier is the sample id followed by the ``"_prj"`` suffix. The id
    is converted with ``str`` first, so integer ids (for example an id column
    parsed as numbers) no longer raise ``TypeError``.
    """
    return str(idx) + "_prj"

In [19]:
# Feature matrix with one column per sample: columns 1..1024 of base_data,
# transposed so that each row is one feature.
point = base_data.iloc[:, 1:1025].T
# Give the rows the same labels as the weight vector.
point.index = params_1.index
# Identifier of the projection of each sample.
ids = [frontierd(sample_id) for sample_id in base_data['id']]

In [20]:
point

Unnamed: 0,1,3,5,7,9,11,13,15,17,19,...,19829,19831,19833,19835,19837,19839,19841,19843,19845,19847
0,0.253644,-0.569401,-0.099620,0.808946,-0.784158,-0.812745,-0.762446,0.930629,-0.449690,0.592340,...,-0.369841,0.921774,-0.958439,0.478157,-0.637405,0.645882,0.516510,0.453954,0.610930,-0.724334
1,-0.321084,-0.987312,-0.986444,-0.716721,-0.942368,-0.955966,0.999716,-0.938721,-0.978236,-0.909602,...,-0.762898,-0.971933,-0.956549,0.362413,-0.855458,-0.751155,-0.881884,-0.968562,0.972164,-0.906010
2,-0.589449,-0.599012,-0.361907,-0.590019,-0.527780,-0.743353,-0.866843,-0.961780,-0.773313,-0.946830,...,-0.705224,-0.453048,-0.925221,-0.904542,-0.909877,-0.913937,-0.026949,-0.563381,-0.961146,-0.699378
3,-0.970542,-0.194117,0.040220,-0.362874,-0.306043,-0.116108,-0.481742,-0.305426,-0.704228,0.204036,...,-0.280179,-0.153294,-0.388090,-0.443570,-0.397430,-0.499451,-0.201459,-0.067864,-0.474757,-0.073857
4,0.998823,0.999915,0.960831,0.999995,-0.835234,0.985474,0.999984,0.999863,0.999986,0.997934,...,0.999549,0.999970,0.999979,0.997960,0.996750,0.986237,0.322725,0.998915,0.996357,-0.660983
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1019,0.331789,-0.320049,0.658584,0.918464,-0.568760,0.350692,0.087920,0.221356,0.962919,0.518410,...,0.676246,0.018801,0.204985,-0.360759,-0.225937,-0.003226,-0.307744,0.097742,-0.141381,0.414023
1020,-0.980479,-0.999913,-0.997743,0.038868,-0.832971,-0.998910,-0.824413,-0.816798,0.295481,0.974505,...,-0.826246,-0.998247,-0.825848,-0.976177,-0.999903,-0.993910,0.158497,-0.279693,-0.999995,-0.968882
1021,-0.429701,-0.986968,-0.902522,-0.910976,-0.108069,0.973692,-0.285662,-0.993370,0.988545,0.955299,...,-0.992874,-0.991746,0.587978,0.763571,-0.285082,-0.665404,0.769506,-0.921997,-0.715230,0.991823
1022,-0.525760,0.289179,0.806135,-0.989469,0.206828,0.011771,-0.503033,-0.999868,0.746731,0.322754,...,0.335442,-0.008170,0.842988,-0.999782,0.364845,-0.733520,-0.589332,-0.352998,0.410676,-0.956356


In [21]:
# The two class scores are equal where
# (params_1 - params_2) . x + (bias_1 - bias_2) = 0, so the difference of the two
# rows is the normal vector and offset of that hyperplane.
params_frontier = params_1.sub(params_2)
bias_frontier = bias_1 - bias_2
# Euclidean length of the normal vector.
norm_param = np.linalg.norm(params_frontier)

In [22]:
#np.repeat([params_frontier.to_numpy()], 2, axis=0)/norm_param

In [23]:
# Orthogonal projection of every column x of `point` onto the hyperplane
# w . x + b = 0, with w = params_frontier and b = bias_frontier:
#     x_proj = x - ((w . x + b) / ||w||) * (w / ||w||)
points_matrix = point.to_numpy()
frontier_weights = params_frontier.to_numpy()

# Signed distance of each sample to the hyperplane (one value per column).
signed_distance = np.dot(frontier_weights/norm_param, points_matrix) + bias_frontier/norm_param

# The column vector of weights broadcasts against the per-sample distances.
new_point = points_matrix - signed_distance * frontier_weights[:, np.newaxis] / norm_param

In [24]:
new_point.shape

(1024, 10000)

In [25]:
# Sanity check: evaluate (params_frontier . x + bias_frontier) / norm_param on
# the projected points. Values at floating-point noise level mean the
# projections lie on the hyperplane.
np.dot(params_frontier.to_numpy(), new_point/norm_param) + bias_frontier/norm_param

array([-1.41293227e-15, -3.30464822e-16,  1.95243127e-15, ...,
       -9.54097912e-18, -4.04971195e-15, -2.60381994e-15])

In [26]:
# NOTE(review): same expression as the previous cell (re-evaluated sanity check
# of the projected points against the hyperplane equation); likely redundant.
np.dot(params_frontier.to_numpy(), new_point/norm_param) + bias_frontier/norm_param

array([-1.41293227e-15, -3.30464822e-16,  1.95243127e-15, ...,
       -9.54097912e-18, -4.04971195e-15, -2.60381994e-15])

In [27]:
# One row per projected sample, labelled with its "<id>_prj" identifier.
prjs = pd.DataFrame(new_point.T, index=ids)

In [28]:
prjs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
31239_prj,0.316054,-0.274573,-0.374878,-1.144108,1.03016,0.167849,0.930286,-0.707556,-0.005604,0.989432,...,1.124714,-0.295803,0.919728,0.831049,-0.462189,0.410098,-0.577838,-0.447312,-0.62712,-0.841774
49277_prj,-0.50373,-0.93837,-0.373228,-0.376752,1.032889,0.541108,-0.946544,0.894452,0.753293,0.98888,...,0.151001,-0.071348,-0.880766,0.676531,-0.32723,-0.237648,-0.576231,-1.005499,0.182523,-0.479393
1942_prj,-0.035143,-0.938392,-0.140229,-0.139093,0.993206,0.009635,0.940896,0.794367,0.214568,0.989082,...,0.859473,-0.079872,-0.940694,0.548671,0.449809,0.739487,-0.581767,-0.920716,0.701419,-0.562022
20118_prj,0.781301,-0.737324,-0.685067,-0.28599,0.986113,0.10407,1.001331,0.813737,-0.354873,1.004681,...,0.170145,0.813801,0.982247,0.755497,-0.71838,0.883776,-0.139488,-0.903175,-0.94457,-0.260334
60344_prj,-0.71777,-0.892891,-0.299529,-0.490673,-0.801899,0.461606,0.921472,-0.820754,0.259516,0.988758,...,-0.068496,-0.047562,0.957812,1.013444,-0.021251,-0.485458,-0.404662,-0.126802,0.099007,-0.738145


In [29]:
columns_to_add = ['pred_class_0', 'pred_class_1', 'class', 'good_predict', 'frontier', 'nearest', 'value_0', 'value_1']

# Raw score of each projected point for both hyperplanes
# (TODO: still to be thought through).
prjs['value_0'] = np.dot(params_1.to_numpy(), new_point) + bias_1
prjs['value_1'] = np.dot(params_2.to_numpy(), new_point) + bias_2

# Softmax over the two scores, evaluated once: row 0 is the first score's
# probability, row 1 the second's (TODO: still to be thought through).
prjs['pred_class_0'], prjs['pred_class_1'] = softmax(
    np.stack([prjs['value_0'], prjs['value_1']]), axis=0
)

# Label and prediction flag are copied from the original samples, in row order.
prjs['class'] = list(base_data['class'])
prjs['good_predict'] = list(base_data['good_predict'])

# Every projection is flagged as a frontier point and has no 'nearest' value.
prjs['frontier'] = [True] * len(base_data)
prjs['nearest'] = [None] * len(base_data)

In [30]:
prjs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1022,1023,value_0,value_1,pred_class_0,pred_class_1,class,good_predict,frontier,nearest
31239_prj,0.316054,-0.274573,-0.374878,-1.144108,1.03016,0.167849,0.930286,-0.707556,-0.005604,0.989432,...,-0.62712,-0.841774,-1.2117,-1.2117,0.5,0.5,0,True,True,
49277_prj,-0.50373,-0.93837,-0.373228,-0.376752,1.032889,0.541108,-0.946544,0.894452,0.753293,0.98888,...,0.182523,-0.479393,-2.238958,-2.238958,0.5,0.5,0,True,True,
1942_prj,-0.035143,-0.938392,-0.140229,-0.139093,0.993206,0.009635,0.940896,0.794367,0.214568,0.989082,...,0.701419,-0.562022,-1.740506,-1.740506,0.5,0.5,0,True,True,
20118_prj,0.781301,-0.737324,-0.685067,-0.28599,0.986113,0.10407,1.001331,0.813737,-0.354873,1.004681,...,-0.94457,-0.260334,0.251276,0.251276,0.5,0.5,1,True,True,
60344_prj,-0.71777,-0.892891,-0.299529,-0.490673,-0.801899,0.461606,0.921472,-0.820754,0.259516,0.988758,...,0.099007,-0.738145,-1.597932,-1.597932,0.5,0.5,0,True,True,


In [31]:
base_data.head()

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,1022,1023,pred_class_0,pred_class_1,class,good_predict,frontier,nearest,value_0,value_1
1,31239,0.253644,-0.321084,-0.589449,-0.970542,0.998823,0.319632,0.981234,-0.799514,-0.240576,...,-0.52576,-0.922458,1.0,1.740372e-25,0,True,False,87918.0,35.009411,-22.001118
3,49277,-0.569401,-0.987312,-0.599012,-0.194117,0.999915,0.700822,-0.892934,0.797688,0.506043,...,0.289179,-0.564293,1.0,8.848381000000001e-27,0,True,False,50417.0,35.874855,-24.114708
5,1942,-0.09962,-0.986444,-0.361907,0.04022,0.960831,0.166444,0.99353,0.699363,-0.028185,...,0.806135,-0.645377,1.0,2.634413e-26,0,True,False,19126.0,35.680145,-23.218409
7,20118,0.808946,-0.716721,-0.590019,-0.362874,0.999995,0.036835,0.978763,0.854471,-0.250788,...,-0.989469,-0.224594,1.077525e-11,1.0,1,True,False,104041.0,-15.793471,9.460297
9,60344,-0.784158,-0.942368,-0.52778,-0.306043,-0.835234,0.623065,0.975667,-0.918574,0.009565,...,0.206828,-0.823972,1.0,4.594579e-27,0,True,False,58133.0,36.932259,-23.712664


In [32]:
# Use the sample id as row label and remove it from the columns.
base_data = base_data.set_index('id')

In [33]:
# Turn every column label into a string so that the labels can be matched
# against base_data's column labels.
prjs = prjs.rename(columns=str)

In [34]:
# Projections with their columns arranged in base_data's column order.
all_data = prjs.loc[:, base_data.columns]

In [35]:
prjs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1022,1023,value_0,value_1,pred_class_0,pred_class_1,class,good_predict,frontier,nearest
31239_prj,0.316054,-0.274573,-0.374878,-1.144108,1.03016,0.167849,0.930286,-0.707556,-0.005604,0.989432,...,-0.62712,-0.841774,-1.2117,-1.2117,0.5,0.5,0,True,True,
49277_prj,-0.50373,-0.93837,-0.373228,-0.376752,1.032889,0.541108,-0.946544,0.894452,0.753293,0.98888,...,0.182523,-0.479393,-2.238958,-2.238958,0.5,0.5,0,True,True,
1942_prj,-0.035143,-0.938392,-0.140229,-0.139093,0.993206,0.009635,0.940896,0.794367,0.214568,0.989082,...,0.701419,-0.562022,-1.740506,-1.740506,0.5,0.5,0,True,True,
20118_prj,0.781301,-0.737324,-0.685067,-0.28599,0.986113,0.10407,1.001331,0.813737,-0.354873,1.004681,...,-0.94457,-0.260334,0.251276,0.251276,0.5,0.5,1,True,True,
60344_prj,-0.71777,-0.892891,-0.299529,-0.490673,-0.801899,0.461606,0.921472,-0.820754,0.259516,0.988758,...,0.099007,-0.738145,-1.597932,-1.597932,0.5,0.5,0,True,True,


In [36]:
all_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1022,1023,pred_class_0,pred_class_1,class,good_predict,frontier,nearest,value_0,value_1
31239_prj,0.316054,-0.274573,-0.374878,-1.144108,1.03016,0.167849,0.930286,-0.707556,-0.005604,0.989432,...,-0.62712,-0.841774,0.5,0.5,0,True,True,,-1.2117,-1.2117
49277_prj,-0.50373,-0.93837,-0.373228,-0.376752,1.032889,0.541108,-0.946544,0.894452,0.753293,0.98888,...,0.182523,-0.479393,0.5,0.5,0,True,True,,-2.238958,-2.238958
1942_prj,-0.035143,-0.938392,-0.140229,-0.139093,0.993206,0.009635,0.940896,0.794367,0.214568,0.989082,...,0.701419,-0.562022,0.5,0.5,0,True,True,,-1.740506,-1.740506
20118_prj,0.781301,-0.737324,-0.685067,-0.28599,0.986113,0.10407,1.001331,0.813737,-0.354873,1.004681,...,-0.94457,-0.260334,0.5,0.5,1,True,True,,0.251276,0.251276
60344_prj,-0.71777,-0.892891,-0.299529,-0.490673,-0.801899,0.461606,0.921472,-0.820754,0.259516,0.988758,...,0.099007,-0.738145,0.5,0.5,0,True,True,,-1.597932,-1.597932


In [37]:
base_data.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1022,1023,pred_class_0,pred_class_1,class,good_predict,frontier,nearest,value_0,value_1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
31239,0.253644,-0.321084,-0.589449,-0.970542,0.998823,0.319632,0.981234,-0.799514,-0.240576,1.0,...,-0.52576,-0.922458,1.0,1.740372e-25,0,True,False,87918.0,35.009411,-22.001118
49277,-0.569401,-0.987312,-0.599012,-0.194117,0.999915,0.700822,-0.892934,0.797688,0.506043,1.0,...,0.289179,-0.564293,1.0,8.848381000000001e-27,0,True,False,50417.0,35.874855,-24.114708
1942,-0.09962,-0.986444,-0.361907,0.04022,0.960831,0.166444,0.99353,0.699363,-0.028185,1.0,...,0.806135,-0.645377,1.0,2.634413e-26,0,True,False,19126.0,35.680145,-23.218409
20118,0.808946,-0.716721,-0.590019,-0.362874,0.999995,0.036835,0.978763,0.854471,-0.250788,1.0,...,-0.989469,-0.224594,1.077525e-11,1.0,1,True,False,104041.0,-15.793471,9.460297
60344,-0.784158,-0.942368,-0.52778,-0.306043,-0.835234,0.623065,0.975667,-0.918574,0.009565,1.0,...,0.206828,-0.823972,1.0,4.594579e-27,0,True,False,58133.0,36.932259,-23.712664


In [38]:
type(base_data['0'][0])

numpy.float64

In [39]:
# Stack the projections on top of the original samples (row-wise).
new_data = pd.concat((all_data, base_data), axis=0)

In [40]:
new_data[9995:10005]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1022,1023,pred_class_0,pred_class_1,class,good_predict,frontier,nearest,value_0,value_1
81702_prj,0.707255,-0.705416,-0.702931,-0.670133,1.017053,0.287787,-1.04966,-0.355229,0.030633,0.989607,...,-0.833196,-0.388934,0.5,0.5,0,True,True,,-1.839661,-1.839661
90449_prj,0.554459,-0.853602,0.103522,-0.306996,0.341779,0.046343,0.956542,-0.589221,-0.213174,0.938833,...,-0.650964,-0.67806,0.5,0.5,0,True,True,,-1.693151,-1.693151
31142_prj,0.501376,-0.93322,-0.40034,-0.199747,1.022727,-0.758804,0.282275,0.26433,-0.101068,0.99197,...,-0.430015,-0.099084,0.5,0.5,0,True,True,,-1.451275,-1.451275
50588_prj,0.676574,1.021087,-0.735453,-0.657319,1.029319,0.129979,0.79453,-0.284003,-0.425282,0.988884,...,0.304063,-0.679548,0.5,0.5,0,True,True,,-1.819325,-1.819325
67912_prj,-0.660154,-0.858179,-0.478719,-0.252346,-0.628756,-0.139738,0.935174,0.765037,0.441098,0.989132,...,-1.060591,-0.294455,0.5,0.5,0,True,True,,-1.094007,-1.094007
31239,0.253644,-0.321084,-0.589449,-0.970542,0.998823,0.319632,0.981234,-0.799514,-0.240576,1.0,...,-0.52576,-0.922458,1.0,1.740372e-25,0,True,False,87918.0,35.009411,-22.001118
49277,-0.569401,-0.987312,-0.599012,-0.194117,0.999915,0.700822,-0.892934,0.797688,0.506043,1.0,...,0.289179,-0.564293,1.0,8.848381000000001e-27,0,True,False,50417.0,35.874855,-24.114708
1942,-0.09962,-0.986444,-0.361907,0.04022,0.960831,0.166444,0.99353,0.699363,-0.028185,1.0,...,0.806135,-0.645377,1.0,2.634413e-26,0,True,False,19126.0,35.680145,-23.218409
20118,0.808946,-0.716721,-0.590019,-0.362874,0.999995,0.036835,0.978763,0.854471,-0.250788,1.0,...,-0.989469,-0.224594,1.077525e-11,1.0,1,True,False,104041.0,-15.793471,9.460297
60344,-0.784158,-0.942368,-0.52778,-0.306043,-0.835234,0.623065,0.975667,-0.918574,0.009565,1.0,...,0.206828,-0.823972,1.0,4.594579e-27,0,True,False,58133.0,36.932259,-23.712664


In [41]:
# Write the projections together with the original samples to disk.
new_data.to_csv(path_or_buf='./executions/FRONTIER/points_projs_10000_V6.csv')