In [88]:
import KNNclassy
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import importlib
from sklearn.neighbors import NearestNeighbors
importlib.reload(KNNclassy)
import pandas as pd

# Creation of parameter sweep (if needed) #

In [89]:
def param_sweep(pathClassy):
    
    df = pd.DataFrame()
    mass1 = np.arange(1, 70.5, 0.5)
    mass2 = np.arange(1, 70.5, 0.5)
    t = Table(
        data=np.vstack(
            (np.repeat(mass1, mass2.size),
                np.tile(mass2, mass1.size))
        ).T, names=('mass1', 'mass2')
    )
    mask = t['mass1'] > t['mass2']
    t = t[mask]

    spins = Table(
        data=np.vstack(
            (np.repeat(np.linspace(0, 1, 2), 2),
                np.tile(np.linspace(0, 1, 2), 2))
        ).T,
        names=('chi1', 'chi2')
    )
    SNR = 10.
    res = list()
    for spin_vals in spins:
        SNR *= np.ones(len(t))
        chi1 = spin_vals['chi1'] * np.ones(len(t))
        chi2 = spin_vals['chi2'] * np.ones(len(t))
        # make predictions and make plots
        param_sweep_features = np.stack(
            [t['mass1'], t['mass2'], chi1, chi2, SNR]
        ).T
    # both KNN and mass_gap are binary classifier with p(false) p(true)
    # P(true) can be accessed with predict_proba(data).T[1]
        res.append(param_sweep_features)
    param_sweep = np.reshape(param_sweep,(550*9591,5))
    pd.DataFrame(param_sweep).to_csv(pathClassy+"sweep_v2.csv")
    
    return 

# Functions for O2 testing dataset and O3/param_sweep events

In [99]:
def matrix_KNN(algo,nneigh,eos):
    
    total_events = len(algo.label_test)
    nplus_NO = np.zeros(total_events)
    nplus_NS = np.zeros(total_events)
    nplus_REM = np.zeros(total_events)
    
    for i in range(0,total_events):
        distance, closestn = algo.model.kneighbors(algo.xtest[i].reshape(1, -1),n_neighbors=nneigh,return_distance = True)
        prediction = algo.model.predict(algo.xtest[i].reshape(1, -1))
        if (distance.any() <= 0.001):
            print('event '+str(i)+' gives problems')
            print(distance[0])
        weighted_neighs = 1/distance[0]
        norm = np.sum(1/distance[0])
        labels = algo.label_train[closestn][0].tolist()
        for ll in range(0,len(labels)): 
            labels[ll] = int(labels[ll])
        labels = np.array(labels)
        ind_0 = np.where(labels == 0)
        ind_1 = np.where(labels == 1)
        ind_2 = np.where(labels == 2)
    
        neighs_0 = weighted_neighs[ind_0]
        neighs_1 = weighted_neighs[ind_1]
        neighs_2 = weighted_neighs[ind_2]

        nplus_NO[i] = np.sum(neighs_0)/norm
        nplus_NS[i] = np.sum(neighs_1)/norm
        nplus_REM[i] = np.sum(neighs_2)/norm
        
    data = pd.DataFrame({'HasNS_true':algo.label_NStrue})
    data['HasREM_true'] = algo.label_REMtrue
    data['f0'] = nplus_NO
    data['f1'] = nplus_NS
    data['f2'] = nplus_REM
    data.to_csv('matrix_KNN_O2_EOS-'+eos+'.csv', index = False)
        
    return 

def matrix_KNN_O3(KNNdic, nneigh,eos,events, events_id = None, o3 = False):
    
    total_events = len(events[:,0])

    nplus_no = np.zeros(total_events)
    nplus_NS = np.zeros(total_events)
    nplus_REM = np.zeros(total_events)
    
    for i in range(0,total_events):
        distance, closestn = KNNdic['knn'].model.kneighbors(events[i].reshape(1, -1),n_neighbors=nneigh,return_distance = True)
        if (distance.any() <= 0.001):
            print('event '+str(i)+' gives problems')
            print(distance[0])
        weighted_neighs = 1/distance[0]
        norm = np.sum(1/distance[0])
        labels = KNNdic['knn'].label_train[closestn][0].tolist()
        for ll in range(0,len(labels)): 
            labels[ll] = int(labels[ll])
        labels = np.array(labels)
        ind_0 = np.where(labels == 0)
        ind_1 = np.where(labels == 1)
        ind_2 = np.where(labels == 2)
    
        neighs_0 = weighted_neighs[ind_0]
        neighs_1 = weighted_neighs[ind_1]
        neighs_2 = weighted_neighs[ind_2]

        nplus_no[i] = np.sum(neighs_0)/norm
        nplus_NS[i] = np.sum(neighs_1)/norm
        nplus_REM[i] = np.sum(neighs_2)/norm
        
    return nplus_NS, nplus_REM, nplus_no



# Functions for checking if the neighbors are counted properly 

In [93]:
def matrix_KNN_test(algo,nneigh,eos,nrandom):
    
    total_events = len(algo.label_test)
    nplus_NO = np.zeros(total_events)
    nplus_NS = np.zeros(total_events)
    nplus_REM = np.zeros(total_events)
    
    rnd_ev = random.sample(range(0,total_events), nrandom)
    print('random events: ',rnd_ev)
    
    total_events = len(rnd_ev)
    
    testing = algo.xtest[rnd_ev]
    labelstest = algo.label_test[rnd_ev]
    
    for i in range(0,total_events):
        distance, closestn = algo.model.kneighbors(testing[i].reshape(1, -1),n_neighbors=nneigh,return_distance = True)
        prediction = algo.model.predict(testing[i].reshape(1, -1))
     
        if (distance.any() <= 0.001):
            print('event '+str(i)+' gives problems')
            print(distance[0])
        weighted_neighs = 1/distance[0]
        norm = np.sum(1/distance[0])
        labels = algo.label_train[closestn][0].tolist()
        for ll in range(0,len(labels)): 
            labels[ll] = int(labels[ll])
        labels = np.array(labels)
        ind_0 = np.where(labels == 0)
        ind_1 = np.where(labels == 1)
        ind_2 = np.where(labels == 2)
    
        neighs_0 = weighted_neighs[ind_0]
        neighs_1 = weighted_neighs[ind_1]
        neighs_2 = weighted_neighs[ind_2]

        nplus_NO[i] = np.sum(neighs_0)/norm
        nplus_NS[i] = np.sum(neighs_1)/norm
        nplus_REM[i] = np.sum(neighs_2)/norm
        
        print('Nothing: ',nplus_NO[i])
        print('NS: ',nplus_NS[i])
        print('REM: ',nplus_REM[i])
        print('Labels neigh: ',labels)
        print('True label: ',labelstest[i])
        print('Prediction: ',prediction)
        print('*'*60)
        
    return 

def matrix_KNN_O3_test(KNNdic, nneigh,eos,eventsnrandom, events_id = None, o3 = False):
    
    total_events = len(events[:,0])
    
    rnd_ev = random.sample(range(0,total_events), nrandom)
    print('random events: ',rnd_ev)
    
    total_events = len(rnd_ev)
    
    events = events[rnd_ev]

    nplus_no = np.zeros(total_events)
    nplus_NS = np.zeros(total_events)
    nplus_REM = np.zeros(total_events)
    
    for i in range(0,total_events):
        distance, closestn = KNNdic['knn'].model.kneighbors(events[i].reshape(1, -1),n_neighbors=nneigh,return_distance = True)

        if (distance.any() <= 0.001):
            print('event '+str(i)+' gives problems')
            print(distance[0])
        weighted_neighs = 1/distance[0]
        norm = np.sum(1/distance[0])
        labels = KNNdic['knn'].label_train[closestn][0].tolist()
        for ll in range(0,len(labels)): 
            labels[ll] = int(labels[ll])
        labels = np.array(labels)
        ind_0 = np.where(labels == 0)
        ind_1 = np.where(labels == 1)
        ind_2 = np.where(labels == 2)
    
        neighs_0 = weighted_neighs[ind_0]
        neighs_1 = weighted_neighs[ind_1]
        neighs_2 = weighted_neighs[ind_2]

        nplus_no[i] = np.sum(neighs_0)/norm
        nplus_NS[i] = np.sum(neighs_1)/norm
        nplus_REM[i] = np.sum(neighs_2)/norm
        
        if o3 == True:
            print('Event ID: ', events_id[i])
        print('Nothing: ',nplus_no[i])
        print('NS: ',nplus_NS[i])
        print('REM: ',nplus_REM[i])
        print('Labels neigh: ',labels)
        print('*'*60)
        
        
    return nplus_NS, nplus_REM, nplus_no

### Definition of paths 

In [95]:
nneigh = [8]
pathClassy = "/Users/miquelmiravet/Projects/IPAM_LA/ML_group/KNN_miq/"
pathData = "/Users/miquelmiravet/Projects/IPAM_LA/ML_group/KNN_miq/input/"
EOS = ["APR4_EPP", "BHF_BBB2", "H4", "HQC18", "KDE0V", "KDE0V1", "MPA1", "MS1_PP", "MS1B_PP", "RS", "SK255", "SK272", "SKI2", "SKI3", "SKI4", "SKI5", "SKI6", "SKMP", "SKOP", "SLy", "SLY2", "SLY9", "SLY230A"]
BayesFactor = [1.526, 1.555, 0.056, 1.422, 1.177, 1.283, 0.276, 0.001, 0.009, 0.176, 0.179, 0.159, 0.108, 0.107, 0.33, 0.025, 0.288, 0.29, 0.618, 1.0, 1.028, 0.37, 0.932]

In [96]:
EOSdic = {}
count = 0

## Loading KNN trained algorithms

In [97]:
for eos in EOS:
    
    print("Doing", eos)
    print('*'*60)

    for kk in nneigh:
        
        print("Nº of neighbors: ", kk)
        print('-'*40)

        KNN = KNNclassy.ClassificationKNN()
        KNN.load_train_dataset(pathData+eos+"/NS/train_NS_karoo_"+eos+"_s300_f0d7.csv",pathData+eos+"/EMB/train_EMB_karoo_"+eos+"_s300_f0d7.csv")
        KNN.load_test_dataset(pathData+eos+"/NS/test_NS_ID_LABEL_"+eos+"_s300_f0d7.csv",pathData+eos+"/EMB/test_EMB_ID_LABEL_"+eos+"_s300_f0d7.csv")
    
        KNN.loadModel(pathClassy+"optimal_models_final/", "knn_3cat_grid_eos_"+eos)
        new_dic = {}
        new_dic['knn'] = KNN
        new_dic['bayes'] = BayesFactor[count]
        EOSdic[eos] = new_dic
    
        print('*'*60)
        
    count = count + 1
        

Doing APR4_EPP
************************************************************
Nº of neighbors:  8
----------------------------------------
************************************************************
Loading training data...
Nº of features:  5
Nº of events for training:  139273
Datasets loaded!
************************************************************
************************************************************
Loading testing data...
Nº of features:  5
Nº of events for testing:  59688
Datasets loaded!
************************************************************
loading  /Users/miquelmiravet/Projects/IPAM_LA/ML_group/KNN_miq/optimal_models_final/knn_3cat_grid_eos_APR4_EPP.joblib
************************************************************
Doing BHF_BBB2
************************************************************
Nº of neighbors:  8
----------------------------------------
************************************************************
Loading training data...
Nº of features:  5
Nº of ev

************************************************************
Loading training data...
Nº of features:  5
Nº of events for training:  140784
Datasets loaded!
************************************************************
************************************************************
Loading testing data...
Nº of features:  5
Nº of events for testing:  60336
Datasets loaded!
************************************************************
loading  /Users/miquelmiravet/Projects/IPAM_LA/ML_group/KNN_miq/optimal_models_final/knn_3cat_grid_eos_SK272.joblib
************************************************************
Doing SKI2
************************************************************
Nº of neighbors:  8
----------------------------------------
************************************************************
Loading training data...
Nº of features:  5
Nº of events for training:  139343
Datasets loaded!
************************************************************
***************************************

************************************************************
Loading testing data...
Nº of features:  5
Nº of events for testing:  59208
Datasets loaded!
************************************************************
loading  /Users/miquelmiravet/Projects/IPAM_LA/ML_group/KNN_miq/optimal_models_final/knn_3cat_grid_eos_SLY230A.joblib
************************************************************


## Computing fractions of the O2 test dataset

In [100]:
for eos in EOS:
    print('Doing EOS: ', eos)
    print('='*60)
    matrix_KNN(EOSdic[eos]['knn'],nneigh[0],eos)
    print('#'*60)

Doing EOS:  APR4_EPP


  weighted_neighs = 1/distance[0]
  norm = np.sum(1/distance[0])
  nplus_NO[i] = np.sum(neighs_0)/norm


############################################################
Doing EOS:  BHF_BBB2
############################################################
Doing EOS:  H4
############################################################
Doing EOS:  HQC18
############################################################
Doing EOS:  KDE0V
############################################################
Doing EOS:  KDE0V1
############################################################
Doing EOS:  MPA1


  weighted_neighs = 1/distance[0]
  norm = np.sum(1/distance[0])
  nplus_NO[i] = np.sum(neighs_0)/norm


############################################################
Doing EOS:  MS1_PP


  weighted_neighs = 1/distance[0]
  norm = np.sum(1/distance[0])
  nplus_NO[i] = np.sum(neighs_0)/norm


############################################################
Doing EOS:  MS1B_PP


  weighted_neighs = 1/distance[0]
  norm = np.sum(1/distance[0])
  nplus_NO[i] = np.sum(neighs_0)/norm


############################################################
Doing EOS:  RS
############################################################
Doing EOS:  SK255
############################################################
Doing EOS:  SK272
############################################################
Doing EOS:  SKI2
############################################################
Doing EOS:  SKI3
############################################################
Doing EOS:  SKI4
############################################################
Doing EOS:  SKI5
############################################################
Doing EOS:  SKI6
############################################################
Doing EOS:  SKMP


  weighted_neighs = 1/distance[0]
  norm = np.sum(1/distance[0])
  nplus_NO[i] = np.sum(neighs_0)/norm


############################################################
Doing EOS:  SKOP
############################################################
Doing EOS:  SLy


  weighted_neighs = 1/distance[0]
  norm = np.sum(1/distance[0])
  nplus_NO[i] = np.sum(neighs_0)/norm


############################################################
Doing EOS:  SLY2


  weighted_neighs = 1/distance[0]
  norm = np.sum(1/distance[0])
  nplus_NO[i] = np.sum(neighs_0)/norm


############################################################
Doing EOS:  SLY9


  weighted_neighs = 1/distance[0]
  norm = np.sum(1/distance[0])
  nplus_NO[i] = np.sum(neighs_0)/norm


############################################################
Doing EOS:  SLY230A
############################################################


## Read O3 dataset 

In [101]:
df = pd.read_csv(pathClassy+'real_data/real_data.csv')
df.head()

Unnamed: 0,event_id,grace_id,m1_rec,m2_rec,chi1_rec,chi2_rec,snr,GWTC,pHasNS,pHasREM
0,GW170823,G298936,59.126324,24.816019,-0.298205,0.89852,11.296,1,0.0,0.0
1,GW170817,G298048,1.527005,1.242296,-0.015902,-0.035747,14.45,1,1.0,1.0
2,GW170814,G297595,29.478622,24.901943,-0.568798,0.130793,16.1496,1,0.0,0.0
3,GW170809,G296853,43.061466,30.084999,-0.120968,0.846714,11.2619,1,0.002,0.0
4,GW190408,G329243,32.132198,23.224018,0.279017,-0.647443,13.9286,2,0.0,0.0


In [102]:
columns = df[['m1_rec', 'm2_rec', 'chi1_rec', 'chi2_rec', 'snr']]

# Convert the DataFrame to a Numpy array
events = columns.values
nplus_NS = []
nplus_REM = []
nplus_NO = []
event_ids = []
for i in range(0,len(events[:,0])):
    event_ids.append(df['event_id'][i])

## Compute neighbors for O3 real data

In [103]:
for eos in EOS:
    print('Doing EOS: ', eos)
    print('='*60)
    npns,nprem,npno = matrix_KNN_O3(EOSdic[eos],nneigh[0],eos,events,event_ids, o3 = True)
    nplus_NS.append(npns)
    nplus_REM.append(nprem)
    nplus_NO.append(npno)
    print('#'*60)

Doing EOS:  APR4_EPP
############################################################
Doing EOS:  BHF_BBB2
############################################################
Doing EOS:  H4
############################################################
Doing EOS:  HQC18
############################################################
Doing EOS:  KDE0V
############################################################
Doing EOS:  KDE0V1
############################################################
Doing EOS:  MPA1
############################################################
Doing EOS:  MS1_PP
############################################################
Doing EOS:  MS1B_PP
############################################################
Doing EOS:  RS
############################################################
Doing EOS:  SK255
############################################################
Doing EOS:  SK272
############################################################
Doing EOS:  SKI2
################################

## Save fractions/number of neighbors of O3 data

In [104]:
data = pd.DataFrame({'event_ID':event_ids})

for i,eos in enumerate(EOS):
    data['f0_'+eos] = nplus_NO[i]
    data['f1_'+eos] = nplus_NS[i]
    data['f2_'+eos] = nplus_REM[i]
    
    
data.to_csv('matrix_KNN_O3_all_EOS.csv', index = False)   

## Read parameter sweep file

In [105]:
df = pd.read_csv(pathClassy+'sweeps/sweep_v2.csv')

In [106]:
events = df.values
events = events[:,1:]
print(events)
nplus_NS = []
nplus_REM = []
nplus_NO = []

[[ 1.39698492  1.          0.          0.         10.        ]
 [ 1.79396985  1.          0.          0.         10.        ]
 [ 1.79396985  1.39698492  0.          0.         10.        ]
 ...
 [80.         78.80904523  1.          1.         10.        ]
 [80.         79.20603015  1.          1.         10.        ]
 [80.         79.60301508  1.          1.         10.        ]]


In [107]:
for eos in EOS:
    print('Doing EOS: ', eos)
    print('='*60)
    npns,nprem,npno = matrix_KNN_O3(EOSdic[eos],nneigh[0],eos,events)
    nplus_NS.append(npns)
    nplus_REM.append(nprem)
    nplus_NO.append(npno)
    print('#'*60)

Doing EOS:  APR4_EPP
############################################################
Doing EOS:  BHF_BBB2
############################################################
Doing EOS:  H4
############################################################
Doing EOS:  HQC18
############################################################
Doing EOS:  KDE0V
############################################################
Doing EOS:  KDE0V1
############################################################
Doing EOS:  MPA1
############################################################
Doing EOS:  MS1_PP
############################################################
Doing EOS:  MS1B_PP
############################################################
Doing EOS:  RS
############################################################
Doing EOS:  SK255
############################################################
Doing EOS:  SK272
############################################################
Doing EOS:  SKI2
################################

## Saving results into file


In [108]:
data = pd.DataFrame({'event_ID':np.arange(0,len(events[:,0]))})

for i,eos in enumerate(EOS[-6]):
    data['f0_'+eos] = nplus_NO[i]
    data['f1_'+eos] = nplus_NS[i]
    data['f2_'+eos] = nplus_REM[i]
    
    
data.to_csv('matrix_KNN_param_sweep_v2.csv', index = False)  