In [1]:
import numpy as np

In [2]:
# PREPARING DATA FOR REGRESSION
# We build fake random data representing the properties of a binary. Later cells
# relate these quantities in a complicated way that stands in for the unknown error
# introduced by GSTLAL. The goal is to recover the initial quantities: the ML
# algorithm should be able to learn this complicated relation.

np.random.seed(21)  # fix the random seed so results can be reproduced

# Prepare the example dataset.
Nsample = 3  # number of data points (simulated injections)
dsample = 7  # parameters per data point -- DON'T CHANGE: the column layout below is hard-coded

# Column layout of every row of y:
#   0    larger of the two masses   (both drawn uniformly from [1.1, 2.1))
#   1    smaller of the two masses
#   2-3  the two spin magnitudes    (uniform in [0, 1))
#   4    angle between the spins    (uniform in [0, pi))
#   5    mass ratio, column 0 / column 1
#   6    chirp mass, (m1*m2)**(3/5) / (m1+m2)**(1/5)
y = np.zeros((Nsample, dsample))
for row in y:  # each `row` is a view, so writes land in y
    # Draw order per row (2 masses, 2 spins, 1 angle) is kept unchanged so that
    # seed 21 keeps producing the same dataset.
    masses = 1.1 + np.random.random_sample(2)
    row[0] = masses.max()
    row[1] = masses.min()
    row[2:4] = np.random.random_sample(2)
    # Draw a true scalar: assigning a shape-(1, 1) array to a single element relies on
    # size-1-array -> scalar conversion, which NumPy has deprecated.
    row[4] = np.pi * np.random.random_sample()
    row[5] = row[0] / row[1]  # mass ratio
    row[6] = (row[0] * row[1]) ** (3.0 / 5) / (row[0] + row[1]) ** (1.0 / 5)  # chirp mass

In [3]:
# This emulates the nonlinear effects introduced by the GSTLAL pipeline.
def complicated_f(vector):
    """Map a 7-component parameter vector to 7 nonlinearly mixed outputs.

    Parameters
    ----------
    vector : sequence of numbers
        Indexable with at least 7 entries; only entries 0..6 are read. The notebook
        calls this with rows of the `y` dataset.

    Returns
    -------
    list
        Seven values, one per formula below. The last one is entry 6 of the input,
        passed through unchanged.

    Notes
    -----
    The formulas divide by expressions of the inputs and take square roots of
    entries 1 and 3, so inputs outside the ranges used in this notebook can
    produce inf/nan (NumPy emits a RuntimeWarning rather than raising).
    """
    x, y, z, w, v, a, b = (vector[k] for k in range(7))
    return [
        x * z + w**2 - v / np.sqrt(y) + (1 - a) / (b + 3),
        y**3 * (a + np.pi * w) / (1 + v + 3 * z),
        np.sqrt((x - v) ** 2 + (b - w) ** 2 + (z - x) ** 2),
        np.sqrt(x + a) / (2 * y**2 * z + 1) + v / (4 * y * z * np.sqrt(np.abs(v - b)) + 2) - w,
        np.sin(x + y) + (a - z) * w / (z + w + v),
        (a**2 + (b - y) + x / 3) * (z * np.sqrt(w) - v),
        b,
    ]

In [4]:
# Build the results "from the pipeline": push every true parameter row of y through
# complicated_f, then perturb each value by a random relative error.
errorpct = 0.05  # largest relative error added to a value (0.05 means up to 5%)
x = np.zeros((Nsample, dsample))
for i in range(Nsample):
    x[i] = complicated_f(y[i])
    for j in range(dsample):
        # randint(1, 3) yields 1 or 2, so the sign is -1 or +1; rand() is uniform in
        # [0, 1), giving an error of magnitude below errorpct * |value|.
        sign = (-1) ** np.random.randint(1, 3)
        x[i][j] = x[i][j] + x[i][j] * sign * errorpct * np.random.rand()

In [5]:
# NEW!: reduced datasets with column 1 removed.
# In y, column 1 is the smaller mass (m2); in x, it is the second output of complicated_f.
y_nomass2 = np.delete(y, 1, axis=1)
x_nomass2 = np.delete(x, 1, axis=1)
# Then train the ML with data x and tag y, to see whether it can discover the
# complicated function relating the quantities.


In [None]:
# Create a test case to see how your trained ML performs.
# ytest is built with the same recipe as one row of the training targets:
# [larger mass, smaller mass, spin 1, spin 2, angle, mass ratio, chirp mass].
ytest = np.zeros(dsample)
test_masses = 1.1 + np.random.random_sample(2)
ytest[0] = test_masses.max()
ytest[1] = test_masses.min()
ytest[2:4] = np.random.random_sample(2)
# Scalar draw: assigning a shape-(1, 1) array to one element is deprecated in NumPy.
ytest[4] = np.pi * np.random.random_sample()
ytest[5] = ytest[0] / ytest[1]  # mass ratio
ytest[6] = (ytest[0] * ytest[1]) ** (3.0 / 5) / (ytest[0] + ytest[1]) ** (1.0 / 5)  # chirp mass

# ytest is 1-D, so the entry is removed without an axis argument
# (np.delete(ytest, 1, 1) raises AxisError on a 1-D array).
ytest_nomass2 = np.delete(ytest, 1)

# The pipeline output is computed from the full vector, then entry 1 is dropped so the
# test input has the same layout as the rows of x_nomass2. No noise is added here.
xtest_nomass2 = np.delete(complicated_f(ytest), 1)
print('introduced', xtest_nomass2)
print('to be recovered', ytest_nomass2)

In [None]:
# Evaluate your ML.
# Replace the placeholder below with the call that runs your trained model on
# xtest_nomass2, for example:
#     yresult = ml.test(xtest_nomass2)
# The method is usually called test, predict or similar; check the documentation of
# the library you are using.
yresult = None  # placeholder so the cell runs on a fresh kernel before a model exists
if yresult is None:
    print('recovered', 'nothing yet - assign yresult from your trained model first')
else:
    print('recovered', yresult)
# Are the results good?
# Do you want more test cases for statistics? Generate another dataset like the training one.