In [None]:
from keras.layers import Dense, Dropout, Conv1D, MaxPool1D, Flatten, SpatialDropout1D
from keras.models import Sequential
from keras import optimizers
from keras.callbacks import Callback,EarlyStopping,ModelCheckpoint
# General packages
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd

# Data preparation and validation packages
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Jupyter interactive plotting
from IPython.display import clear_output
# Display option: max_rows=None turns off pandas' row truncation, so a
# displayed DataFrame is printed in full (prefer .head() on large frames).
pd.set_option('display.max_rows', None)

Using TensorFlow backend.
  import pandas.util.testing as tm


In [None]:
class PlotLosses(Callback):
    """Keras callback that redraws the learning curves after every epoch.

    The per-epoch values of the training/validation loss and accuracy are
    accumulated in lists and plotted against the epoch counter; the previous
    cell output is cleared first so that the plot appears to update in place.

    Attributes set by ``on_train_begin`` (reset on every ``fit`` call):
        i: number of epochs recorded so far.
        x: epoch indices used as the x axis.
        losses, accuracies, val_losses, val_accuracies: one entry per epoch
            (``None`` when the metric is missing from ``logs``).
        logs: the raw ``logs`` dictionaries, one per epoch.
        fig: the most recently drawn figure (``None`` before the first epoch).
    """

    # (legend label, name of the history attribute) in drawing order.
    _CURVES = (
        ("loss", "losses"),
        ("accuracy", "accuracies"),
        ("val_loss", "val_losses"),
        ("val_accuracy", "val_accuracies"),
    )

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of training."""
        self.i = 0
        self.x = []
        self.losses = []
        self.accuracies = []
        self.val_losses = []
        self.val_accuracies = []
        # No figure is created here: an empty one would only be shown (or
        # leaked) before the first epoch has produced anything to plot.
        self.fig = None
        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record the metrics of the finished epoch and redraw the curves.

        Args:
            epoch: index of the epoch, as passed by Keras (unused; the
                callback keeps its own counter ``self.i``).
            logs: metric dictionary for the epoch; ``None`` is treated as
                an empty dictionary.
        """
        logs = logs or {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        # The metric is logged as 'acc' when compiled with metrics=["acc"];
        # fall back to 'accuracy' for the spelled-out metric name.
        self.accuracies.append(logs.get('acc', logs.get('accuracy')))
        self.val_losses.append(logs.get('val_loss'))
        self.val_accuracies.append(logs.get('val_acc', logs.get('val_accuracy')))
        self.i += 1

        clear_output(wait=True)
        self.fig, ax = plt.subplots()
        for label, attribute in self._CURVES:
            values = getattr(self, attribute)
            # A series with missing entries (e.g. no validation data) cannot
            # be plotted, so it is skipped instead of raising.
            if any(value is None for value in values):
                continue
            ax.plot(self.x, values, label=label)
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        ax.grid()
        plt.show()
        # Release the figure: one is created per epoch.
        plt.close(self.fig)


plot_losses = PlotLosses()

In [None]:
#TRAINING+VALIDATION SET(85 % OF DATA)
# Reads the first 85 series files, scales every sample to [0, 1], one-hot
# encodes the labels and splits the result 70/30 into training/validation.
poisson = 10
SEED = 42             # fixed seed so the shuffle and the split are reproducible
N_TRAIN_FILES = 85    # series 0..84 are used for training + validation
N_SAMPLES = 4096      # number of time-sample columns ('0'..'4095') per row
# NOTE(review): the original cell read series 0 from "/content/BBH4s_serie0.csv"
# and the other 84 from this directory; one directory is now used for all of
# them, matching `poisson` and the printed file names -- confirm the location.
TRAIN_DIR = "/home/marangio/Desktop/MDC1_BBH" + str(poisson) + "s"

train_frames = []
for i in range(N_TRAIN_FILES):
    filename = 'BBH' + str(poisson) + 's_serie' + str(i) + '.csv'
    print(filename)
    serie = pd.read_csv(TRAIN_DIR + "/" + filename)
    del serie['Unnamed: 0']   # index column written when the CSV was saved
    train_frames.append(serie)
# Concatenate once: growing a DataFrame inside the loop copies it every time.
inputdata = pd.concat(train_frames)

inputdata = shuffle(inputdata, random_state=SEED)
columnsET = [str(l) for l in range(N_SAMPLES)]
inputs = inputdata[columnsET].values
# minmax_scale works column-wise, so transpose to scale each row (sample).
inputs = preprocessing.minmax_scale(inputs.T).T
# Conv1D expects (samples, steps, channels).
inputs = inputs.reshape((inputs.shape[0], inputs.shape[1], 1))

targets = inputdata['Label'].values
onehot_encoder = OneHotEncoder(sparse=False)
targets = targets.reshape(targets.shape[0], 1)
targets = onehot_encoder.fit_transform(targets)
x_train, x_val, y_train, y_val = train_test_split(
    inputs, targets, test_size=0.3, random_state=SEED)
# One column per class; np.unique on the one-hot matrix would only ever see
# the values {0, 1}, whatever the number of classes.
n_classes = targets.shape[1]

       

BBH4s_serie0.csv


FileNotFoundError: ignored

In [None]:
 #CNN
# 1D CNN: three Conv1D + MaxPool1D stages followed by a dense classifier.
model = Sequential()

# (filters, kernel_size) of the successive convolution stages; each stage is
# followed by a max-pooling of size 8.
conv_stages = [(16, 32), (12, 16), (8, 8)]
for stage, (n_filters, kernel) in enumerate(conv_stages):
    # Only the first layer of a Sequential model declares the input shape.
    extra = {"input_shape": x_train.shape[1:]} if stage == 0 else {}
    # `filters` / `kernel_size` are the Keras 2 names of the Keras 1
    # arguments `nb_filter` / `filter_length`.
    model.add(Conv1D(filters=n_filters, kernel_size=kernel, activation="relu", **extra))
    model.add(MaxPool1D(8))
    # A SpatialDropout1D(0.2) after each stage was tried and left disabled.

# A Dropout(0.1) before flattening was tried and left disabled.
model.add(Flatten())

model.add(Dense(100, activation="relu"))
model.add(Dense(50, activation="relu"))
# One softmax unit per one-hot target column.
model.add(Dense(y_train.shape[1], activation="softmax"))


In [None]:
#TRAINING+VALIDATION(85 % OF DATA)
# Compile with Adam and categorical cross-entropy; accuracy is logged as 'acc'.
opt = optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', metrics=["acc"], optimizer=opt)

# Keep on disk the model with the highest validation accuracy seen so far.
model_checkpoint = ModelCheckpoint(
    'best_weights_poisson'+str(poisson)+'.h5',
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)
# Stop when validation accuracy has not improved by min_delta for `patience`
# epochs, and put the best weights back into the model.
es = EarlyStopping(
    monitor='val_acc',
    mode='max',
    min_delta=0.00005,
    patience=50,
    restore_best_weights=True,
    verbose=1,
)

fit_options = dict(
    epochs=100,
    batch_size=64,
    shuffle=True,
    validation_data=(x_val, y_val),
    callbacks=[plot_losses, es, model_checkpoint],
)
model.fit(x_train, y_train, **fit_options)

# Weights held by the model when fit() returns.
model.save_weights('final_weights_poisson'+str(poisson)+'.h5')

In [None]:
#TESTING SET (15 % OF DATA)
# Reads series 85..99 into a single DataFrame bound to `inputdata`
# (this replaces the training frame of the same name).
TEST_DIR = "/home/marangio/Desktop/MDC1(ET)/MDC1_BBH"+str(poisson)+"s"
FIRST_TEST_FILE = 85
N_TEST_FILES = 15

test_frames = []
for i in range(FIRST_TEST_FILE, FIRST_TEST_FILE + N_TEST_FILES):
    filename = 'BBH'+str(poisson)+'s_serie'+str(i)+'.csv'
    print(filename)
    serie = pd.read_csv(TEST_DIR + "/" + filename)
    del serie['Unnamed: 0']   # index column written when the CSV was saved
    test_frames.append(serie)
# Concatenate once: growing a DataFrame inside the loop copies it every time.
inputdata = pd.concat(test_frames)


    

BBH4s_serie85.csv


FileNotFoundError: ignored

In [None]:
#CONFUSION MATRIX
# Evaluates the trained model on the frame currently bound to `inputdata`
# and plots the row-normalised confusion matrix (percent of each real class).
columnsET = [str(m) for m in range(4096)]
inputs = inputdata[columnsET].values
# minmax_scale works column-wise, so transpose to scale each row (sample).
inputs = preprocessing.minmax_scale(inputs.T).T
inputs = inputs.reshape((inputs.shape[0], inputs.shape[1], 1))

# Reuse the encoder fitted on the training labels: re-fitting it here would
# shift the class columns whenever a class is absent from this data.
targets = onehot_encoder.transform(inputdata['Label'].values.reshape(-1, 1))
n_classes = targets.shape[1]   # one one-hot column per class

y_predicted = model.predict(inputs)
# Explicit labels keep the matrix n_classes x n_classes even when a class is
# never observed or never predicted.
cms = confusion_matrix(targets.argmax(1), y_predicted.argmax(1),
                       labels=list(range(n_classes)))
test_score = np.trace(cms) / np.sum(cms)

# Percentage of each real class (row) assigned to each predicted class;
# rows without any sample stay at 0 instead of dividing by zero.
row_totals = cms.sum(axis=1, keepdims=True)
fractions = np.divide(cms, row_totals,
                      out=np.zeros(cms.shape, dtype=float),
                      where=row_totals > 0)
new_cms = np.round(fractions * 100, 1)

plt.rc("font", size=20)
fig, ax = plt.subplots(figsize=(8, 8))
# Transposed so that the real class runs along x and the prediction along y.
im = ax.imshow(new_cms.T, interpolation="nearest", cmap="cool")
for x in range(n_classes):
    for y in range(n_classes):
        ax.text(x, y, new_cms[x, y], color="black", ha="center", va="center")
ax.set_title("Total accuracy: " + str(np.around(test_score*100, 1)), fontsize=20)

fig.colorbar(im)

classes_values = list(range(n_classes))
classes_labels = ["Noise", "BBH"]
if len(classes_labels) != n_classes:
    # Fall back to the class indices if the model is not the binary one.
    classes_labels = [str(n) for n in classes_values]

ax.set_xticks(classes_values)
ax.set_xticklabels(classes_labels, rotation=45, fontsize=15)
ax.set_yticks(classes_values)
ax.set_yticklabels(classes_labels, fontsize=15)
ax.set_xlabel("Real data", fontsize=15)
ax.set_ylabel("Predicted data", fontsize=15)
# Explicit limits so the outer cells are drawn in full, class 0 at the top.
ax.set_ylim(n_classes - 0.5, -0.5)
plt.show()


In [None]:
#TESTING FOR POISSON PARAMETER
# For each raw series: cut the ET1 channel into consecutive frames, classify
# every frame with the trained model and report the number of frames per
# frame predicted as class 1.
FRAME_LEN = 4096                   # samples per frame (the model input length)
MAX_SAMPLES = 2048 * FRAME_LEN     # frames must end strictly before this sample
N_SERIES = 100
# Offset of the first frame, in units of frames.
# NOTE(review): the original cell read `t` (and `t0`) without defining them
# anywhere visible; a zero offset is assumed here -- confirm.
t = 0

progress = np.zeros(N_SERIES)      # score of every series, NaN when skipped
directories = ['/home/shared/MDC/long_data/bbh_4','/home/shared/MDC/long_data/bbh_10','/home/shared/MDC/long_data/noise']# RAW DATA
directory = directories[1]

for j in range(N_SERIES):
    fileserie = directory+'/serie_'+str(j)+'.dat.gz'
    print(fileserie)
    ETdata = pd.read_csv(fileserie, sep=' ', lineterminator='\n', header=None,
                         names=['t0','ET1','ET2','ET3'], usecols=['ET1',])
    samples = ETdata['ET1'].values

    # Number of whole frames after the offset. The strict bound of the
    # original condition (frame end < MAX_SAMPLES) is kept, which yields
    # 2047 frames at zero offset; the count is also capped by the file length.
    start = int(t * FRAME_LEN)
    n_frames = min((MAX_SAMPLES - start - 1) // FRAME_LEN,
                   (len(samples) - start) // FRAME_LEN)
    if n_frames <= 0:
        print('no complete frame in ' + fileserie + ', skipped')
        progress[j] = np.nan
        continue
    # One row per frame, built in one step instead of stacking in a loop.
    datalist = samples[start:start + n_frames * FRAME_LEN].reshape(n_frames, FRAME_LEN)

    # minmax_scale works column-wise, so transpose to scale each frame.
    inputs = preprocessing.minmax_scale(datalist.T).T
    inputs = inputs.reshape((inputs.shape[0], inputs.shape[1], 1))
    y_predicted = model.predict(inputs)

    n_classes = y_predicted.shape[1]
    # Every frame is given reference class 0, as in the original all-zero
    # targets; explicit labels keep the matrix square even if class 1 is
    # never predicted.
    y_true = np.zeros(n_frames, dtype=int)
    cms = confusion_matrix(y_true, y_predicted.argmax(1),
                           labels=list(range(n_classes)))

    fig, ax = plt.subplots(figsize=(8, 8))
    im = ax.imshow(np.transpose(cms), interpolation="nearest", cmap="cool")
    rows, cols = cms.shape
    for x in range(rows):
        for y in range(cols):
            ax.text(x, y, int(cms[x, y]), color="black", ha="center", va="center", fontsize=25)

    # Frames per frame predicted as class 1; infinite when there is none.
    # (presumably a time in seconds, given the 's' suffix -- TODO confirm
    # the duration of one frame)
    n_detected = int(cms[0, 1])
    test_score = n_frames / n_detected if n_detected else np.inf
    progress[j] = test_score
    ax.set_title("Distribution:" + format(test_score, '.3g') + 's', fontsize=25)
    fig.colorbar(im)

    classes_values = list(range(n_classes))
    classes_labels = [str(n) for n in classes_values]

    ax.set_xticks(classes_values)
    ax.set_xticklabels(classes_labels, rotation=45, fontsize=15)
    ax.set_yticks(classes_values)
    ax.set_yticklabels(classes_labels, fontsize=15)
    ax.set_xlabel("Real data", fontsize=15)
    ax.set_ylabel("Predicted data", fontsize=15)
    # Explicit limits so the outer cells are drawn in full, class 0 at the top.
    ax.set_ylim(n_classes - 0.5, -0.5)
    plt.show()
    # One figure per series: close it so a hundred of them do not pile up.
    plt.close(fig)