This is the model for my final project in upper secondary school.

In [1]:
# The first installed libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Mount Google Drive inside the Colab runtime so that files stored there can
# be reached through the local file system under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

# Folder on the mounted drive used by this notebook.
path = "/content/gdrive/MyDrive/Colab_Notebooks/ML/"

Mounted at /content/gdrive


In [3]:
# check that it is mounted properly
!ls gdrive/MyDrive/Colab_Notebooks/ML/

This is part of my GitHub repo, https://www.github.com/Irreq/gyarbete

Used for synthetic data generation


In [8]:
from sklearn.neighbors.kde import KernelDensity

from scipy.signal import argrelextrema



class KernelGenerator(object):
    """Generate synthetic sample sets from Gaussian "kernel seeds".

    ``self.bias`` maps a tag to either

    * a *seed*: a list of ``[mean, std]`` pairs, or
    * *generated data*: a flat, sorted list of floats drawn from those
      Gaussians and clipped to a window (``[0, 1]`` by default).

    ``start()`` / ``addbias()`` turn seeds into generated data, and
    ``fastgen()`` uses the generated ``"normal_distribution"`` entry to build
    a piecewise distribution.
    """

    def __init__(self, size=200, debug=False):
        """
        size  : target number of samples per generated distribution
        debug : when True, print statistics and plot a histogram of every
                generated distribution
        """
        self.data = None
        self.size = size
        self.debug = debug
        self.bias = {
            "normal_distribution": [[0.5, 0.2]],  # mean, standard deviation
        }

    @staticmethod
    def _is_seed(entry):
        """Return True if *entry* still holds ``[mean, std]`` pairs (not floats)."""
        if entry is None:
            return False
        try:
            first = entry[0]
        except (IndexError, KeyError, TypeError):
            return False
        return isinstance(first, (list, tuple))

    def kernel_normal_dist(self, mean, std, size=None, window=(0, 1)):
        """Draw ``size`` normal samples and keep those inside ``window``.

        Samples outside ``window`` are dropped, so the returned list may hold
        fewer than ``size`` values. ``size`` defaults to ``self.size``.
        """
        if size is None:
            size = self.size

        samples = np.random.normal(mean, std, size)
        low, high = window[0], window[1]

        return [value for value in samples if low <= value <= high]

    def kernel_density(self):
        """Return the samples of ``self.data`` that sit at density minima.

        A 9-bin histogram of ``self.data`` over [0, 1] is scaled to 0..100, a
        Gaussian kernel density estimate is fitted to those bin heights, and
        the local minima of its log-density are mapped back to positions in
        ``self.data``.

        Raises ValueError when the histogram is empty or when the estimate
        has no local minimum (callers retry via ``kernel_error_catcher``).
        """
        samples = np.array(self.data)
        n_samples = len(samples)

        bin_edges = np.linspace(0, 1, num=10)
        counts = np.histogram(samples, bins=bin_edges)[0]

        peak = counts.max()
        if peak == 0:
            raise ValueError("no samples inside [0, 1]; cannot estimate density")

        scaled = np.array([int(c * 100) for c in (counts / peak).tolist()])

        initial_length = int(len(scaled) * 2)  # 2 is an arbitrary good number to use

        kde = KernelDensity(kernel='gaussian', bandwidth=2).fit(scaled.reshape(-1, 1))
        grid = np.linspace(0, initial_length)
        log_density = kde.score_samples(grid.reshape(-1, 1))

        minima = grid[argrelextrema(log_density, np.less)[0]]
        if minima.size == 0:
            # An empty (float) index array cannot be used for indexing.
            raise ValueError("kernel density estimate has no local minimum")

        # Rescale grid positions to indices into the original samples.
        indices = [int((m / initial_length) * n_samples) for m in minima]

        return samples[np.array(indices)]

    def kernel_generator(self, localkernel):
        """Build one sorted distribution of about ``self.size`` samples.

        localkernel : list of ``[mean, std]`` pairs; ``self.size`` samples are
                      drawn per pair and pooled.

        The pool is thinned by striding and random deletion when too large.
        When too small it is topped up with samples centred on the first
        density minimum, averaged with the pool mean. The top-up is windowed
        as well, so the result can still be slightly shorter than
        ``self.size``.
        """
        pooled = []
        for kernel in localkernel:
            pooled.extend(self.kernel_normal_dist(kernel[0], kernel[1], self.size))
        pooled.sort()

        # A stride of 0 (pool smaller than half the target) would make the
        # slice raise, so never go below 1.
        stride = max(1, int(round(len(pooled) / self.size)))
        self.data = pooled[::stride]

        surplus = len(self.data) - self.size

        if surplus > 0:
            for _ in range(surplus):
                del self.data[np.random.randint(len(self.data))]

        elif surplus < 0:
            local_average = np.mean(np.array(self.data))
            centre = self.kernel_density()[0]
            filler = self.kernel_normal_dist(centre, 0.1, size=-surplus)
            filler = [(value + local_average) / 2 for value in filler]
            self.data.extend(filler)
            self.data.sort()

        if self.debug:
            import matplotlib.pyplot as plt

            print('elements : {}\nmean : {}\nmax : {}\nmin : {}'.format(len(self.data),np.mean(np.array(self.data)), max(self.data), min(self.data)))

            plt.hist(np.array(self.data), bins=np.linspace(0,1, num=100).tolist())
            plt.show()

        return self.data

    def kernel_error_catcher(self, kernel_seed):
        """Run ``kernel_generator`` and retry when it raises.

        Generation is random and can fail (e.g. no density minimum found), so
        it is retried. After more than 100 failures the last error is printed
        and ``None`` is returned.
        """
        max_failures = 100
        failures = 0

        while True:
            try:
                return self.kernel_generator(kernel_seed)
            except Exception as e:  # deliberate best-effort retry
                failures += 1
                if failures > max_failures:
                    print('Error overflow')
                    print(e)
                    print("error found in kernel density estimation last row")
                    return None

    def random_kernels(self, n_kernels):
        """Register ``n_kernels`` extra entries in ``self.bias`` under keys 0..n-1.

        The ``"normal_distribution"`` entry is generated first if it is still
        a seed. NOTE(review): ``dub`` ignores its argument and always returns
        the same two pairs — this looks unfinished.
        """
        if self._is_seed(self.bias['normal_distribution']):
            self.bias['normal_distribution'] = self.kernel_error_catcher(self.bias['normal_distribution'])

        def dub(n):
            return [0.2, 0.3], [0.8, 0.7]

        for i in range(n_kernels):
            x = np.random.choice(self.bias['normal_distribution'])

            print("normal_distribution")
            print(x)

            self.bias[i] = [dub(x)]

        return

    def start(self):
        """Replace every seed in ``self.bias`` with generated data.

        Entries that already hold generated floats are left untouched, so the
        method is safe to call more than once.
        """
        for key in list(self.bias):
            if not self._is_seed(self.bias[key]):
                continue
            self.bias[key] = self.kernel_error_catcher(self.bias[key])

    def addbias(self, *kernel):
        """
        Adds distributions from a dictionary


        kernel = eg, {'tag':[[0.4, 0.1]]}  # tag -> list of [mean, std]

        returns = dict() # tag -> generated distribution, or None when called
                         # without arguments

        """
        if len(kernel) == 0:
            return

        seeds = kernel[0]

        for key in seeds:
            self.bias[key] = self.kernel_error_catcher(seeds[key])

        return {key: self.bias[key] for key in seeds}

    def getbias(self):
        """Return the dictionary of all registered distributions."""
        return self.bias

    def setwindow(self, lower, upper, kernel_id):
        """Linearly map values from [0, 1] to [lower, upper].

        kernel_id : either a tag (str) of an entry in ``self.bias``, which is
                    then rescaled and stored back in place, or an iterable of
                    values, which is rescaled without touching ``self.bias``.

        returns   : list of rescaled values, or None when the tag is unknown
        """
        by_tag = isinstance(kernel_id, str)

        if by_tag:
            try:
                data = self.bias[kernel_id]
            except KeyError as e:
                print("Window {} < x < {} could not be set, due to:".format(lower, upper))
                print(e)
                return
        else:
            data = kernel_id

        data = [i*(upper-lower)+lower for i in data]

        if by_tag:
            self.bias[kernel_id] = data

        return data

    def fastgen(self, distribution):
        """Build a piecewise distribution on [0, 1] from relative weights.

        [0, 1] is split into ``len(distribution)`` equal windows. Window ``i``
        receives ``int(distribution[i] * self.size)`` samples, each drawn from
        the generated ``"normal_distribution"`` entry squeezed into that
        window.

        returns : numpy.array() of all samples, window by window
        """
        if len(distribution) == 0:
            return np.array([])

        resolution = self.size
        window = len(distribution) ** -1

        nd = self.getbias()["normal_distribution"]
        window_distribution = self.setwindow(0, window, nd)

        chunks = []
        for i, value in enumerate(distribution):
            count = max(int(value * resolution), 0)
            # One vectorised draw per window instead of one call per sample.
            chunks.append(i * window + np.random.choice(window_distribution, size=count))

        return np.concatenate(chunks)



In [9]:
# =============== Start Borrowed Code ======================
#
# Author : Ian Cotter-Llewellyn
# Source : https://github.com/ian-llewellyn/manchester-coding
#
# Behaviour is identical to the upstream implementation; the code is
# inlined in this file for ease of readability.

class Manchester(object):

    """
    Manchester line coder / decoder working on strings of '0' and '1'.

    Usage: Manchester(differential=True).encode(data)

    Symbol tables:
        G. E. Thomas (default):  0 = 01, 1 = 10
        IEEE 802.4 (invert):     0 = 10, 1 = 01
        Differential:            the symbol depends on the previous line
                                 level and on the bit

    When both flags are set, differential coding takes precedence.
    """
    _bit_symbol_map = {
        # bit: symbol
        '0': '01',
        '1': '10',
        'invert': {
            # bit: symbol
            '0': '10',
            '1': '01'},
        'differential': {
            # (init_level, bit): symbol
            ('1', '0'): '01',
            ('0', '0'): '10',
            ('0', '1'): '11',
            ('1', '1'): '00'
        }
    }

    def __init__(self, differential=False, invert=False):
        self._invert = invert
        self._differential = differential
        # Line level assumed before the first encoded bit.
        self._init_level = '0'

    def invert(self):
        """Toggle inverted (IEEE 802.4) coding."""
        self._invert = not self._invert

    def differential(self):
        """Toggle differential coding."""
        self._differential = not self._differential

    def _active_table(self):
        """Return the key -> symbol table of the scheme currently selected."""
        if self._differential:
            return self._bit_symbol_map['differential']
        if self._invert:
            return self._bit_symbol_map['invert']
        return self._bit_symbol_map

    def decode(self, symbols):
        """Translate a symbol string back to bits, two characters at a time.

        Pairs that match no symbol (including a trailing single character)
        contribute nothing to the output.
        """
        table = self._active_table()
        decoded = []

        for offset in range(0, len(symbols), 2):
            pair = symbols[offset:offset + 2]
            for key, symbol in table.items():
                if pair == symbol:
                    # Differential keys are (level, bit) tuples.
                    decoded.append(key[1] if self._differential else key)

        return ''.join(decoded)

    def encode(self, bits, init_level=None):
        """Translate a bit string to symbols.

        init_level optionally overrides the remembered line level. In
        differential mode the level left by the last symbol is kept on the
        instance and carries over to the next call.
        """
        if init_level:
            self._init_level = init_level

        pieces = []
        for bit in bits:
            if self._differential:
                # Differential Manchester Coding
                piece = self._bit_symbol_map['differential'][(self._init_level, bit)]
                self._init_level = piece[-1]
            elif self._invert:
                # IEEE 802.4 (Inverted Manchester Coding)
                piece = self._bit_symbol_map['invert'][bit]
            else:
                # Manchester Coding
                piece = self._bit_symbol_map[bit]
            pieces.append(piece)

        return ''.join(pieces)

# =============== End Borrowed Code ======================

def str_to_bin(data_string, encoding=False):

    """
    Convert a string to a string of '0'/'1' characters.

    Every character becomes its code point written in binary, zero-padded
    to at least eight digits.

     data_string : str()
        encoding : Boolean, apply differential Manchester coding to the bits

         returns : str()
    """

    bit_groups = [format(ord(char), "08b") for char in data_string]
    bits = "".join(bit_groups)

    if not encoding:
        return bits

    # Differential Manchester coding doubles the length of the bit string.
    return Manchester(differential=True).encode(bits)


In [193]:
# =============== Start Modulation =======================

class Modulation(object):
    """Amplitude-modulate a bit sequence onto a sinusoidal carrier."""

    def __init__(self, frequency=1e3, samplingrate=44.1e3,
                        bitrate=10, amplitude=0.5, encoding=True):

        """
            frequency : carrier frequency, must be > 0
         samplingrate : sampling rate, must exceed 2 * frequency
                        (preferably 44.1 kHz)
              bitrate : float or integer, must be > 0
            amplitude : envelope level used for every bit that is not 1,
                        must be within [0, 1]
             encoding : Boolean, Manchester-encode string payloads

        Raises AssertionError when a value is out of range.
        """

        assert float(frequency) > 0
        self.frequency = frequency

        assert float(samplingrate) > 2 * frequency, "Insufficient Nyquist rate"
        self.samplingrate = samplingrate

        assert float(bitrate) > 0
        self.bitrate = bitrate

        assert 0 <= amplitude <= 1, "Amplitude is out of boundaries."
        self.amplitude = amplitude

        self.encoding = encoding

    def sine_wave_generator(self, duration):

        """
        Sinusoidal Wave Generating Function

            duration : second(s)

             returns : numpy.array() #sine wave

        The time axis runs from 0 up to (excluding) ``duration`` in steps of
        1/samplingrate. Because the step is a float, the number of samples
        can differ by one from ``duration * samplingrate``; use ``_carrier``
        when an exact sample count is required.
        """

        arranged = np.arange(0, duration, 1/self.samplingrate, dtype=np.float32)

        # Carrier wave
        sinusoidal_wave = np.sin(2 * np.pi * self.frequency * arranged)

        return sinusoidal_wave

    def _carrier(self, n_samples):
        """Return exactly ``n_samples`` carrier samples as float32."""
        time_axis = np.arange(n_samples, dtype=np.float64) / self.samplingrate
        return np.sin(2 * np.pi * self.frequency * time_axis).astype(np.float32)

    def smooth(self, data, n, curve=0.05):

        """
        Frequency Change Damping Function

        Smooths signal so the speakers won't break on frequency change

           data : list of levels
              n : number of output samples per input value
          curve : higher value results in smoother curve

        returns : list with len(data) * n values; consecutive levels are
                  joined by a sigmoid-shaped transition
        """

        if len(data) == 0:
            return []

        # Repeat the first and last level so the edges get half a transition
        # of padding, which is trimmed off again below.
        padded = np.asarray([data[0], *data, data[-1]], dtype=float)

        D = np.linspace(0, 2, n)
        sigmaD = 1 / (1 + np.exp(-(1 - D) / curve))

        # One row per pair of neighbouring levels, n samples per row.
        x0 = padded[:-1, np.newaxis]
        x1 = padded[1:, np.newaxis]
        result = (x0 + (x1 - x0) * (1 - sigmaD)).ravel().tolist()

        start, end = int(np.floor(n/2)), int(np.ceil(n/2))

        return result[start:-end]

    def modulate(self, payload):

        """
        Amplitude Modulation Function

           payload : str, or list of bits. A str is converted to bits with
                     str_to_bin (Manchester-encoded when self.encoding is set).

           returns : numpy.array() #modulated signal, float32

        Bits equal to 1 get envelope level 1, every other bit gets
        self.amplitude. Each bit spans int(samplingrate / bitrate) samples.
        An empty payload yields an empty array.
        """

        if isinstance(payload, str):
            payload = str_to_bin(payload, encoding=self.encoding)
            payload = [int(i) for i in payload]

        # data preprocessing
        levels = [i if i == 1 else self.amplitude for i in payload]

        bit_length = int(self.samplingrate / self.bitrate)

        envelope = np.array(self.smooth(levels, bit_length), dtype=float)

        # Size the carrier from the envelope's sample count rather than from
        # a float duration, so the two always have the same length.
        carrier = self._carrier(len(envelope))

        if carrier.size == 0:
            return carrier

        carrier *= envelope

        peak = np.max(np.abs(carrier))
        if peak > 1:
            carrier /= peak

        return carrier

# ================= End Modulation =======================

In [234]:


# dataset generation

def add_white_noise(sig, k):
    """Blend a signal with uniform white noise.

        sig : one-dimensional signal (numpy array or any sequence of numbers)
          k : noise share within [0, 1]; 0 returns the signal unchanged,
              1 returns pure noise. A value outside the range is reported
              and replaced by 0.1.

    returns : numpy.array() holding noise * k + sig * (1 - k), rescaled so
              that its peak magnitude does not exceed 1
    """

    if not 0 <= k <= 1:
        print(f"K must be within 1 and 0 not {k}, k will now equal to 0.1")
        k = 0.1

    sig = np.asarray(sig, dtype=float)

    if sig.size == 0:
        return sig.copy()

    # Uniform noise in [-1, 1), drawn in a single vectorised call.
    white = np.random.uniform(-1.0, 1.0, size=sig.shape) * k

    mixed = white + sig * (1 - k)

    peak = np.max(np.abs(mixed))
    if peak > 1:
        mixed /= peak

    return mixed

def generate_dataset(n, size):
    """Generate ``n`` noisy modulated random bit sequences.

          n : number of examples
       size : number of bits per example

    returns : numpy.array() of dtype object with one row per example; a row
              holds the ``size`` bits followed by the noisy signal array as
              its last element.

    Bits are drawn at random, but after two equal bits the next one is
    forced to the opposite value, so no value occurs three times in a row.
    """

    kg = KernelGenerator()
    kg.start()

    # Noise levels to sample from, limited to [0, 0.5].
    bias = kg.fastgen([0.1, 0.4, 0.3, 0.2])
    bias = kg.setwindow(0, 0.5, bias)

    M = Modulation(frequency=8e3, bitrate=20)

    opposite = {1: 0, 0: 1}

    final = []

    for _ in range(n):

        data = []

        for i in range(size):

            if i >= 2 and data[-1] == data[-2]:
                data.append(opposite[data[-2]])
                continue

            data.append(np.random.choice([0, 1]))

        signal = M.modulate(data)

        signal = add_white_noise(signal, np.random.choice(bias))

        # Bits and signal have different shapes. Build the object row
        # explicitly, because np.array() on such a ragged list raises on
        # recent NumPy versions.
        row = np.empty(len(data) + 1, dtype=object)
        row[:-1] = data
        row[-1] = signal

        final.append(row)

    return np.array(final)


def generate(n, noise=False):
    """Generate ``n`` labelled, modulated examples.

          n : number of examples
      noise : when True, white noise with a randomly chosen level in
              [0, 0.4] is mixed into every signal. Off by default.

    returns : (data_x, data_y)
              data_x : list of float32 arrays, each the modulated signal
                       with 50-99 zeros before and after it
              data_y : list of tags, one of '0', '00', '1', '11'
    """

    # tag -> bit sequence that gets modulated
    scheme = {
        '0': [0],
        '00': [0, 0],
        '1': [1],
        '11': [1, 1],
    }

    data_x = []
    data_y = []

    M = Modulation(frequency=2e3, bitrate=50)

    # The noise-level distribution is expensive to build, so it is only
    # created when noise is actually requested.
    bias = None
    if noise:
        kg = KernelGenerator()
        kg.start()
        bias = kg.fastgen([0.1, 0.4, 0.3, 0.2])
        bias = kg.setwindow(0, 0.4, bias)

    tags = list(scheme.keys())

    for _ in range(n):

        tag = np.random.choice(tags)
        data_y.append(tag)

        signal = M.modulate(scheme[tag])

        # Silence of random length on both sides of the signal.
        lead = np.zeros(np.random.randint(50, 100))
        tail = np.zeros(np.random.randint(50, 100))
        signal = np.concatenate([lead, signal, tail])

        if noise:
            signal = add_white_noise(signal, np.random.choice(bias))

        data_x.append(np.float32(signal))

    return data_x, data_y
    



In [254]:
# data_x = [] # continious stream of modulated signals as either 0, 00, 1, 11

# data_y = [] # The corresponding labels but will be modulated as 0 = 0, 1 = 1, 00, = 2, 11 = 3


# Number of examples to generate.
size = 1000
data_x, data_y = generate(size)

# preprocessing data

from keras.preprocessing.sequence import pad_sequences

# Bring every signal to exactly 2100 samples: shorter ones are zero-padded at
# the end, longer ones are cut at the end.
data_x = pad_sequences(data_x, maxlen=2100, dtype='float', padding='post', truncating='post', value=0.)

# Scale by the largest value in the whole data set.
data_x = data_x / np.max(data_x)

# Append a trailing axis of length 1 (presumably the channel axis expected by
# Conv1D — confirm).
data_x = data_x[:,:,np.newaxis]



# Labeling
# 0 = 0, 1 = 1, 00, = 2, 11 = 3

data_y = pd.Series(data_y)
# NOTE(review): the result of value_counts() is neither stored nor printed.
data_y.value_counts()
# Replace the string tags by integer class ids.
data_y = data_y.map({'0':0, '1':1, '00':2, '11':3}).values


In [257]:
# Creation of deep learning model

from keras.layers import InputLayer, Conv1D, Dense, Flatten, MaxPool1D
from keras.models import Sequential

# 1-D convolutional classifier: two Conv1D + MaxPool1D stages followed by a
# 4-way softmax (one output per class id 0..3).
model = Sequential()

model.add(InputLayer(input_shape=data_x.shape[1:]))

model.add(Conv1D(filters=500, kernel_size=10, activation='relu'))
# NOTE(review): only `strides` is set here, so the pooling window keeps its
# default size while the stride is 16 — confirm this is intended.
model.add(MaxPool1D(strides=16))
model.add(Conv1D(filters=50, kernel_size=10, activation='relu'))
model.add(MaxPool1D(strides=8))
model.add(Flatten())
model.add(Dense(4, activation='softmax'))

# data_y holds integer class ids (0..3) and the network ends in a 4-way
# softmax, so the matching loss is sparse categorical cross-entropy.
# binary_crossentropy would treat the ids as per-output binary targets.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# display model info
model.summary()

# model.add(Conv1D(filters=50, kernel_size=10, activation='relu')) 
# model.add(MaxPool1D(strides=8)) 
# model.add(Conv1D(filters=50, kernel_size=10, activation='relu')) 
# model.add(MaxPool1D(strides=8)) 
# model.add(Flatten()) 
# model.add(Dense(4, activation='softmax'))

# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# # display model info
# model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_8 (Conv1D)            (None, 2091, 500)         5500      
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 (None, 131, 500)          0         
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 122, 50)           250050    
_________________________________________________________________
max_pooling1d_9 (MaxPooling1 (None, 16, 50)            0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 800)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 4)                 3204      
Total params: 258,754
Trainable params: 258,754
Non-trainable params: 0
________________________________________________

In [258]:
# train model
# NOTE(review): batch_size is 2000; if the data set holds fewer examples than
# that (size is set to 1000 in the data cell — confirm), every epoch is a
# single gradient step.
model.fit(data_x, data_y, batch_size=2000, epochs=10)

# data_x.shape[1:]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


KeyboardInterrupt: ignored

In [None]:

import tensorflow as tf
# import numpy as np
# import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import os
#%% Specify parameters 
batch_size = 40                    #Note that large batch sized is linked to sharp gradients
training_steps = 10                #Number of batches to train on
num_epochs = 60                    #None to repeat dataset until all steps are executed
# Raw strings keep the Windows backslashes literal; "\p" and "\T" are not
# valid escape sequences in ordinary string literals.
eval_folder = r'G:\powerLineData\TFR_eval_sfft'     #Subfolder containing TFR files with evaluation data
train_folder = r'G:\powerLineData\TFR_train_sfft'   #Subfolder containing TFR files with training data
predict_folder = r'G:\powerLineData\TFR_predict_sfft'   #Subfolder containing TFR files with prediction data
#%% Building the CNN Classifier
def cnn_model_fn(features, labels, mode):
    """Model function for CNN, for use with ``tf.estimator.Estimator``.

    features : dict with "signal_data" (reshaped here to [-1, 240, 200, 1])
               and "signal_ID"
      labels : binary labels; reshaped to a column unless predicting
        mode : a ``tf.estimator.ModeKeys`` value

    returns  : ``tf.estimator.EstimatorSpec`` for the given mode
               PREDICT : predictions ("classes", "probabilities", "signal_id")
               TRAIN   : loss and training op (plain gradient descent)
               EVAL    : loss plus the "accuracy" and "auc" metrics
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    if mode != tf.estimator.ModeKeys.PREDICT:
        labels = tf.reshape(labels, [-1, 1])

    input_layer = tf.reshape(features["signal_data"], [-1, 240, 200, 1])
    print(input_layer)

    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        strides=(2, 2),
        padding="same",
        activation=tf.nn.relu)
    # Output -1,120,100,32
    print(conv1)

    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(
        inputs=conv1,
        filters=64,
        kernel_size=[3, 3],
        padding="same",
        activation=tf.nn.relu)
    # Output -1,120,100,64
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # Output -1,60,50,64
    dropout = tf.layers.dropout(
        inputs=pool2, rate=0.1, training=is_training)

    # Convolutional Layer #3 and Pooling Layer #3
    conv3 = tf.layers.conv2d(
        inputs=dropout,
        filters=128,
        kernel_size=[3, 3],
        padding="same",
        activation=tf.nn.relu)
    # Output -1,60,50,128
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
    # Output -1,30,25,128
    dropout2 = tf.layers.dropout(
        inputs=pool3, rate=0.1, training=is_training)

    # Convolutional and pooling Layer #4
    conv4 = tf.layers.conv2d(
        inputs=dropout2,
        filters=200,
        kernel_size=[3, 3],
        padding="same",
        activation=tf.nn.relu)
    # Output -1,30,25,200
    pool4 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2)
    # Output -1,15,12,200

    # Dense Layers
    pool4_flat = tf.reshape(pool4, [-1, 15 * 12 * 200])
    dense = tf.layers.dense(inputs=pool4_flat, units=4096, activation=tf.nn.relu)

    dropout3 = tf.layers.dropout(
        inputs=dense, rate=0.2, training=is_training)

    dense2 = tf.layers.dense(inputs=dropout3, units=2048, activation=tf.nn.relu)

    dropout4 = tf.layers.dropout(
        inputs=dense2, rate=0.2, training=is_training)

    # Logits Layer: a single logit per example (binary classification)
    logits = tf.layers.dense(inputs=dropout4, units=1)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.round(tf.nn.sigmoid(logits)),
        "probabilities": tf.nn.sigmoid(logits, name="probs_tensor"),
        "signal_id": tf.reshape(features["signal_ID"], [-1, 1])
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes) via cross entropy
    loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels, logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode). "accuracy" is now a real
    # accuracy; the AUC that used to be reported under that name stays
    # available as "auc".
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"]),
        "auc": tf.metrics.auc(
            labels=labels, predictions=predictions["classes"]),
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

#%% CREATE ESTIMATOR

# Create the Estimator; it is configured with model_dir
# "/tmp4096/sfft_convnet_model".
discharge_classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn, model_dir="/tmp4096/sfft_convnet_model")

#%% Set Up a Logging Hook

# Set up logging for predictions: the tensor named "probs_tensor" (the sigmoid
# output created in cnn_model_fn) is logged under the label "probabilities".
tensors_to_log = {"probabilities": "probs_tensor"}

# Log the tensors above every 50 iterations.
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log, every_n_iter=50)

#%% Input function for training data

def dataset_input_fn(subfolder, batch_size, train = False, num_epochs=None):
    """Build the input dataset from all ``.tfrecord`` files in ``subfolder``.

     subfolder : directory holding the TFRecord files
    batch_size : number of examples per batch
         train : when True, shuffle with a buffer of ``batch_size * 2`` records
    num_epochs : number of passes over the data, None repeats indefinitely

    returns    : ``tf.data.Dataset`` of (features, label) batches, where
                 features has the keys "signal_data", "bw_data" and
                 "signal_ID"

    Files are read in sorted name order so that separate passes over the
    same folder (e.g. one for labels, one for predictions) see the records
    in the same order; ``os.listdir`` alone gives no ordering guarantee.
    """

    filenames = sorted(
        os.path.join(subfolder, name)
        for name in os.listdir(subfolder)
        if name.endswith('.tfrecord'))
    dataset = tf.data.TFRecordDataset(filenames)

    #Create record extraction function
    def parser(record):
        """Decode one serialized example into (feature dict, label)."""
        features = {
            'signal': tf.FixedLenFeature([50000], dtype=tf.float32),
            'signal_ID': tf.FixedLenFeature([], dtype=tf.int64),
            'measurement_ID': tf.FixedLenFeature([], dtype=tf.int64),
            'label': tf.FixedLenFeature([], dtype=tf.int64)}
        parsed = tf.parse_single_example(record, features)

        # Perform additional preprocessing on the parsed data.
        # Thresholded variant: square root, drop the first two rows, min-max
        # normalize, then round to 0/1.
        bw_data = tf.reshape(tf.sqrt(parsed['signal']), [-1, 250, 200])
        bw_data = tf.slice(bw_data, [0, 2, 0], [1, 240, 200])

        # Min max normalization
        bw_data = tf.div(
                tf.subtract(
                    bw_data,
                    tf.reduce_min(bw_data)
                ),
                tf.subtract(
                    tf.reduce_max(bw_data),
                    tf.reduce_min(bw_data)
                )
        )
        bw_data = tf.round(bw_data)

        signal_data = tf.reshape(parsed['signal'], [-1, 250, 200])
        #remove low frequency components
        signal_data = tf.slice(signal_data, [0, 2, 0], [1, 240, 200])

        #Normalize and scale data: cube root, standardize, divide by 6
        qube = tf.fill([240,200],1/3)
        signal_data = tf.pow(signal_data,qube)
        signal_data = tf.image.per_image_standardization(signal_data)

        norm_max = tf.fill([240,200],6.0)
        signal_data = tf.divide(signal_data,norm_max)

        label = tf.cast(parsed["label"], tf.int32)

        return {"signal_data": signal_data, "bw_data": bw_data, "signal_ID": parsed["signal_ID"]}, label

    # Use `Dataset.map()` to build a pair of a feature dictionary and a label
    # tensor for each example.
    dataset = dataset.map(parser)

    #Shuffle data if in training mode
    if train:
        dataset = dataset.shuffle(buffer_size=batch_size*2)  #Shuffles along first dimension(rows)(!)  and selects from buffer
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(num_epochs)

    # Each element of `dataset` is tuple containing a dictionary of features
    # (in which each value is a batch of values for that feature), and a batch of
    # labels.
    return dataset

#%% Train the classifier
# Runs at most `training_steps` batches from the training folder, with the
# logging hook attached.
discharge_classifier.train(
    input_fn=lambda : dataset_input_fn(train_folder, train = True, batch_size = batch_size, num_epochs=num_epochs),
    steps=training_steps,
    hooks=[logging_hook])

#%% Evaluate the model
# One unshuffled pass over the evaluation folder.
eval_results = discharge_classifier.evaluate(
        input_fn=lambda : dataset_input_fn(eval_folder, train = False, batch_size = batch_size, num_epochs=1))
print(eval_results)

#%% Predict
# One unshuffled pass over the prediction folder; the results are collected
# into a list.
results = discharge_classifier.predict(
        input_fn=lambda : dataset_input_fn(predict_folder, train = False, batch_size = batch_size, num_epochs=1))
results = list(results)

#%% Get labels from TFR files
# Read the first batch (up to 2178 records) of the evaluation data and keep
# its labels and signal IDs for the metric calculations.
with tf.Session() as sess:
    dataset = dataset_input_fn(eval_folder, train = False, batch_size = 2178, num_epochs=1)
    iterator = dataset.make_initializable_iterator()
    sess.run(iterator.initializer)
    batch = iterator.get_next()
    # Fetch both tensors in ONE run call. Evaluating them separately advances
    # the iterator twice, so labels and IDs would come from different batches.
    labels, signal_ids = sess.run([batch[1], batch[0]["signal_ID"]])

#%% MCC calculations
#predicted_probs = np.array(list(map(lambda p: p['probabilities'],results)), dtype=np.float32)
#predicted_class = np.array(list(map(lambda c: c['classes'],results)), dtype=np.int16)

#Turn predicted probabilities into 0/1 classes using a threshold
def score_model_measurement(probs,threshold):
    """Return 1 where the first column of ``probs`` exceeds ``threshold``, else 0.

    probs is a 2-D array; only column 0 is used. A value equal to the
    threshold maps to 0.
    """
    above = probs[:, 0] > threshold
    return np.array(list(map(int, above)))

#Print confusion matrix and calculate Matthews correlation coefficient (MCC)
def print_metrics(labels, scores):
    """Print the binary confusion matrix, accuracy and MCC.

     labels : true classes (0 or 1)
     scores : predicted classes (0 or 1)

    returns : the Matthews correlation coefficient, or NaN when it is
              undefined (a whole row or column of the matrix is zero)
    """
    # Fixing the label set guarantees a 2x2 matrix even when only one class
    # is present in the data.
    conf = confusion_matrix(labels, scores, labels=[0, 1])
    print('                 Confusion matrix')
    print('                 Score positive    Score negative')
    print('Actual positive    %6d' % conf[1,1] + '             %5d' % conf[1,0])
    print('Actual negative    %6d' % conf[0,1] + '             %5d' % conf[0,0])
    print('')
    print('Accuracy  %0.2f' % accuracy_score(labels, scores))

    # Work in floats: the product of four integer counts can overflow int64
    # for large data sets.
    TP = float(conf[1,1])
    TN = float(conf[0,0])
    FP = float(conf[0,1])
    FN = float(conf[1,0])

    denominator = np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
    if denominator > 0:
        MCC = ((TP*TN) - (FP*FN)) / denominator
    else:
        MCC = float('nan')
    print('MCC = %0.2f' %MCC)
    return MCC

#Print confusion matrix and Matthews correlation coefficient (MCC) based on labels vs predictions
#predictions = score_model_measurement(predicted_probs,0.5)
#MCC = print_metrics(labels, predictions)

#%% Training run with a custom validation each epoch

# Histories of the per-epoch evaluation values; one entry is appended per epoch.
loss_plot = np.array([])
accuracy_plot = np.array([])
MCC_plot = np.array([])
epochs_plot = np.array([])

for i in range(num_epochs):
    
    # Train for one pass over the training data (steps=None, num_epochs=1).
    discharge_classifier.train(
    input_fn=lambda : dataset_input_fn(train_folder, train = True, batch_size = batch_size, num_epochs=1),
    steps=None)
    
    # Evaluate on the evaluation folder.
    eval_results = discharge_classifier.evaluate(
        input_fn=lambda : dataset_input_fn(eval_folder, train = False, batch_size = batch_size, num_epochs=1))
    
    # Predict on the same evaluation folder to obtain per-example probabilities.
    results = discharge_classifier.predict(
        input_fn=lambda : dataset_input_fn(eval_folder, train = False, batch_size = batch_size, num_epochs=1))
    predicted_probs = np.array(list(map(lambda p: p['probabilities'],results)), dtype=np.float32)
    
    # Threshold at 0.5 and compare with `labels`.
    # NOTE(review): `labels` is not defined in this loop; it presumably comes
    # from the earlier label-extraction step and must cover the same records
    # in the same order as the predictions — confirm.
    scores = score_model_measurement(predicted_probs,0.5)
    MCC = print_metrics(labels, scores)
    
    loss_plot = np.append(loss_plot,eval_results['loss'])
    accuracy_plot = np.append(accuracy_plot,eval_results['accuracy'])
    # An undefined MCC (NaN) is plotted as 0.
    if np.isnan(MCC):
        MCC=0
    MCC_plot = np.append(MCC_plot,MCC)
    epochs_plot = np.append(epochs_plot,i)
    
    # Redraw the learning curves with all epochs so far; the saved image is
    # overwritten on every epoch.
    plt.figure(figsize=(10,6))
    plt.plot(epochs_plot,loss_plot,color='lightcoral', marker='o', linestyle='--', linewidth=1.5, markersize=5, label='loss')
    plt.plot(epochs_plot,accuracy_plot,color='steelblue', marker='s', linestyle='-.', linewidth=1.5, markersize=5,label='accuracy')
    plt.plot(epochs_plot,MCC_plot,color='seagreen', marker='^', linestyle='-', linewidth=1.5, markersize=5,label='MCC')
    
    
    plt.xlabel('Epoch')
    plt.legend()
    plt.savefig("Learning_plot.png")
    plt.show()




