In [None]:
# Mount Google Drive (Colab only) so the project folders under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd /content/drive/MyDrive/HDA project/Csv Recordings

/content/drive/MyDrive/HDA project/Csv Recordings


# Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
import datetime
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
import re
import time
from sklearn.metrics import mean_absolute_error
from scipy import signal
from sklearn.preprocessing import StandardScaler


# Utility



In [None]:
## Loader for the raw pulse-oximeter recordings.

def data_load_for_recordings(csvs):
  '''
  Load the pulse-oximeter recordings into cleaned DataFrames.

  Every file is parsed as tab-separated text, its first four columns are
  discarded by position and every row holding a missing value is removed.

  Input:
    csvs: list of csv file paths
  Output:
    a list with one cleaned DataFrame per file, in the same order as csvs.
    The original row labels are kept (the index is NOT reset), so use
    .iloc for positional access on the returned frames.
  '''
  res=[]
  for f in csvs:
    print(f)
    dftemp=pd.read_csv(f, sep='\t')
    # Build a new frame instead of mutating in place: drop the four leading
    # columns, then every incomplete row.
    dftemp=dftemp.drop(columns=list(dftemp.columns[:4])).dropna(axis=0)
    res.append(dftemp)
  return res

In [None]:
help(data_load_for_recordings)

Help on function data_load_for_recordings in module __main__:

data_load_for_recordings(csvs)
    Input:
      csvs: list of csv files
    Output:
      a list of dataframe well formatted



In [None]:
def split_long_record(data,freq,sec_to_split=60):
  '''
  Cut a recording into consecutive, non-overlapping windows of equal length.

  Input:
    data: DataFrame
    freq: sample rate (samples per second)
    sec_to_split: window size in seconds
  Output:
    a list of DataFrames, each holding round(freq*sec_to_split) rows, taken
    positionally with .iloc. Trailing rows that do not fill a whole window
    are discarded (their count is printed).
  Raises:
    ValueError: if the window length rounds to less than one sample
  '''
  samples_per_window = int(np.round(freq*sec_to_split))
  if samples_per_window < 1:
    # A zero-length window never consumes a row, so the split would never end.
    raise ValueError(f"a window of {sec_to_split}s at {freq}Hz contains no samples")

  # Integer division keeps the last window when the recording length is an
  # exact multiple of the window size.
  n_windows = data.shape[0] // samples_per_window
  df_splitted = [data.iloc[k*samples_per_window:(k+1)*samples_per_window]
                 for k in range(n_windows)]

  print("missing ---> ", data.shape[0]-n_windows*samples_per_window, "rows")
  return df_splitted

In [None]:
def assign_label(dfs):
  '''
  Attach the labels stored in the Labels folder to the recorded windows.

  Input:
      dfs: list of DataFrames (one per recording)
  Output:
      a list of DataFrames, one per labelled window, each carrying a
      constant 'Label' column.

  '''

  root2='/content/drive/MyDrive/HDA project/Labels'
  txts=[]
  for dirpath, dirnames, filenames in os.walk(root2):
    for file in filenames:
        txts.append(dirpath + '/' + file)
  txts.sort()

  dfprova=[]
  # NOTE(review): label file i is paired with dfs[7+i], the last two label
  # files are skipped and the recordings are split with freq=60 -- these
  # constants come from the original code; confirm they still match the data.
  for i in range(len(txts)-2):
    tempdf = split_long_record(dfs[7+i],60)
    with open(txts[i]) as f:
      entries = [line.strip() for line in f]
    # 'Na' marks a window without label and is stored as 0. Stripping makes
    # a final line without newline work; blank lines are ignored.
    labels_from_file = [0 if entry=='Na' else int(entry) for entry in entries if entry]
    for j, value in enumerate(labels_from_file):
      # assign() returns a new frame, so the window slice is never written to.
      dfprova.append(tempdf[j].assign(Label=value))

  return dfprova
    



In [None]:
def list_csv_txt(folder_path):
  '''
  Recursively collect the csv and txt files stored under a folder.

  Input:
      folder_path: folder path
  Output:
      (csvs, txts): two sorted lists of file paths, the first with every file
      whose name ends in '.csv', the second with every file whose name ends
      in '.txt'. Other files are ignored.

  '''
  csvs=[]
  txts=[]
  print(os.listdir(folder_path))
  for dirpath, dirnames, filenames in os.walk(folder_path):
    filenames.sort()
    print("founded", len(filenames)," filenames in", dirpath)

    for file in filenames:
      # Test the real extension: a substring search would also pick up names
      # such as 'data.csv.bak'.
      if file.endswith('.txt'):
        txts.append(dirpath + '/' + file)
      elif file.endswith('.csv'):
        csvs.append(dirpath + '/' + file)

  csvs.sort()
  txts.sort()
  print(f"Founded {len(csvs)} CSV files...")
  print(f"Founded {len(txts)} TXT files...")
  return csvs, txts

In [None]:
def get_frequency(txts):
  '''
  Read the sampling frequency from the first txt file of the list (for
  example the description file shipped with the generated signals).

  Input:
      txts: list of txt file paths; only txts[0] is opened
  Output:
      the first number found on the "Sampling frequency" line, as a float
      (integers and decimals are both accepted). If several lines match, the
      last one wins; 0 is returned when no line matches.
  '''
  hz=0
  with open(txts[0]) as f1:
    for content in f1:
      if "Sampling frequency" in content:
        # Accept '5', '500' and '96.68' alike.
        hz = float(re.findall(r"\d+(?:\.\d+)?", content)[0])
  return hz

In [None]:
def get_label_recorded(path):
  '''
  Read the labels of the pulse oximeter recordings.

  Input:
      path: folder path; every file found under it (recursively) is read
  Output:
      a dictionary mapping the position of each file in the sorted file list
      to its list of integer labels, one per non-empty line. The entry 'Na'
      is stored as 0.

  '''
  txts=[]
  for dirpath, dirnames, filenames in os.walk(path):
    for file in filenames:
        txts.append(dirpath + '/' + file)
  txts.sort()
  label_dict={}
  for i, txt in enumerate(txts):
    with open(txt) as f:
      entries = [line.strip() for line in f]
    # Stripping makes a final line without newline work; blank lines are ignored.
    label_dict[i]=[0 if entry=='Na' else int(entry) for entry in entries if entry]
  return label_dict

In [None]:
def get_recorded(dfs, freq, label_dic):
  '''
  Turn the pulse oximeter recordings into one-minute PPG windows with labels.

  Input:
      dfs: list of DataFrames exposing a PPGvalue column
      freq: sample rate
      label_dic: dictionary {position of the recording in dfs: list of labels,
                 one per window}
  Output:
      (merged, y): 2-D array with one window per row, and the list of labels
      in the same order. Recordings shorter than one minute, recordings
      without an entry in label_dic and recordings that yield no window are
      skipped. With nothing to merge the array has shape (0, samples per minute).
  Raises:
      IndexError: if a recording yields more windows than it has labels

  '''
  samples_one_minute=int(np.round(freq*60))
  blocks=[]       # one (windows, samples) array per labelled recording
  merged_rows=0
  y=[]
  for i in range(len(dfs)):
    print("-"*60)
    if dfs[i].shape[0]<samples_one_minute:
      print(f"dfs {i} too small! go further!")
      continue
    list_of_dfs=split_long_record(dfs[i],freq,sec_to_split=60)
    print(f"Splitting dfs {i} in {len(list_of_dfs)}")

    labelled = i in label_dic and len(list_of_dfs)>0
    if labelled:
      if len(label_dic[i])<len(list_of_dfs):
        raise IndexError(f"recording {i} has {len(list_of_dfs)} windows but only {len(label_dic[i])} labels")
      y.extend(label_dic[i][:len(list_of_dfs)])

    print("labels", len(y))
    if labelled:
      recorded_array=np.array([np.array(chunk.PPGvalue)[:samples_one_minute+1]
                               for chunk in list_of_dfs])
      print("recorded array",recorded_array.shape)
      blocks.append(recorded_array)
      merged_rows+=recorded_array.shape[0]
    print("merged", (merged_rows, samples_one_minute))

  # Concatenate once at the end instead of re-copying the result every round.
  if blocks:
    merged=np.concatenate(blocks, axis=0)
  else:
    merged=np.empty((0, samples_one_minute))
  print("Final Merged:", merged.shape)
  return merged,y


In [None]:
def label_for_synthetic(file):
  '''
  Read the label (respiratory rate) of a synthetic signal from its txt file.

  Input:
      file: file path
  Output:
      Integer: the first number on the first "Simulated respiratory rate"
      line. 0 is returned instead when a "Simulated heart rate" line with a
      value other than 80 is met first, or when no respiratory rate line exists.

  '''
  label=0
  with open(file) as f1:
    for content in f1:
      if "Simulated heart rate" in content:
        hr = int(re.findall(r"\d+", content)[0])
        if hr!=80:
          # Only the signals simulated at 80 beats/min receive a label.
          return label
      elif "Simulated respiratory rate" in content:
        return int(re.findall(r"\d+", content)[0])
  print(f"founded label {label}")
  return label

In [None]:
def check_repeated_values(idx, txts, target_rate=20):
  '''
  Tell whether a synthetic signal was simulated at a given respiratory rate.

  Input:
      idx: position of the txt file to inspect inside txts
      txts: list of txt file paths
      target_rate: respiratory rate to compare against (default 20)
  Output:
      True/False according to the first "Simulated respiratory rate" line,
      or None when the file has no such line.
  '''
  with open(txts[idx]) as f1:
    for content_next in f1:
      if "Simulated respiratory rate" in content_next:
        rr_next = int(re.findall(r"\d+", content_next)[0])
        return rr_next==target_rate
  return None


In [None]:
def read_csv_file(csvs):
  '''
  Load the PPG trace of every synthetic recording.

  Input:
      csvs: list of csv file paths; each file is parsed as header-less,
            comma-separated data whose columns are named ECG and PPG
  Output:
      list of 1-D arrays, one per file, holding only the PPG column

  '''
  ppg_traces=[]
  for position in range(len(csvs)):
    frame=pd.read_csv(csvs[position], names=['ECG', 'PPG'],sep=',')
    # Discard the ECG channel and flatten what is left to one dimension.
    ppg_only=frame.drop(['ECG'], axis=1)
    ppg_traces.append(ppg_only.to_numpy().reshape(-1))
  return ppg_traces

In [None]:
def check_datetime(df):
  '''
  Print the first timestamp, the last timestamp and the duration of a recording.

  Input:
      df: DataFrame with an OXYTimestamps column expressed in milliseconds
  Output:
      None; four lines are printed: first timestamp, last timestamp, the
      duration rendered as an offset from 1970-01-01, and the duration in seconds.
  '''
  # Positional access: the row labels may no longer start at 0 once rows
  # have been dropped from the frame.
  first=df.OXYTimestamps.iloc[0]
  last=df.OXYTimestamps.iloc[-1]
  print(pd.to_datetime(first, unit='ms'))
  print(pd.to_datetime(last, unit='ms'))
  prova=pd.to_datetime(last-first, unit='ms')
  print(prova)
  a_timedelta = prova - datetime.datetime(1970, 1, 1)
  seconds = a_timedelta.total_seconds()

  print(seconds)

In [None]:
def get_synthetic_data(csvs, txts, df_synthetic_array):
  '''
  Slice every synthetic trace into overlapping one-minute windows.

  Input:
      csvs: not used by the function; kept so existing calls keep working
      txts: txt files of the dataset; the sampling frequency is read from
            them through get_frequency
      df_synthetic_array: list of 1-D arrays, one per synthetic recording
  Output:
      (merged, n_last): 2-D array with one window per row, and the number of
      windows extracted from the LAST trace.
      NOTE(review): callers use n_last as the per-file window count, which
      assumes that all traces have the same length -- confirm.
  Raises:
      ValueError: if the frequency is below 1 Hz, if no trace is given, or
                  (from np.stack) if a trace is too short for one window

  '''
  start_time = time. time()
  hz=get_frequency(txts)
  samples_one_minute=int(np.round(hz*60))
  stride=int(hz)   # consecutive windows start one (truncated) second apart
  if samples_one_minute<1 or stride<1:
    # A zero stride would never advance and the slicing loop would not end.
    raise ValueError(f"unusable sampling frequency: {hz}")

  blocks=[]; total_rows=0; samples=[]
  for i in range(len(df_synthetic_array)):
    # Progress report; 'samples' still holds the windows of the previous trace.
    if i!=0 and total_rows%(len(samples)*20)==0:
      print("what merged so far: ", (total_rows, samples_one_minute))
    trace=df_synthetic_array[i]
    samples=[]; idx_start=0
    # A window is kept only while its end index stays strictly below the
    # trace length (same bound as the original implementation).
    while idx_start+samples_one_minute<=trace.shape[0]-1:
      samples.append(trace[idx_start:idx_start+samples_one_minute])
      idx_start=idx_start+stride
    blocks.append(np.stack(samples, axis=0 ))
    total_rows+=len(samples)

  if not blocks:
    raise ValueError("df_synthetic_array is empty")
  # Concatenate once at the end instead of re-copying the result every round.
  merged=np.concatenate(blocks, axis=0)

  print("final merge shape",merged.shape)
  print("-"*40)
  print("Time Execution: ")
  print("--- %s seconds ---" % np.round(time. time() - start_time,2))
  return merged, len(samples)

In [None]:
def read_bimdc(csvs):
  '''
  Load the signals and the reference values of the BIDMC recordings.

  Input:
      csvs: list of csv file paths
  Output:
      (df_real_array, df_labels_real):
        - list of 1-D arrays with the ' PLETH' column of every file whose
          path contains '_Signals'
        - list of DataFrames with the columns 'Time [s]' and ' RESP' of every
          file whose path contains '_Numerics'
      Any other file is ignored.

  '''
  df_real_array=[]; df_labels_real=[]
  for path in csvs:
    if '_Signals' in path:
      df_real_array.append(pd.read_csv(path)[' PLETH'].to_numpy().reshape(-1))
    elif '_Numerics' in path:
      df_labels_real.append(pd.read_csv(path)[['Time [s]',' RESP']])
  return df_real_array, df_labels_real

def get_bimdc(list_array_bimdc, window=7500, target_len=5801):
  '''
  Cut the BIDMC traces into windows and resample each window.

  Input:
      list_array_bimdc: list of 1-D arrays
      window: number of consecutive samples per window (default 7500)
      target_len: number of samples of each window after resampling with
                  scipy.signal.resample (default 5801)
  Output:
      2-D array with one resampled window per row, in input order. A window
      is kept only while its end index is strictly below the trace length.
      With no window at all the array has shape (0, target_len).
  Raises:
      ValueError: if window is smaller than 1

  '''
  if window<1:
    raise ValueError(f"window must be positive, got {window}")
  resampled=[]
  for trace in list_array_bimdc:
    j=0
    while (j+window)<trace.shape[0]:
      resampled.append(signal.resample(trace[j:j+window], target_len))
      j=j+window

  if not resampled:
    return np.empty((0, target_len))
  return np.array(resampled)

def get_bimdc_label(list_label_bimdc):
  '''
  Extract the labels of the BIDMC windows.

  Input:
      list_label_bimdc: list of DataFrames; the label is read from the
                        second column at rows 60, 120, ..., 480
  Output:
      (labels, indices): the usable labels cast to int, and their positions
      in the flat list of all sampled values (eight per DataFrame). Values
      equal to 0 or missing are left out.

  '''
  sampled=[frame.iloc[60*minute,1]
           for frame in list_label_bimdc
           for minute in range(1,9)]

  labels_real=[]; indices=[]
  for position, value in enumerate(sampled):
    if value == 0 or pd.isna(value):
      continue
    indices.append(position)
    labels_real.append(int(value))
  return labels_real, indices
 


# Loading real data and preparing data structure

## Loading the files

In [None]:
# Drive folders of the synthetic datasets (one folder per simulation setting).
folder_path_syn_ar02= "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_ar_02"
folder_path_syn_ar03= "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_ar_03"
folder_path_syn_ar04= "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_ar_04"
folder_path_syn_arfm_3= "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_arfm_3"
folder_path_syn_arfm_4= "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_arfm_4"
folder_path_syn_sig_range_3= "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_sig_range_3"
folder_path_syn_sig_range_6= "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_sig_range_6"
folder_path_syn_sig_range_9= "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_sig_range_9"

# Resampled synthetic set used by the "Standard" experiment, and the BIDMC recordings.
folder_path_little = "/content/drive/MyDrive/HDA project/Synthetic Data/rrest-syn_csv_modified_hz"
folder_path_real = '/content/drive/MyDrive/HDA project/Synthetic Data/BIMDC'

# Sorted csv / txt path lists of every folder.
csvs_syn, txts_syn= list_csv_txt(folder_path_little)
csvs_syn_ar02, txts_syn_ar02= list_csv_txt(folder_path_syn_ar02)
csvs_syn_ar03, txts_syn_ar03= list_csv_txt(folder_path_syn_ar03)
csvs_syn_ar04, txts_syn_ar04= list_csv_txt(folder_path_syn_ar04)

csvs_syn_arfm3, txts_syn_arfm3= list_csv_txt(folder_path_syn_arfm_3)
csvs_syn_arfm4, txts_syn_arfm4= list_csv_txt(folder_path_syn_arfm_4)

csvs_syn_range3, txts_syn_range3= list_csv_txt(folder_path_syn_sig_range_3)
csvs_syn_range6, txts_syn_range6= list_csv_txt(folder_path_syn_sig_range_6)
csvs_syn_range9, txts_syn_range9= list_csv_txt(folder_path_syn_sig_range_9)


csvs_real, txts_real= list_csv_txt(folder_path_real)

['rrest-syn011_fix.txt', 'rrest-syn084_data.csv', 'rrest-syn041_data.csv', 'rrest-syn055_data.csv', 'rrest-syn035_data.csv', 'rrest-syn057_data.csv', 'rrest-syn012_data.csv', 'rrest-syn080_fix.txt', 'rrest-syn043_fix.txt', 'rrest-syn049_fix.txt', 'rrest-syn026_data.csv', 'rrest-syn009_fix.txt', 'rrest-syn071_data.csv', 'rrest-syn047_data.csv', 'rrest-syn078_fix.txt', 'rrest-syn084_fix.txt', 'rrest-syn018_data.csv', 'rrest-syn077_fix.txt', 'rrest-syn001_data.csv', 'rrest-syn028_data.csv', 'rrest-syn023_fix.txt', 'rrest-syn030_fix.txt', 'rrest-syn087_data.csv', 'rrest-syn040_fix.txt', 'rrest-syn034_data.csv', 'rrest-syn079_data.csv', 'rrest-syn013_fix.txt', 'rrest-syn036_fix.txt', 'rrest-syn027_fix.txt', 'rrest-syn069_data.csv', 'rrest-syn050_data.csv', 'rrest-syn072_data.csv', 'rrest-syn019_data.csv', 'rrest-syn004_data.csv', 'rrest-syn032_data.csv', 'rrest-syn010_data.csv', 'rrest-syn070_fix.txt', 'rrest-syn069_fix.txt', 'rrest-syn056_fix.txt', 'rrest-syn044_fix.txt', 'rrest-syn037_fix

In [None]:

# Keep only the first 171 csv paths of every (sorted) list; according to the
# original note the remaining files relate to the ECG.
# NOTE(review): lists shorter than 171 entries are left unchanged by the slice.

csvs_syn_ar02=csvs_syn_ar02[:171]
csvs_syn_ar03=csvs_syn_ar03[:171]
csvs_syn_ar04=csvs_syn_ar04[:171]

csvs_syn_arfm3=csvs_syn_arfm3[:171]
csvs_syn_arfm4=csvs_syn_arfm4[:171]

csvs_syn_range3=csvs_syn_range3[:171]
csvs_syn_range6=csvs_syn_range6[:171]
csvs_syn_range9=csvs_syn_range9[:171]

csvs_syn=csvs_syn[:171]



## Pulse oximeter data

In [None]:
# Pulse-oximeter recordings: csv files plus the txt description read later by get_frequency.
folder_path_recorded= "/content/drive/MyDrive/HDA project/Csv Recordings"

csvs_recorded, txts_recorded= list_csv_txt(folder_path_recorded)

['REMOCOP_19_2022127_17h55.csv', 'REMOCOP_17_2022127_18h13.csv', 'REMOCOP_20_2022127_18h15.csv', 'REMOCOP_18_2022127_18h18.csv', 'REMOCOP_18_2022127_18h21.csv', 'REMOCOP_VIDEO_2022127_21h50.csv', 'REMOCOP_VIDEO_2022127_21h59.csv', 'REMOCOP_VIDEO_2022127_22h11.csv', '.ipynb_checkpoints', '20211211_20_15h43.csv', '20211211_25_15h48.csv', 'REMOCOP_VIDEO_2022128_12h8.csv', 'REMOCOP_VIDEO_2022128_13h42.csv', 'description.txt']
founded 13  filenames in /content/drive/MyDrive/HDA project/Csv Recordings
founded 0  filenames in /content/drive/MyDrive/HDA project/Csv Recordings/.ipynb_checkpoints
Founded 12 CSV files...
Founded 1 TXT files...


In [None]:


# One cleaned DataFrame per recording, in the sorted order of csvs_recorded.
dfs = data_load_for_recordings(csvs_recorded)


/content/drive/MyDrive/HDA project/Csv Recordings/20211211_20_15h43.csv
/content/drive/MyDrive/HDA project/Csv Recordings/20211211_25_15h48.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_17_2022127_18h13.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_18_2022127_18h18.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_18_2022127_18h21.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_19_2022127_17h55.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_20_2022127_18h15.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_VIDEO_2022127_21h50.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_VIDEO_2022127_21h59.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_VIDEO_2022127_22h11.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_VIDEO_2022128_12h8.csv
/content/drive/MyDrive/HDA project/Csv Recordings/REMOCOP_VIDEO_2022128_13h42.csv


In [None]:
# Labels are keyed by the position of the label file in the sorted Labels folder;
# get_recorded uses the same integer as the position of the recording in dfs.
label_dict = get_label_recorded('/content/drive/MyDrive/HDA project/Labels')
recorded, label_recorded = get_recorded(dfs, get_frequency(txts_recorded), label_dict)

------------------------------------------------------------
dfs 0 too small! go further!
------------------------------------------------------------
missing --->  955 rows
Splitting dfs 1 in 1
labels 1
recorded array (1, 5801)
merged (1, 5801)
------------------------------------------------------------
dfs 2 too small! go further!
------------------------------------------------------------
missing --->  395 rows
Splitting dfs 3 in 1
labels 2
recorded array (1, 5801)
merged (2, 5801)
------------------------------------------------------------
missing --->  119 rows
Splitting dfs 4 in 1
labels 3
recorded array (1, 5801)
merged (3, 5801)
------------------------------------------------------------
missing --->  1142 rows
Splitting dfs 5 in 1
labels 4
recorded array (1, 5801)
merged (4, 5801)
------------------------------------------------------------
missing --->  666 rows
Splitting dfs 6 in 1
labels 5
recorded array (1, 5801)
merged (5, 5801)
---------------------------------------

In [None]:
# Fit the scaler on the recorded pulse-oximeter windows; the same fitted scaler is
# reused further down to transform the synthetic and the BIDMC windows.
# NOTE(review): `recorded` is also the set the models are evaluated on, and
# StandardScaler standardises each column (here: each sample position) across
# windows -- confirm that both choices are intended.
scaler= StandardScaler()
scaler.fit(recorded)
recorded_scaled = scaler.transform(recorded)

In [None]:
# Drop the windows labelled 0 ('Na' entries of the label files are loaded as 0).
# NOTE(review): this cell overwrites label_recorded and recorded_scaled, so running
# it twice appends a second trailing axis; re-run the cells above before repeating it.
print(len(label_recorded))
indices = [i for i, x in enumerate(label_recorded) if x != 0]
label_recorded = [label_recorded[index] for index in indices]
print(len(label_recorded))

# Keep the matching rows and add a trailing channel axis: (windows, samples, 1).
recorded_scaled=recorded_scaled[indices]
recorded_scaled=recorded_scaled[..., np.newaxis]
print(recorded_scaled.shape)

64
53
(53, 5801, 1)


## BIDMC

In [None]:
# Load the BIDMC PLETH traces and their (time, RESP) reference tables, then
# print how many of each were found and the shape of the first one.
list_array_bimdc, list_label_bimdc= read_bimdc(csvs_real)
print(len(list_array_bimdc))
print(len(list_label_bimdc))
print(list_array_bimdc[0].shape)
print(list_label_bimdc  [0].shape)


53
53
(60001,)
(481, 2)


In [None]:
X_real = get_bimdc(list_array_bimdc)
y_real, indices = get_bimdc_label(list_label_bimdc)
X_real = X_real[indices]

print(X_real.shape)
print(len(y_real))

(418, 5801)
418


# Model

In [None]:
def residual_module(layer_in, n_filters, kernel=3):
  '''
  Build one down-sampling residual block of 1-D convolutions.

  Input:
      layer_in: input tensor (channels-last layout is assumed by the original comments)
      n_filters: number of filters of every convolution in the block
      kernel: kernel size of the four convolutions of the main branch
  Output:
      tensor produced by adding the main branch to the shortcut branch and
      applying a ReLU
  '''
  merge_input = layer_in
  # Shortcut branch: a kernel-size-1 convolution with stride 2 is always applied
  # (the original check on the number of filters is disabled), so it matches the
  # filter count and the stride of the main branch.
  merge_input = tf.keras.layers.Conv1D(n_filters, kernel_size=1, padding='same', strides=2)(layer_in)
  # Main branch: four stacked convolutions; only the first one uses stride 2.
  # No activation argument is passed to any of them.
  conv1 = tf.keras.layers.Conv1D(n_filters, kernel_size=kernel,padding='same', strides=2)(layer_in)
  conv2 = tf.keras.layers.Conv1D(n_filters, kernel_size=kernel,padding='same', strides=1)(conv1)
  conv3 = tf.keras.layers.Conv1D(n_filters, kernel_size=kernel, padding='same',strides=1)(conv2)
  conv4 = tf.keras.layers.Conv1D(n_filters, kernel_size=kernel, padding='same',strides=1)(conv3)


  # Element-wise sum of the two branches.
  layer_out = tf.keras.layers.add([conv4, merge_input])
  # ReLU applied to the summed output.
  layer_out = tf.keras.layers.Activation('relu')(layer_out)
  return layer_out
def create_model(input_length=None):
  '''
  Build and compile the convolutional regression model.

  Input:
      input_length: number of samples of each input window. When omitted it is
                    read from the global X_train (original behaviour), which
                    must then exist in the notebook namespace.
  Output:
      compiled tf.keras.Model taking (input_length, 1) windows and returning a
      single non-negative value; optimiser Adam, loss mean absolute error
  '''
  if input_length is None:
    # Backward-compatible default: size the input from the current training set.
    input_length = X_train.shape[1]
  visible =  tf.keras.layers.Input(shape=(input_length,1) )

  # Five residual blocks; each one is built with stride-2 convolutions.
  res_block = residual_module(visible,n_filters=6 )
  res_block = residual_module(res_block, n_filters=12)
  res_block = residual_module(res_block, n_filters=12)
  res_block = residual_module(res_block, n_filters=12)
  res_block = residual_module(res_block, n_filters=12)


  x = tf.keras.layers.AveragePooling1D(strides=2, padding='same') (res_block)

  # Regression head; the final ReLU keeps the prediction non-negative.
  x = tf.keras.layers.Flatten() (x)
  x = tf.keras.layers.Dense(20) (x)
  x = tf.keras.layers.Dense(10) (x)
  x = tf.keras.layers.Dense(1, activation="relu") (x)


  # create model
  model = tf.keras.Model(inputs=visible, outputs=x)
  model.compile(optimizer='Adam', loss='mean_absolute_error')

  return model

# Experiments

## Standard: AM =0.1 BW = 0.05, General Amplitude = 1

In [None]:
# Standard synthetic set: load the PPG traces, then slice them into one-minute windows.
# `dim` is the number of windows extracted from the last trace.
df_synthetic_array_syn=read_csv_file(csvs_syn)
X_syn, dim=get_synthetic_data(csvs_syn, txts_syn, df_synthetic_array_syn)


what merged so far:  (3040, 5801)
what merged so far:  (6080, 5801)
what merged so far:  (9120, 5801)
what merged so far:  (12160, 5801)
final merge shape (13224, 5801)
----------------------------------------
Time Execution: 
--- 7.13 seconds ---


In [None]:
# Apply the scaler fitted on the recorded pulse-oximeter windows to the synthetic and BIDMC windows.
X_scaled_syn = scaler.transform(X_syn) 
X_scaled_real = scaler.transform(X_real) 
print(X_scaled_syn.shape)
print(X_scaled_real.shape)


(13224, 5801)
(418, 5801)


In [None]:
# Stack synthetic and BIDMC windows (synthetic first) and add a trailing channel axis.
X = np.concatenate((X_scaled_syn, X_scaled_real), axis=0)
X = X[..., np.newaxis] 

print(X.shape)



(13642, 5801, 1)


In [None]:
# One label per synthetic file: the even rates 4..60, repeated three times.
# NOTE(review): this relies on the sorted file order matching this rate sequence -- confirm.
label_synthetic = list(range(4,61,2))*3
# Every file contributes `dim` consecutive windows, all sharing its label.
y_syn = [rate for rate in label_synthetic for _ in range(dim)]

# Same order as X: synthetic windows first, BIDMC windows after.
y=y_syn+y_real
print(len(y))


13642


In [None]:
# 70/30 shuffled hold-out split.
# NOTE(review): the K-fold cell below concatenates X_train and X_test again, so
# the split only reorders the data (and supplies X_train.shape to create_model).
X_train, X_test, y_train, y_test=train_test_split(X,y, test_size=0.3, 
                                                  shuffle=True, random_state=1)


In [None]:

# Convert the label splits to NumPy arrays (y was assembled as a Python list);
# the K-fold cell concatenates and fancy-indexes them.
y_train = np.array(y_train)
y_test = np.array(y_test)


print("X_train:",X_train.shape, "y_train:",len(y_train))
print("X_test:",X_test.shape, "y_test:",len(y_test))





X_train: (9549, 5801, 1) y_train: 9549
X_test: (4093, 5801, 1) y_test: 4093


In [None]:
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import KFold

# Cross-validation runs over the whole synthetic + BIDMC set, so the hold-out
# split made above is merged back together.
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Fixed seed: the fold assignment is identical on every Restart & Run All.
# NOTE(review): windows cut from one synthetic trace overlap heavily (stride of
# about one second, length of one minute), so shuffled folds share near-duplicate
# windows between training and validation -- consider grouping folds by source file.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
loss_per_fold=[]

for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):

  # Fresh model and fresh callback per fold. restore_best_weights makes the
  # evaluation use the best validation epoch, not the one reached after the
  # patience window has elapsed.
  model = create_model()
  es = EarlyStopping(monitor='val_loss', mode='min',verbose=1, patience=5,
                     restore_best_weights=True)

  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # Fit data to model
  history = model.fit(inputs[train], targets[train], validation_data=(inputs[test], targets[test]), batch_size=254, epochs =100, 
                      callbacks=[es])
  # Score on the recorded pulse-oximeter windows, which are never used for fitting the model.
  scores = model.evaluate(recorded_scaled, np.array(label_recorded), verbose=0)
  print(f'MY DATA: Score for fold {fold_no}: {model.metrics_names} of {scores}')
  loss_per_fold.append(scores)

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 26: early stopping
MY DATA: Score for fold 1: ['loss'] of 2.7290632724761963
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100


In [None]:
loss_per_fold

[2.7290632724761963,
 4.857781887054443,
 6.990306854248047,
 5.760232448577881,
 2.3988912105560303]

In [None]:
print(f"mean {np.mean(loss_per_fold)} and std {np.std(loss_per_fold)}")

mean 4.547255134582519 and std 1.7582679961096377


## AM=0.2 and BW=0.2

In [None]:
# AM=0.2 / BW=0.2 synthetic set: load the PPG traces, then slice them into one-minute windows.
# `dim_ar02` is the number of windows extracted from the last trace.
df_synthetic_array_syn_ar02=read_csv_file(csvs_syn_ar02)
X_syn_ar02, dim_ar02=get_synthetic_data(csvs_syn_ar02, txts_syn_ar02, df_synthetic_array_syn_ar02)


what merged so far:  (4840, 5801)
what merged so far:  (9680, 5801)
what merged so far:  (14520, 5801)
what merged so far:  (19360, 5801)
what merged so far:  (24200, 5801)
what merged so far:  (29040, 5801)
what merged so far:  (33880, 5801)
what merged so far:  (38720, 5801)
final merge shape (41382, 5801)
----------------------------------------
Time Execution: 
--- 40.53 seconds ---


In [None]:
# Apply the scaler fitted on the recorded pulse-oximeter windows to the synthetic and BIDMC windows.
X_scaled_syn_ar02 = scaler.transform(X_syn_ar02) 
X_scaled_real = scaler.transform(X_real) 
print(X_scaled_syn_ar02.shape)
print(X_scaled_real.shape)


(41382, 5801)
(418, 5801)


In [None]:
# Stack synthetic and BIDMC windows (synthetic first) and add a trailing channel axis.
X_ar02 = np.concatenate((X_scaled_syn_ar02, X_scaled_real), axis=0)
X_ar02 = X_ar02[..., np.newaxis] 

print(X_ar02.shape)



(41800, 5801, 1)


In [None]:
# One label per synthetic file: every rate 4..60, repeated three times.
# NOTE(review): this relies on the sorted file order matching this rate sequence -- confirm.
label_synthetic = list(range(4,61))*3
# Every file contributes `dim_ar02` consecutive windows, all sharing its label.
y_syn = [rate for rate in label_synthetic for _ in range(dim_ar02)]

# Same order as X_ar02: synthetic windows first, BIDMC windows after.
y=y_syn+y_real
print(len(y))


41800


In [None]:
# 70/30 shuffled hold-out split.
# NOTE(review): the K-fold cell below concatenates X_train and X_test again, so
# the split only reorders the data (and supplies X_train.shape to create_model).
X_train, X_test, y_train, y_test=train_test_split(X_ar02,y, test_size=0.3, 
                                                  shuffle=True, random_state=1)


In [None]:

# Convert the label splits to NumPy arrays (y was assembled as a Python list);
# the K-fold cell concatenates and fancy-indexes them.
y_train = np.array(y_train)
y_test = np.array(y_test)


print("X_train:",X_train.shape, "y_train:",len(y_train))
print("X_test:",X_test.shape, "y_test:",len(y_test))





X_train: (29260, 5801, 1) y_train: 29260
X_test: (12540, 5801, 1) y_test: 12540


In [None]:
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import KFold

# Cross-validation runs over the whole synthetic + BIDMC set, so the hold-out
# split made above is merged back together.
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Fixed seed: the fold assignment is identical on every Restart & Run All.
# NOTE(review): windows cut from one synthetic trace overlap heavily (stride of
# about one second, length of one minute), so shuffled folds share near-duplicate
# windows between training and validation -- consider grouping folds by source file.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
loss_per_fold=[]

for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):

  # Fresh model and fresh callback per fold. restore_best_weights makes the
  # evaluation use the best validation epoch, not the one reached after the
  # patience window has elapsed.
  model = create_model()
  es = EarlyStopping(monitor='val_loss', mode='min',verbose=1, patience=5,
                     restore_best_weights=True)

  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # Fit data to model
  history = model.fit(inputs[train], targets[train], validation_data=(inputs[test], targets[test]), batch_size=254, epochs =50, 
                      callbacks=[es])
  # Score on the recorded pulse-oximeter windows, which are never used for fitting the model.
  scores = model.evaluate(recorded_scaled, np.array(label_recorded), verbose=0)
  print(f'MY DATA: Score for fold {fold_no}: {model.metrics_names} of {scores}')
  loss_per_fold.append(scores)

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 26: early stopping
MY DATA: Score for fold 1: ['loss'] of 2.6910500526428223
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/

In [None]:
loss_per_fold

[2.6910500526428223,
 3.6491811275482178,
 2.3197333812713623,
 3.2840518951416016,
 2.271413803100586]

In [None]:
sum(loss_per_fold)/len(loss_per_fold)

2.843086051940918

In [None]:
print(f"mean {np.mean(loss_per_fold)} and std {np.std(loss_per_fold)}")

mean 2.843086051940918 and std 0.5418601791720292


## AM=0.3 and BW=0.3

In [None]:
# AM=0.3 / BW=0.3 synthetic set: load the PPG traces, then slice them into one-minute windows.
# `dim_ar03` is the number of windows extracted from the last trace.
df_synthetic_array_syn_ar03=read_csv_file(csvs_syn_ar03)
X_syn_ar03, dim_ar03=get_synthetic_data(csvs_syn_ar03, txts_syn_ar03, df_synthetic_array_syn_ar03)


what merged so far:  (4840, 5801)
what merged so far:  (9680, 5801)
what merged so far:  (14520, 5801)
what merged so far:  (19360, 5801)
what merged so far:  (24200, 5801)
what merged so far:  (29040, 5801)
what merged so far:  (33880, 5801)
what merged so far:  (38720, 5801)
final merge shape (41382, 5801)
----------------------------------------
Time Execution: 
--- 36.55 seconds ---


In [None]:
# Apply the scaler fitted on the recorded pulse-oximeter windows to the synthetic and BIDMC windows.
X_scaled_syn_ar03 = scaler.transform(X_syn_ar03) 
X_scaled_real = scaler.transform(X_real) 
print(X_scaled_syn_ar03.shape)
print(X_scaled_real.shape)


(41382, 5801)
(418, 5801)


In [None]:
# Stack synthetic and BIDMC windows (synthetic first) and add a trailing channel axis.
X_ar03 = np.concatenate((X_scaled_syn_ar03, X_scaled_real), axis=0)
X_ar03 = X_ar03[..., np.newaxis] 

print(X_ar03.shape)



(41800, 5801, 1)


In [None]:
# One label per synthetic file: every rate 4..60, repeated three times.
# NOTE(review): this relies on the sorted file order matching this rate sequence -- confirm.
label_synthetic = list(range(4,61))*3
# Every file contributes `dim_ar03` consecutive windows, all sharing its label.
y_syn = [rate for rate in label_synthetic for _ in range(dim_ar03)]

# Same order as X_ar03: synthetic windows first, BIDMC windows after.
y=y_syn+y_real
print(len(y))


41800


In [None]:
# 70/30 shuffled hold-out split.
# NOTE(review): the K-fold cell below concatenates X_train and X_test again, so
# the split only reorders the data (and supplies X_train.shape to create_model).
X_train, X_test, y_train, y_test=train_test_split(X_ar03,y, test_size=0.3, 
                                                  shuffle=True, random_state=1)


In [None]:

# Convert the label splits to NumPy arrays (y was assembled as a Python list);
# the K-fold cell concatenates and fancy-indexes them.
y_train = np.array(y_train)
y_test = np.array(y_test)


print("X_train:",X_train.shape, "y_train:",len(y_train))
print("X_test:",X_test.shape, "y_test:",len(y_test))





X_train: (29260, 5801, 1) y_train: 29260
X_test: (12540, 5801, 1) y_test: 12540


In [None]:
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import KFold

# Cross-validation runs over the whole synthetic + BIDMC set, so the hold-out
# split made above is merged back together.
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Fixed seed: the fold assignment is identical on every Restart & Run All.
# NOTE(review): windows cut from one synthetic trace overlap heavily (stride of
# about one second, length of one minute), so shuffled folds share near-duplicate
# windows between training and validation -- consider grouping folds by source file.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
loss_per_fold=[]
model_list=[]   # the trained model of every fold is kept for later inspection

for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):

  # Fresh model and fresh callback per fold. restore_best_weights makes the
  # evaluation use the best validation epoch, not the one reached after the
  # patience window has elapsed.
  model = create_model()
  es = EarlyStopping(monitor='val_loss', mode='min',verbose=1, patience=5,
                     restore_best_weights=True)

  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # Fit data to model
  history = model.fit(inputs[train], targets[train], validation_data=(inputs[test], targets[test]), batch_size=254, epochs =50, 
                      callbacks=[es])
  # Score on the recorded pulse-oximeter windows, which are never used for fitting the model.
  scores = model.evaluate(recorded_scaled, np.array(label_recorded), verbose=0)
  print(f'MY DATA: Score for fold {fold_no}: {model.metrics_names} of {scores}')
  loss_per_fold.append(scores)
  model_list.append(model)

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 17: early stopping
MY DATA: Score for fold 1: ['loss'] of 3.2484452724456787
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 26: early stopping
MY DATA: Score for fold 2: ['loss'] of 3.475492000579834
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Ep

In [None]:
# Per-fold losses on the recorded data, as appended by the cross-validation loop.
loss_per_fold

[3.2484452724456787,
 3.475492000579834,
 3.5763347148895264,
 2.7168147563934326,
 2.4289214611053467]

In [None]:
# Plain-Python arithmetic mean of the per-fold losses.
sum(loss_per_fold)/len(loss_per_fold)

3.089201641082764

In [None]:
# Mean and spread of the per-fold losses; np.std uses its default ddof=0 (population std).
print(f"mean: {np.mean(loss_per_fold)} and std: {np.std(loss_per_fold)}")

mean: 3.089201641082764 and std: 0.4441889948042576


## AM=0.4 and BW=0.4

In [None]:
# Load the AM=0.4 / BW=0.4 synthetic set through the notebook's
# read_csv_file / get_synthetic_data helpers.  dim_ar04 is used further down
# as the number of consecutive rows that share one label.
df_synthetic_array_syn_ar04 = read_csv_file(csvs_syn_ar04)
X_syn_ar04, dim_ar04 = get_synthetic_data(
    csvs_syn_ar04,
    txts_syn_ar04,
    df_synthetic_array_syn_ar04,
)


what merged so far:  (4840, 5801)
what merged so far:  (9680, 5801)
what merged so far:  (14520, 5801)
what merged so far:  (19360, 5801)
what merged so far:  (24200, 5801)
what merged so far:  (29040, 5801)
what merged so far:  (33880, 5801)
what merged so far:  (38720, 5801)
final merge shape (41382, 5801)
----------------------------------------
Time Execution: 
--- 40.15 seconds ---


In [None]:
# Only transform() is called here, so `scaler` must already have been fitted
# by an earlier cell.
X_scaled_syn_ar04 = scaler.transform(X_syn_ar04)
X_scaled_real = scaler.transform(X_real)
print(X_scaled_syn_ar04.shape, X_scaled_real.shape, sep="\n")


(41382, 5801)
(418, 5801)


In [None]:
# Stack the scaled synthetic rows on top of the scaled real rows, then append
# a trailing singleton axis (the resulting shape is printed below).
X_ar04 = np.expand_dims(
    np.concatenate((X_scaled_syn_ar04, X_scaled_real), axis=0),
    axis=-1,
)
print(X_ar04.shape)



(41800, 5801, 1)


In [None]:
# One label per synthetic row: the values 4..60 repeated three times, each
# entry held for dim_ar04 consecutive rows, followed by the real labels.
label_synthetic = list(range(4, 61)) * 3
y_syn = [value for value in label_synthetic for _ in range(dim_ar04)]

y = y_syn + y_real
print(len(y))


41800


In [None]:
# Shuffled 70/30 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_ar04,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=1,
)


In [None]:

# y was built as a plain Python list, so turn the split labels into NumPy
# arrays before they are indexed by fold in the cross-validation cell.
y_train, y_test = np.array(y_train), np.array(y_test)

print("X_train:", X_train.shape, "y_train:", len(y_train))
print("X_test:", X_test.shape, "y_test:", len(y_test))





X_train: (29260, 5801, 1) y_train: 29260
X_test: (12540, 5801, 1) y_test: 12540


In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import KFold

# 5-fold cross-validation for the AM=0.4 / BW=0.4 dataset.
# Every fold trains a new model from create_model(); the held-out fold is used
# only as the early-stopping validation set, and the score that is recorded is
# the loss on the separately recorded data (recorded_scaled / label_recorded).

# Merge the earlier train/test split back together so that every sample
# takes part in the folds.
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Fixed seed: with shuffle=True and no random_state the fold assignment
# changes on every run, which makes the per-fold losses non-reproducible.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
loss_per_fold = []

fold_no = 1
# restore_best_weights=True: otherwise the model that gets evaluated carries
# the weights of the last epoch, i.e. `patience` epochs after the best val_loss.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5,
                   restore_best_weights=True)

for train, test in kfold.split(inputs, targets):

  # New model instance for every fold.
  model = create_model()

  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # The held-out fold only drives early stopping.
  history = model.fit(inputs[train], targets[train],
                      validation_data=(inputs[test], targets[test]),
                      batch_size=254, epochs=50,
                      callbacks=[es])

  # Score on the recorded data, not on the cross-validation fold.
  scores = model.evaluate(recorded_scaled, np.array(label_recorded), verbose=0)
  print(f'MY DATA: Score for fold {fold_no}: {model.metrics_names} of {scores}')
  loss_per_fold.append(scores)

  fold_no = fold_no + 1

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 16: early stopping
MY DATA: Score for fold 1: ['loss'] of 2.5298235416412354
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 19: early stopping
MY DATA: Score for fold 2: ['loss'] of 16.902530670166016
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 

In [None]:
# Per-fold losses on the recorded data, as appended by the cross-validation loop.
loss_per_fold

[2.5298235416412354,
 16.902530670166016,
 2.4684245586395264,
 2.6180713176727295,
 3.422086238861084]

In [None]:
# Mean and spread of the per-fold losses; np.std uses its default ddof=0 (population std).
print(f"mean {np.mean(loss_per_fold)} and std {np.std(loss_per_fold)}")


mean 5.588187265396118 and std 5.66770601027234


## FM = 0.03

In [None]:
# Load the FM = 0.03 synthetic set through the notebook's
# read_csv_file / get_synthetic_data helpers.  dim_arfm3 is used further down
# as the number of consecutive rows that share one label.
df_synthetic_array_syn_arfm3 = read_csv_file(csvs_syn_arfm3)
X_syn_arfm3, dim_arfm3 = get_synthetic_data(
    csvs_syn_arfm3,
    txts_syn_arfm3,
    df_synthetic_array_syn_arfm3,
)


what merged so far:  (4840, 5801)
what merged so far:  (9680, 5801)
what merged so far:  (14520, 5801)
what merged so far:  (19360, 5801)
what merged so far:  (24200, 5801)
what merged so far:  (29040, 5801)
what merged so far:  (33880, 5801)
what merged so far:  (38720, 5801)
final merge shape (41382, 5801)
----------------------------------------
Time Execution: 
--- 36.24 seconds ---


In [None]:
# Only transform() is called here, so `scaler` must already have been fitted
# by an earlier cell.
X_scaled_syn_arfm3 = scaler.transform(X_syn_arfm3)
X_scaled_real = scaler.transform(X_real)
print(X_scaled_syn_arfm3.shape, X_scaled_real.shape, sep="\n")


(41382, 5801)
(418, 5801)


In [None]:
# Stack the scaled synthetic rows on top of the scaled real rows, then append
# a trailing singleton axis (the resulting shape is printed below).
X_arfm3 = np.expand_dims(
    np.concatenate((X_scaled_syn_arfm3, X_scaled_real), axis=0),
    axis=-1,
)
print(X_arfm3.shape)



(41800, 5801, 1)


In [None]:
# One label per synthetic row: the values 4..60 repeated three times, each
# entry held for dim_arfm3 consecutive rows, followed by the real labels.
label_synthetic = list(range(4, 61)) * 3
y_syn = [value for value in label_synthetic for _ in range(dim_arfm3)]

y = y_syn + y_real
print(len(y))


41800


In [None]:
# Shuffled 70/30 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_arfm3,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=1,
)


In [None]:

# y was built as a plain Python list, so turn the split labels into NumPy
# arrays before they are indexed by fold in the cross-validation cell.
y_train, y_test = np.array(y_train), np.array(y_test)

print("X_train:", X_train.shape, "y_train:", len(y_train))
print("X_test:", X_test.shape, "y_test:", len(y_test))





X_train: (29260, 5801, 1) y_train: 29260
X_test: (12540, 5801, 1) y_test: 12540


In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import KFold

# 5-fold cross-validation for the FM = 0.03 dataset.
# Every fold trains a new model from create_model(); the held-out fold is used
# only as the early-stopping validation set, and the score that is recorded is
# the loss on the separately recorded data (recorded_scaled / label_recorded).
#
# NOTE(review): in the stored output two folds stop at epoch 6 with the exact
# same loss (15.1887); that looks like the model collapsing to a constant
# prediction -- verify training before trusting the averaged score.

# Merge the earlier train/test split back together so that every sample
# takes part in the folds.
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Fixed seed: with shuffle=True and no random_state the fold assignment
# changes on every run, which makes the per-fold losses non-reproducible.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
loss_per_fold = []

fold_no = 1
# restore_best_weights=True: otherwise the model that gets evaluated carries
# the weights of the last epoch, i.e. `patience` epochs after the best val_loss.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5,
                   restore_best_weights=True)

for train, test in kfold.split(inputs, targets):

  # New model instance for every fold.
  model = create_model()

  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # The held-out fold only drives early stopping.
  history = model.fit(inputs[train], targets[train],
                      validation_data=(inputs[test], targets[test]),
                      batch_size=254, epochs=50,
                      callbacks=[es])

  # Score on the recorded data, not on the cross-validation fold.
  scores = model.evaluate(recorded_scaled, np.array(label_recorded), verbose=0)
  print(f'MY DATA: Score for fold {fold_no}: {model.metrics_names} of {scores}')
  loss_per_fold.append(scores)

  fold_no = fold_no + 1

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 1: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 20: early stopping
MY DATA: Score for fold 2: ['loss'] of 3.8072803020477295
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 3: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 4 ...
Ep

In [None]:
# Per-fold losses on the recorded data, as appended by the cross-validation loop.
loss_per_fold

[15.188679695129395,
 3.8072803020477295,
 15.188679695129395,
 3.699939727783203,
 4.07932710647583]

In [None]:
# Mean and spread of the per-fold losses; np.std uses its default ddof=0 (population std).
print(f"mean {np.mean(loss_per_fold)} and std {np.std(loss_per_fold)}")


mean 8.392781305313111 and std 5.550206095229677


## FM = 0.025

In [None]:
# Load the FM = 0.025 synthetic set through the notebook's
# read_csv_file / get_synthetic_data helpers.  dim_arfm4 is used further down
# as the number of consecutive rows that share one label.
df_synthetic_array_syn_arfm4 = read_csv_file(csvs_syn_arfm4)
X_syn_arfm4, dim_arfm4 = get_synthetic_data(
    csvs_syn_arfm4,
    txts_syn_arfm4,
    df_synthetic_array_syn_arfm4,
)


what merged so far:  (4840, 5801)
what merged so far:  (9680, 5801)
what merged so far:  (14520, 5801)
what merged so far:  (19360, 5801)
what merged so far:  (24200, 5801)
what merged so far:  (29040, 5801)
what merged so far:  (33880, 5801)
what merged so far:  (38720, 5801)
final merge shape (41382, 5801)
----------------------------------------
Time Execution: 
--- 39.67 seconds ---


In [None]:
# Only transform() is called here, so `scaler` must already have been fitted
# by an earlier cell.
X_scaled_syn_arfm4 = scaler.transform(X_syn_arfm4)
X_scaled_real = scaler.transform(X_real)
print(X_scaled_syn_arfm4.shape, X_scaled_real.shape, sep="\n")


(41382, 5801)
(418, 5801)


In [None]:
# Stack the scaled synthetic rows on top of the scaled real rows, then append
# a trailing singleton axis (the resulting shape is printed below).
X_arfm4 = np.expand_dims(
    np.concatenate((X_scaled_syn_arfm4, X_scaled_real), axis=0),
    axis=-1,
)
print(X_arfm4.shape)



(41800, 5801, 1)


In [None]:
# One label per synthetic row: the values 4..60 repeated three times, each
# entry held for dim_arfm4 consecutive rows, followed by the real labels.
label_synthetic = list(range(4, 61)) * 3
y_syn = [value for value in label_synthetic for _ in range(dim_arfm4)]

y = y_syn + y_real
print(len(y))


41800


In [None]:
# Shuffled 70/30 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_arfm4,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=1,
)


In [None]:

# y was built as a plain Python list, so turn the split labels into NumPy
# arrays before they are indexed by fold in the cross-validation cell.
y_train, y_test = np.array(y_train), np.array(y_test)

print("X_train:", X_train.shape, "y_train:", len(y_train))
print("X_test:", X_test.shape, "y_test:", len(y_test))





X_train: (29260, 5801, 1) y_train: 29260
X_test: (12540, 5801, 1) y_test: 12540


In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import KFold

# 5-fold cross-validation for the FM = 0.025 dataset.
# Every fold trains a new model from create_model(); the held-out fold is used
# only as the early-stopping validation set, and the score that is recorded is
# the loss on the separately recorded data (recorded_scaled / label_recorded).
#
# NOTE(review): in the stored output two folds end with the exact same loss
# (15.1887); that looks like the model collapsing to a constant prediction --
# verify training before trusting the averaged score.

# Merge the earlier train/test split back together so that every sample
# takes part in the folds.
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Fixed seed: with shuffle=True and no random_state the fold assignment
# changes on every run, which makes the per-fold losses non-reproducible.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
loss_per_fold = []

fold_no = 1
# restore_best_weights=True: otherwise the model that gets evaluated carries
# the weights of the last epoch, i.e. `patience` epochs after the best val_loss.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5,
                   restore_best_weights=True)

for train, test in kfold.split(inputs, targets):

  # New model instance for every fold.
  model = create_model()

  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # The held-out fold only drives early stopping.
  history = model.fit(inputs[train], targets[train],
                      validation_data=(inputs[test], targets[test]),
                      batch_size=254, epochs=50,
                      callbacks=[es])

  # Score on the recorded data, not on the cross-validation fold.
  scores = model.evaluate(recorded_scaled, np.array(label_recorded), verbose=0)
  print(f'MY DATA: Score for fold {fold_no}: {model.metrics_names} of {scores}')
  loss_per_fold.append(scores)

  fold_no = fold_no + 1

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 1: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 18: early stopping
MY DATA: Score for fold 2: ['loss'] of 6.996931552886963
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch

In [None]:
# Per-fold losses on the recorded data, as appended by the cross-validation loop.
loss_per_fold

[15.188679695129395,
 6.996931552886963,
 3.2668652534484863,
 4.9630818367004395,
 15.188679695129395]

In [None]:
# Mean and spread of the per-fold losses; np.std uses its default ddof=0 (population std).
print(f"mean {np.mean(loss_per_fold)} and std {np.std(loss_per_fold)}")


mean 9.120847606658936 and std 5.093217401618583


## General Amplitude = 3

In [None]:
# Load the "General Amplitude = 3" synthetic set through the notebook's
# read_csv_file / get_synthetic_data helpers.  dim_range3 is used further down
# as the number of consecutive rows that share one label.
df_synthetic_array_syn_range3 = read_csv_file(csvs_syn_range3)
X_syn_range3, dim_range3 = get_synthetic_data(
    csvs_syn_range3,
    txts_syn_range3,
    df_synthetic_array_syn_range3,
)


what merged so far:  (4840, 5801)
what merged so far:  (9680, 5801)
what merged so far:  (14520, 5801)
what merged so far:  (19360, 5801)
what merged so far:  (24200, 5801)
what merged so far:  (29040, 5801)
what merged so far:  (33880, 5801)
what merged so far:  (38720, 5801)
final merge shape (41382, 5801)
----------------------------------------
Time Execution: 
--- 37.02 seconds ---


In [None]:
# Only transform() is called here, so `scaler` must already have been fitted
# by an earlier cell.
X_scaled_syn_range3 = scaler.transform(X_syn_range3)
X_scaled_real = scaler.transform(X_real)
print(X_scaled_syn_range3.shape, X_scaled_real.shape, sep="\n")


(41382, 5801)
(418, 5801)


In [None]:
# Stack the scaled synthetic rows on top of the scaled real rows, then append
# a trailing singleton axis (the resulting shape is printed below).
X_range3 = np.expand_dims(
    np.concatenate((X_scaled_syn_range3, X_scaled_real), axis=0),
    axis=-1,
)
print(X_range3.shape)



(41800, 5801, 1)


In [None]:
# One label per synthetic row: the values 4..60 repeated three times, each
# entry held for dim_range3 consecutive rows, followed by the real labels.
label_synthetic = list(range(4, 61)) * 3
y_syn = [value for value in label_synthetic for _ in range(dim_range3)]

y = y_syn + y_real
print(len(y))


41800


In [None]:
# Shuffled 70/30 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_range3,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=1,
)


In [None]:

# y was built as a plain Python list, so turn the split labels into NumPy
# arrays before they are indexed by fold in the cross-validation cell.
y_train, y_test = np.array(y_train), np.array(y_test)

print("X_train:", X_train.shape, "y_train:", len(y_train))
print("X_test:", X_test.shape, "y_test:", len(y_test))





X_train: (29260, 5801, 1) y_train: 29260
X_test: (12540, 5801, 1) y_test: 12540


In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import KFold

# 5-fold cross-validation for the "General Amplitude = 3" dataset.
# Every fold trains a new model from create_model(); the held-out fold is used
# only as the early-stopping validation set, and the score that is recorded is
# the loss on the separately recorded data (recorded_scaled / label_recorded).
#
# NOTE(review): in the stored output every fold stops at epoch 6 with the
# exact same loss (15.1887, std 0.0); that looks like the model collapsing to
# a constant prediction on this dataset -- verify training before using
# these numbers.

# Merge the earlier train/test split back together so that every sample
# takes part in the folds.
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Fixed seed: with shuffle=True and no random_state the fold assignment
# changes on every run, which makes the per-fold losses non-reproducible.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
loss_per_fold = []

fold_no = 1
# restore_best_weights=True: otherwise the model that gets evaluated carries
# the weights of the last epoch, i.e. `patience` epochs after the best val_loss.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5,
                   restore_best_weights=True)

for train, test in kfold.split(inputs, targets):

  # New model instance for every fold.
  model = create_model()

  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # The held-out fold only drives early stopping.
  history = model.fit(inputs[train], targets[train],
                      validation_data=(inputs[test], targets[test]),
                      batch_size=254, epochs=50,
                      callbacks=[es])

  # Score on the recorded data, not on the cross-validation fold.
  scores = model.evaluate(recorded_scaled, np.array(label_recorded), verbose=0)
  print(f'MY DATA: Score for fold {fold_no}: {model.metrics_names} of {scores}')
  loss_per_fold.append(scores)

  fold_no = fold_no + 1

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 1: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 2: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 3: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 4 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 4: ['loss'] of 15.188679695129395
--------------------

In [None]:
# Per-fold losses on the recorded data, as appended by the cross-validation loop.
loss_per_fold

[15.188679695129395,
 15.188679695129395,
 15.188679695129395,
 15.188679695129395,
 15.188679695129395]

In [None]:
# Mean and spread of the per-fold losses; np.std uses its default ddof=0 (population std).
print(f"mean {np.mean(loss_per_fold)} and std {np.std(loss_per_fold)}")


mean 15.188679695129395 and std 0.0


## General Amplitude = 6

In [None]:
# Load the "General Amplitude = 6" synthetic set through the notebook's
# read_csv_file / get_synthetic_data helpers.  dim_range6 is used further down
# as the number of consecutive rows that share one label.
df_synthetic_array_syn_range6 = read_csv_file(csvs_syn_range6)
X_syn_range6, dim_range6 = get_synthetic_data(
    csvs_syn_range6,
    txts_syn_range6,
    df_synthetic_array_syn_range6,
)


what merged so far:  (4840, 5801)
what merged so far:  (9680, 5801)
what merged so far:  (14520, 5801)
what merged so far:  (19360, 5801)
what merged so far:  (24200, 5801)
what merged so far:  (29040, 5801)
what merged so far:  (33880, 5801)
what merged so far:  (38720, 5801)
final merge shape (41382, 5801)
----------------------------------------
Time Execution: 
--- 36.88 seconds ---


In [None]:
# Only transform() is called here, so `scaler` must already have been fitted
# by an earlier cell.
X_scaled_syn_range6 = scaler.transform(X_syn_range6)
X_scaled_real = scaler.transform(X_real)
print(X_scaled_syn_range6.shape, X_scaled_real.shape, sep="\n")


(41382, 5801)
(418, 5801)


In [None]:
# Stack the scaled synthetic rows on top of the scaled real rows, then append
# a trailing singleton axis (the resulting shape is printed below).
X_range6 = np.expand_dims(
    np.concatenate((X_scaled_syn_range6, X_scaled_real), axis=0),
    axis=-1,
)
print(X_range6.shape)



(41800, 5801, 1)


In [None]:
# One label per synthetic row: the values 4..60 repeated three times, each
# entry held for dim_range6 consecutive rows, followed by the real labels.
label_synthetic = list(range(4, 61)) * 3
y_syn = [value for value in label_synthetic for _ in range(dim_range6)]

y = y_syn + y_real
print(len(y))


41800


In [None]:
# Shuffled 70/30 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_range6,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=1,
)


In [None]:

# y was built as a plain Python list, so turn the split labels into NumPy
# arrays before they are indexed by fold in the cross-validation cell.
y_train, y_test = np.array(y_train), np.array(y_test)

print("X_train:", X_train.shape, "y_train:", len(y_train))
print("X_test:", X_test.shape, "y_test:", len(y_test))





X_train: (29260, 5801, 1) y_train: 29260
X_test: (12540, 5801, 1) y_test: 12540


In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import KFold

# 5-fold cross-validation for the "General Amplitude = 6" dataset.
# Every fold trains a new model from create_model(); the held-out fold is used
# only as the early-stopping validation set, and the score that is recorded is
# the loss on the separately recorded data (recorded_scaled / label_recorded).
#
# NOTE(review): in the stored output every fold stops at epoch 6 with the
# exact same loss (15.1887, std 0.0); that looks like the model collapsing to
# a constant prediction on this dataset -- verify training before using
# these numbers.

# Merge the earlier train/test split back together so that every sample
# takes part in the folds.
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Fixed seed: with shuffle=True and no random_state the fold assignment
# changes on every run, which makes the per-fold losses non-reproducible.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
loss_per_fold = []

fold_no = 1
# restore_best_weights=True: otherwise the model that gets evaluated carries
# the weights of the last epoch, i.e. `patience` epochs after the best val_loss.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5,
                   restore_best_weights=True)

for train, test in kfold.split(inputs, targets):

  # New model instance for every fold.
  model = create_model()

  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # The held-out fold only drives early stopping.
  history = model.fit(inputs[train], targets[train],
                      validation_data=(inputs[test], targets[test]),
                      batch_size=254, epochs=50,
                      callbacks=[es])

  # Score on the recorded data, not on the cross-validation fold.
  scores = model.evaluate(recorded_scaled, np.array(label_recorded), verbose=0)
  print(f'MY DATA: Score for fold {fold_no}: {model.metrics_names} of {scores}')
  loss_per_fold.append(scores)

  fold_no = fold_no + 1

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 1: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 2: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 3: ['loss'] of 15.188679695129395
------------------------------------------------------------------------
Training for fold 4 ...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
MY DATA: Score for fold 4: ['loss'] of 15.188679695129395
--------------------

In [None]:
# Per-fold losses on the recorded data, as appended by the cross-validation loop.
loss_per_fold

[15.188679695129395,
 15.188679695129395,
 15.188679695129395,
 15.188679695129395,
 15.188679695129395]

In [None]:
# Mean and spread of the per-fold losses; np.std uses its default ddof=0 (population std).
print(f"mean {np.mean(loss_per_fold)} and std {np.std(loss_per_fold)}")


mean 15.188679695129395 and std 0.0
