In [217]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import kerastuner as kt
from sklearn.model_selection import train_test_split


def plot_curve(epochs, hist, list_of_metrics,name):
    """Plot a curve of one or more classification metrics vs. epoch."""  
    plt.figure()
    plt.xlabel("Epoch "+name)
    plt.ylabel("Value")

    for m in list_of_metrics:
        x = hist[m]
        plt.plot(epochs[1:], x[1:], label=m)
    plt.legend()
    
    
# for activation functions check https://keras.io/api/layers/activations/
def create_model2(my_learning_rate,momentum,layers, my_feature_layer,my_metrics,my_act_function = "softmax"):
    model = tf.keras.models.Sequential()
    #model.add(my_feature_layer)

    for layer in layers:
        model.add(tf.keras.layers.Dense(units = layer, activation = my_act_function))
    model.add(tf.keras.layers.Dense(units=3,name='Output', activation = 'relu'))                             
    model.compile(optimizer=tf.keras.optimizers.SGD(lr=my_learning_rate,momentum=momentum),                                       
                loss=tf.keras.losses.MeanAbsoluteError(),
                metrics=my_metrics)
    return model


def train_model(model,dataset, epochs, label_name,
                batch_size=None,shuffle=True):
    features = {name:np.array(value) for name, value in dataset.items()}
    label=(dataset[label_name].to_numpy())


    #label=np.zeros(label.shape)
    history = model.fit(x=features, y=label, batch_size=batch_size,
                      epochs=epochs, shuffle=shuffle,validation_split=0.2,
                       callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)])
  
    epochs = history.epoch
    hist = pd.DataFrame(history.history)
    return epochs, hist
    
    
#returns dataframe
def test_model(model,x_data, df_label ,label_name):
    #features = {name:np.array(value) for name, value in dataset.items()}
    label=df_label[label_name].to_numpy()
   # print(label)
    evaluation=model.evaluate(x = x_data, y = label, batch_size=batch_size)
    predicted = model.predict(x_data)
    
    df_test=pd.DataFrame(label,columns=[label_name])
   # print(predicted)
    df_predict=pd.DataFrame(predicted,columns=[label+"_pred" for label in label_name])
    return pd.concat([df_test,df_predict], axis=1)





In [192]:

all_labels_features=["delta","lambda","all_maxima","Intensity","overlap_s0_s2_k6a","overlap_s0_s2_k1","overlap_s0_s2_k9a"]
all_data=pd.read_csv("G:\OneDrive - bwedu\Master\Forschungspraktikum\Inga\pc-forschi\generated_Data/all_param_4_values_with_overlap.csv")
df_feature_labels=all_data[all_labels_features]

max_no_of_peak_list=max(all_data["no_of_max"])
print(max_no_of_peak_list)
df_feature_labels.head()

7


Unnamed: 0,delta,lambda,all_maxima,Intensity,overlap_s0_s2_k6a,overlap_s0_s2_k1,overlap_s0_s2_k9a
0,0.6,0.1,[0.466 0.54 0.612 0.67 0.742],[104.475076 104.507572 87.7231385 58.12209...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
1,0.6,0.2333,[0.51 0.584 0.65 0.712 0.778],[121.232278 120.055307 85.8745546 56.17655...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.9769, 0.0228, 0.0003, 0.0, 0.0]"
2,0.6,0.2333,[0.482 0.556 0.628 0.686 0.756],[101.883081 101.900808 85.5862194 56.79913...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
3,0.5333,0.2333,[0.418 0.492 0.564 0.622 0.692],[101.197996 101.175315 85.035938 56.43656...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
4,0.5333,0.1,[0.4 0.474 0.546 0.604 0.676],[104.345828 104.372914 87.6137903 58.09441...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"


In [193]:
#convert string lists into numpy arrays in dict
padded_dict={"all_maxima_array_padded":[],"intensity_array_padded":[],"overlap_s0_s2_k6a_array":[],"overlap_s0_s2_k1_array":[],"overlap_s0_s2_k9a_array":[]}

all_maxima_array=np.asarray([  np.asarray([x for x in row.replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["all_maxima"] ])

intensity_array=np.asarray([  np.asarray([x for x in row.replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["Intensity"] ])

padded_dict["overlap_s0_s2_k6a_array"]=np.asarray([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k6a"] ])

padded_dict["overlap_s0_s2_k1_array"]=np.asarray([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k1"] ])

padded_dict["overlap_s0_s2_k9a_array"]=np.asarray([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k9a"] ])

In [194]:
#convert string lists into numpy arrays in dict
padded_dict={"all_maxima_array_padded":[],"intensity_array_padded":[],"overlap_s0_s2_k6a_array":[],"overlap_s0_s2_k1_array":[],"overlap_s0_s2_k9a_array":[]}

all_maxima_array=np.asarray([  np.asarray([x for x in row.replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["all_maxima"] ])

intensity_array=np.asarray([  np.asarray([x for x in row.replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["Intensity"] ])

padded_dict["overlap_s0_s2_k6a_array"]=([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k6a"] ])

padded_dict["overlap_s0_s2_k1_array"]=([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k1"] ])

padded_dict["overlap_s0_s2_k9a_array"]=([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k9a"] ])

In [195]:
#pad all_maxima_array and intensity_array
all_maxima_array_padded=np.zeros((len(all_maxima_array),max_no_of_peak_list))
intensity_array_padded=np.zeros((len(intensity_array),max_no_of_peak_list))

for i in range(len(all_maxima_array)):
    for j in range(len(all_maxima_array[i])):
        all_maxima_array_padded[i][j]=all_maxima_array[i][j]
        intensity_array_padded[i][j]=intensity_array[i][j]
padded_dict["all_maxima_array_padded"]=all_maxima_array_padded.tolist()
padded_dict["intensity_array_padded"]=intensity_array_padded.tolist()

#print(padded_dict)
#f_padded_data=pd.DataFrame({"test":all_maxima_array_padded})
df_padded_data=pd.DataFrame(padded_dict)
df_padded_data.head()

Unnamed: 0,all_maxima_array_padded,intensity_array_padded,overlap_s0_s2_k6a_array,overlap_s0_s2_k1_array,overlap_s0_s2_k9a_array
0,"[0.466, 0.54, 0.612, 0.67, 0.742, 0.0, 0.0]","[104.475076, 104.507572, 87.7231385, 58.122091...","[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
1,"[0.51, 0.584, 0.65, 0.712, 0.778, 0.0, 0.0]","[121.232278, 120.055307, 85.8745546, 56.176554...","[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.9769, 0.0228, 0.0003, 0.0, 0.0]"
2,"[0.482, 0.556, 0.628, 0.686, 0.756, 0.0, 0.0]","[101.883081, 101.900808, 85.5862194, 56.799134...","[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
3,"[0.418, 0.492, 0.564, 0.622, 0.692, 0.0, 0.0]","[101.197996, 101.175315, 85.035938, 56.4365614...","[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
4,"[0.4, 0.474, 0.546, 0.604, 0.676, 0.0, 0.0]","[104.345828, 104.372914, 87.6137903, 58.094417...","[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"


In [196]:
df_train, df_test = train_test_split( df_padded_data  ,test_size=0.20, random_state=42)
df_train.head()

Unnamed: 0,all_maxima_array_padded,intensity_array_padded,overlap_s0_s2_k6a_array,overlap_s0_s2_k1_array,overlap_s0_s2_k9a_array
64718,"[0.478, 0.552, 0.622, 0.684, 0.752, 0.0, 0.0]","[119.103505, 117.778269, 81.2433714, 48.533233...","[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.9769, 0.0228, 0.0003, 0.0, 0.0]"
61163,"[0.536, 0.606, 0.692, 0.0, 0.0, 0.0, 0.0]","[258.847806, 55.8701215, 62.794254, 0.0, 0.0, ...","[0.9307, 0.0669, 0.0024, 0.0001, 0.0]","[0.9686, 0.0309, 0.0005, 0.0, 0.0]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
54392,"[0.358, 0.43, 0.486, 0.0, 0.0, 0.0, 0.0]","[261.269136, 56.0804316, 92.5564615, 0.0, 0.0,...","[0.9307, 0.0669, 0.0024, 0.0001, 0.0]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.9769, 0.0228, 0.0003, 0.0, 0.0]"
31237,"[0.574, 0.648, 0.72, 0.0, 0.0, 0.0, 0.0]","[162.451292, 142.36798, 72.7022096, 0.0, 0.0, ...","[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.9686, 0.0309, 0.0005, 0.0, 0.0]","[0.9769, 0.0228, 0.0003, 0.0, 0.0]"
45347,"[0.298, 0.372, 0.438, 0.5, 0.568, 0.0, 0.0]","[124.81752, 122.250384, 87.0462094, 56.8060901...","[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.9769, 0.0228, 0.0003, 0.0, 0.0]"


In [197]:
feature_columns_A = []
all_maxima_array_padded = tf.feature_column.numeric_column("all_maxima_array_padded")
feature_columns_A.append(all_maxima_array_padded)
intensity_array_padded = tf.feature_column.numeric_column("intensity_array_padded")
feature_columns_A.append(intensity_array_padded)

my_feature_layer_A = tf.keras.layers.DenseFeatures(feature_columns_A)

In [218]:
learning_rate = 1e-2
momentum=0.7
epochs = 100
batch_size = 150

#specify the classification threshold
classification_threshold = 0.15

# Establish the metrics the model will measure.
metric = [tf.keras.metrics.MeanAbsoluteError()]
layers=[16,256,64,16]
all_label_list=["overlap_s0_s2_k6a_array","overlap_s0_s2_k1_array","overlap_s0_s2_k9a_array"]



my_model= create_model2(learning_rate,momentum,layers, my_feature_layer_A,metric,my_act_function="relu")

#lambda_train, lambda_test
#delta_train, delta_test

epochs_run, hist = train_model(my_model,df_train, epochs, 
                          all_label_list, batch_size)


[[array([0.5232, 0.3386, 0.1098, 0.0239, 0.004 ])
  array([7.501e-01, 2.155e-01, 3.100e-02, 3.000e-03, 2.000e-04])
  array([9.769e-01, 2.280e-02, 3.000e-04, 0.000e+00, 0.000e+00])]
 [array([9.307e-01, 6.690e-02, 2.400e-03, 1.000e-04, 0.000e+00])
  array([9.686e-01, 3.090e-02, 5.000e-04, 0.000e+00, 0.000e+00])
  array([8.102e-01, 1.704e-01, 1.800e-02, 1.300e-03, 1.000e-04])]
 [array([9.307e-01, 6.690e-02, 2.400e-03, 1.000e-04, 0.000e+00])
  array([7.501e-01, 2.155e-01, 3.100e-02, 3.000e-03, 2.000e-04])
  array([9.769e-01, 2.280e-02, 3.000e-04, 0.000e+00, 0.000e+00])]
 ...
 [array([0.5232, 0.3386, 0.1098, 0.0239, 0.004 ])
  array([7.501e-01, 2.155e-01, 3.100e-02, 3.000e-03, 2.000e-04])
  array([8.102e-01, 1.704e-01, 1.800e-02, 1.300e-03, 1.000e-04])]
 [array([0.5232, 0.3386, 0.1098, 0.0239, 0.004 ])
  array([9.686e-01, 3.090e-02, 5.000e-04, 0.000e+00, 0.000e+00])
  array([8.102e-01, 1.704e-01, 1.800e-02, 1.300e-03, 1.000e-04])]
 [array([9.307e-01, 6.690e-02, 2.400e-03, 1.000e-04, 0.000e+

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).