In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import kerastuner as kt
from sklearn.model_selection import train_test_split
import seaborn as sns


def plot_curve(epochs, hist, list_of_metrics,name):
    """Plot a curve of one or more classification metrics vs. epoch."""  
    plt.figure()
    plt.xlabel("Epoch "+name)
    plt.ylabel("Value")

    for m in list_of_metrics:
        x = hist[m]
        plt.plot(epochs[1:], x[1:], label=m)
    plt.legend()
    
    
def create_model_optimizer(hp):
    model = tf.keras.models.Sequential()
    
   
    
    hp_layers=hp.Int('additional_number_layers', min_value = 2, max_value = 8, step = 2)
    hp_units=[]

    for i in range(hp_layers):
        hp_units.append(hp.Int(str('additional_units'+str(i)), min_value = 64, max_value = 4544, step = 128))
        model.add(tf.keras.layers.Dense(units = hp_units[-1], activation = 'relu'))
        
        
        
    hp_lr=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    hp_momentum=hp.Choice('momentum', values=[1e-2, 1e-1, 2e-1,5e-1])

    model.add(tf.keras.layers.Dense(units=1,name='Output', activation = 'relu'))                             
    model.compile(optimizer=tf.keras.optimizers.SGD(lr=hp_lr,momentum=hp_momentum),                                       
                loss=tf.keras.losses.MeanAbsoluteError(),
                metrics=[tf.keras.metrics.MeanAbsoluteError()])
    return model


def train_model(model,x_data, y_data, epochs, label_name,
                batch_size=None,shuffle=True):
    #features = {name:np.array(value) for name, value in dataset.items()}
    history = model.fit(x=x_data, y=y_data, batch_size=batch_size,
                      epochs=epochs, shuffle=shuffle,validation_split=0.2,
                       callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)])
  
    epochs = history.epoch
    hist = pd.DataFrame(history.history)
    return epochs, hist
    
    
#returns dataframe
def test_model(model,x_data, y_data ,label_name):
    evaluation=model.evaluate(x = x_data, y = y_data, batch_size=batch_size)
    predicted = model.predict(x_data)
    df_test=pd.DataFrame(y_data,columns=[label_name])
   # print(predicted)
    df_predict=pd.DataFrame(predicted,columns=[label+"_pred" for label in label_name])
    return pd.concat([df_test,df_predict], axis=1)



In [4]:

all_labels_features=["delta","lambda","all_maxima","Intensity","overlap_s0_s2_k6a","overlap_s0_s2_k1","overlap_s0_s2_k9a"]
all_data=pd.read_csv("G:\OneDrive - bwedu\Master\Forschungspraktikum\Inga\pc-forschi\generated_Data/all_param_4_values_with_overlap.csv")
df_feature_labels=all_data[all_labels_features]

max_no_of_peak_list=max(all_data["no_of_max"])
print(max_no_of_peak_list)
df_feature_labels.head()

7


Unnamed: 0,delta,lambda,all_maxima,Intensity,overlap_s0_s2_k6a,overlap_s0_s2_k1,overlap_s0_s2_k9a
0,0.6,0.1,[0.466 0.54 0.612 0.67 0.742],[104.475076 104.507572 87.7231385 58.12209...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
1,0.6,0.2333,[0.51 0.584 0.65 0.712 0.778],[121.232278 120.055307 85.8745546 56.17655...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.9769, 0.0228, 0.0003, 0.0, 0.0]"
2,0.6,0.2333,[0.482 0.556 0.628 0.686 0.756],[101.883081 101.900808 85.5862194 56.79913...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
3,0.5333,0.2333,[0.418 0.492 0.564 0.622 0.692],[101.197996 101.175315 85.035938 56.43656...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"
4,0.5333,0.1,[0.4 0.474 0.546 0.604 0.676],[104.345828 104.372914 87.6137903 58.09441...,"[0.5232, 0.3386, 0.1098, 0.0239, 0.004]","[0.7501, 0.2155, 0.031, 0.003, 0.0002]","[0.8102, 0.1704, 0.018, 0.0013, 0.0001]"


In [5]:
#convert string lists into numpy arrays in dict

all_maxima_array=np.asarray([  np.asarray([x for x in row.replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["all_maxima"] ])

intensity_array=np.asarray([  np.asarray([x for x in row.replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["Intensity"] ])

overlap_s0_s2_k6a_array=np.asarray([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k6a"] ])

overlap_s0_s2_k1_array=np.asarray([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k1"] ])

overlap_s0_s2_k9a_array=np.asarray([  np.asarray([x for x in row.replace(",","").replace("[","").replace("]","").replace("  "," ",5).replace(" ",";").split(";") if x!=""],dtype=np.float64)     for row in df_feature_labels["overlap_s0_s2_k9a"] ])

In [6]:
#pad all_maxima_array and intensity_array
all_maxima_array_padded=np.zeros((len(all_maxima_array),max_no_of_peak_list))
intensity_array_padded=np.zeros((len(intensity_array),max_no_of_peak_list))

for i in range(len(all_maxima_array)):
    for j in range(len(all_maxima_array[i])):
        all_maxima_array_padded[i][j]=all_maxima_array[i][j]
        intensity_array_padded[i][j]=intensity_array[i][j]


In [7]:
concat_label=np.concatenate((overlap_s0_s2_k6a_array,overlap_s0_s2_k1_array,overlap_s0_s2_k9a_array),axis=1)
concat_feature=np.concatenate((all_maxima_array_padded,intensity_array_padded),axis=1)

x_train, x_test,y_train,y_test = train_test_split( concat_feature, concat_label  ,test_size=0.20, random_state=42)


In [14]:
MAX_TRIALS = 10

EXECUTIONS_PER_TRIAL = 3

tuner = kt.RandomSearch(

    create_model_optimizer,

    objective='val_mean_absolute_error',

    max_trials=MAX_TRIALS,

    executions_per_trial=EXECUTIONS_PER_TRIAL,

    directory='franck_condon_test',
    #overwrite = True,
    project_name='first Try',

    seed=1

)

tuner.search(x_train, y_train, epochs = 3, validation_data = (x_test, y_test))


INFO:tensorflow:Reloading Oracle from existing project franck_condon_test\first Try\oracle.json
Epoch 1/60
 310/1637 [====>.........................] - ETA: 1:40 - loss: 0.2017 - mean_absolute_error: 0.2017

KeyboardInterrupt: 

In [None]:
tuner.results_summary()


In [None]:
tuner.oracle.get_best_trials(num_trials=2)[0].hyperparameters.values

In [None]:
tuner.oracle.get_best_trials(num_trials=2)[1].hyperparameters.values

In [None]:
best_model = tuner.get_best_models()[0]


In [None]:
all_label_list=['k6a 0',"k6a 1","k6a 2","k6a 3","k6a 4","k1 0","k1 1","k1 2","k1 3","k1 4","k9a 0","k9a 1","k9a 2","k9a 3","k9a 4"]

delta_test_result=testbest_model_model(my_model,x_test,y_test,all_label_list)

columns_names=delta_test_result.columns

compare_k6a=columns_names[[0,1,2,3,4,15,16,17,18,19]]
compare_k1=columns_names[[5,6,7,8,9,20,21,22,23,24]]
compare_k9a=columns_names[[10,11,12,13,14,25,26,27,28,29]]
#print(compare_k6a)

#delta_test_result[compare_k6a].head(50)
#delta_test_result[compare_k1].head(50)
delta_test_result[compare_k9a].head(50)

In [None]:
delta_test_result[compare_k9a].corr()

In [None]:

sns.set_theme(style="white")

# Generate a large random dataset
rs = np.random.RandomState(33)
d = delta_test_result
# Compute the correlation matrix
corr = d.corr()

# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})

In [None]:
#my_model.save("saved_Models/lambda_100_best_model_main_max_err_0_0027")