In [106]:
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np


from tensorflow.keras.layers import LSTM

In [107]:
df_spectra=pd.read_csv("spectrum_energy_intensity2.csv",index_col=[0])
df_spectra.head(-50)

Unnamed: 0,Energy,Intensity,k6a1,k6a2,k11,k12,k9a1,k9a2
0,0.874,0.019935,0.25,0.25,0.25,0.25,0.25,0.25
1,0.213,0.060488,0.25,0.25,0.25,0.25,0.25,0.50
2,0.866,0.020153,0.25,0.25,0.25,0.25,0.25,0.75
3,0.867,0.019641,0.25,0.25,0.25,0.25,0.50,0.25
4,0.220,0.060471,0.25,0.25,0.25,0.25,0.50,0.50
...,...,...,...,...,...,...,...,...
674,0.868,0.021991,0.75,0.75,0.25,0.75,0.75,0.75
675,0.865,0.021101,0.75,0.75,0.50,0.25,0.25,0.25
676,0.213,0.060723,0.75,0.75,0.50,0.25,0.25,0.50
677,0.868,0.019078,0.75,0.75,0.50,0.25,0.25,0.75


In [108]:
numpy_input=df_spectra[["k6a1","k6a2","k11","k12","k9a1","k9a2"]].to_numpy()
numpy_energy_intensity=df_spectra[["Energy","Intensity"]].to_numpy()


df_training=df_spectra.sample(frac=0.8,random_state=20)
df_testing=df_spectra.drop(df_training.index)

In [109]:
feature_columns_A = []

k6a1 = tf.feature_column.numeric_column("k6a1")
feature_columns_A.append(k6a1)

k6a2 = tf.feature_column.numeric_column("k6a2")
feature_columns_A.append(k6a2)

k11 = tf.feature_column.numeric_column("k11")
feature_columns_A.append(k11)

k12 = tf.feature_column.numeric_column("k12")
feature_columns_A.append(k12)

k9a1 = tf.feature_column.numeric_column("k9a1")
feature_columns_A.append(k9a1)

k9a2 = tf.feature_column.numeric_column("k9a2")
feature_columns_A.append(k9a2)

my_feature_layer_A = tf.keras.layers.DenseFeatures(feature_columns_A)


In [110]:
#define the plotting function.

def plot_the_loss_curve(epochs, mse,mse_test=0):
    """Plot a curve of loss vs. epoch."""

    plt.figure()
    plt.xlabel("Epoch")
    plt.ylabel("Mean Squared Error")

    plt.plot(epochs, mse, label="Loss")
    if(mse_test!=0):
        plt.plot(epochs,mse_test)
    plt.legend()
    plt.ylim([mse.min()*0.95, mse.max() * 1.03])
    plt.show()  

print("Defined the plot_the_loss_curve function.")

Defined the plot_the_loss_curve function.


In [111]:
def create_model(my_learning_rate, my_feature_layer,unit_layer_list=[10,12],regulations=0.04):
    """Create and compile a regression model."""
    model = tf.keras.models.Sequential()
    # Add the layer containing the feature columns to the model.
    model.add(my_feature_layer)

    # Describe the topography of the model by calling the tf.keras.layers.Dense
    # method once for each layer. We've specified the following arguments:
    #   * units specifies the number of nodes in this layer.
    #   * activation specifies the activation function (Rectified Linear Unit).
    #   * name is just a string that can be useful when debugging.

    # Define the first hidden layer with 10 nodes.   
    for n in unit_layer_list:
        print(n)
        model.add(tf.keras.layers.Dense(units=n, activation='relu', 
                                  kernel_regularizer=tf.keras.regularizers.l2(regulations),name='Hidden'+str(n)))
                
   
   

    # Define the output layer.
    model.add(tf.keras.layers.Dense(units=2,  
                                    name='Output'))                              
  
    model.compile(optimizer=tf.keras.optimizers.Adam(lr=my_learning_rate),
                loss="mean_squared_error",
                metrics=[tf.keras.metrics.MeanSquaredError()])
    return model



In [124]:
def create_multilabel_model(my_learning_rate, my_feature_layer,unit_layer_list=[10,12],regulations=0.04):
    """Create and compile a regression model."""
    model = tf.keras.models.Sequential()
    # Add the layer containing the feature columns to the model.
    model.add(my_feature_layer)

    # Describe the topography of the model by calling the tf.keras.layers.Dense
    # method once for each layer. We've specified the following arguments:
    #   * units specifies the number of nodes in this layer.
    #   * activation specifies the activation function (Rectified Linear Unit).
    #   * name is just a string that can be useful when debugging.

    # Define the first hidden layer with 10 nodes.   
    model.add(tf.keras.layers.Dense(20, input_dim=5, kernel_initializer='he_uniform', activation='relu'))
    #outputlayer
    model.add(tf.keras.layers.Dense(2, activation='sigmoid'))
   

    
    
  
    model.compile(optimizer=tf.keras.optimizers.Adam(lr=my_learning_rate),
                loss='binary_crossentropy',
                metrics=[tf.keras.metrics.MeanSquaredError()])
    return model


In [125]:
def train_model(model, dataset, epochs, label_name,
                batch_size=None):
    """Train the model by feeding it data."""

    # Split the dataset into features and label.
    
    print(dataset.items())

    features = {name:np.array(value) for name, value in dataset.items()}
    label = [np.array(features.pop(label)) for label in label_name]
    history = model.fit(x=features, y=label, batch_size=batch_size,
                          epochs=epochs, shuffle=True) 
    
   
    # The list of epochs is stored separately from the rest of history.
    epochs = history.epoch
  
    # To track the progression of training, gather a snapshot
    # of the model's mean squared error at each epoch. 
    hist = pd.DataFrame(history.history)
    mse = hist["mean_squared_error"]

    return epochs, mse

In [126]:
# The following variables are the hyperparameters.
learning_rate = 0.0001
epochs_ini = 8000
batch_size = 500

# Specify the label
label_name = ["Energy","Intensity"]

#try different things:

my_model = create_multilabel_model(learning_rate, my_feature_layer_A,unit_layer_list=[10,15,10],regulations=1e-7)

epochs, mse = train_model(my_model, df_training, epochs_ini, 
                          label_name, batch_size)



<generator object DataFrame.items at 0x000001A6D2587EC8>
Epoch 1/8000


ValueError: in user code:

    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:533 train_step  **
        y, y_pred, sample_weight, regularization_losses=self.losses)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:205 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:143 __call__
        losses = self.call(y_true, y_pred)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:246 call
        return self.fn(y_true, y_pred, **self._fn_kwargs)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:1595 binary_crossentropy
        K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\keras\backend.py:4692 binary_crossentropy
        return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
    H:\Users\Gwydion\anaconda3\lib\site-packages\tensorflow\python\ops\nn_impl.py:172 sigmoid_cross_entropy_with_logits
        (logits.get_shape(), labels.get_shape()))

    ValueError: logits and labels must have the same shape ((None, 2) vs (None, 1))


In [114]:
test_features = {name:np.array(value) for name, value in df_testing.items()}
test_label = [np.array(test_features.pop(label)) for label in label_name]


#print(test_label)
example_mydata = df_testing.sample(frac=0.8,random_state=20)
#run the evaluation
# i checked and they should be correct
example_features = {name:np.array(value) for name, value in example_mydata.items()}
#print(example_features)

predicted = 2*my_model.predict(example_features)-1

#print(predicted)


predicted=predicted.reshape(len(predicted),2)
#print(predicted)
predicted_1=[predicted[i][0] for i in range(len(predicted))]
predicted_2=[predicted[i][1] for i in range(len(predicted))]


print(predicted[:])
print(predicted_1[:5])
print(predicted_2[:5])

exact_energy = example_mydata["Energy"].to_numpy()*2-1
exact_intensity = example_mydata["Intensity"].to_numpy()



print("Intensity:",my_model.evaluate(x = test_features, y = test_label, batch_size=batch_size))

df_compare=pd.DataFrame({"predicted1":predicted_1,"predicted2":predicted_2, "Reference Energy":exact_energy, "Reference Intensity":exact_intensity})

#df_compare.plot.scatter(x="exact", y="predicted")
df_compare.to_csv("first_test.csv")
df_compare.head(50)

[[ 0.37709868  0.36820948]
 [-0.49683225 -0.48805594]
 [-0.5092186  -0.5223993 ]
 [-0.49062806 -0.48549956]
 [ 0.43057132  0.42041242]
 [-0.4063732  -0.42487228]
 [-0.58303154 -0.6164681 ]
 [-0.52944577 -0.53274256]
 [-0.44129103 -0.4378969 ]
 [-0.4347006  -0.46810448]
 [ 0.27060664  0.29598427]
 [ 0.47858703  0.47076666]
 [-0.48137194 -0.5125185 ]
 [ 0.31988382  0.32947946]
 [-0.54751563 -0.47290266]
 [ 0.30206168  0.29654324]
 [ 0.26698768  0.29482174]
 [ 0.22406638  0.26966596]
 [ 0.3525566   0.3383478 ]
 [ 0.2903434   0.27980816]
 [-0.50131273 -0.51070666]
 [ 0.33987784  0.31987846]
 [-0.4298271  -0.40482557]
 [ 0.19293511  0.16845214]
 [ 0.18797994  0.18977809]
 [ 0.29798377  0.3206017 ]
 [ 0.21533239  0.14310014]
 [ 0.20442617  0.17751598]
 [ 0.30347002  0.3155415 ]
 [ 0.3580885   0.36838734]
 [ 0.36614776  0.37378192]
 [-0.49405807 -0.4940989 ]
 [ 0.38048363  0.38772702]
 [ 0.36828446  0.3656149 ]
 [ 0.2683159   0.22420526]
 [ 0.3131888   0.31936717]
 [-0.40545124 -0.41796976]
 

Unnamed: 0,predicted1,predicted2,Reference Energy,Reference Intensity
0,0.377099,0.368209,0.732,0.021675
1,-0.496832,-0.488056,-0.632,0.074461
2,-0.509219,-0.522399,-0.63,0.07608
3,-0.490628,-0.4855,-0.756,0.001645
4,0.430571,0.420412,0.176,0.063414
5,-0.406373,-0.424872,-0.63,0.07608
6,-0.583032,-0.616468,-0.63,0.076
7,-0.529446,-0.532743,-0.632,0.074461
8,-0.441291,-0.437897,-0.756,0.001604
9,-0.434701,-0.468104,-0.63,0.076485


In [186]:
from numpy import mean
from numpy import std
from sklearn.model_selection import RepeatedKFold
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor

In [187]:
df_spectra=pd.read_csv("spectrum_energy_intensity2.csv",index_col=[0])
df_spectra.head(-50)

Unnamed: 0,Energy,Intensity,k6a1,k6a2,k11,k12,k9a1,k9a2
0,0.874,0.019935,0.25,0.25,0.25,0.25,0.25,0.25
1,0.213,0.060488,0.25,0.25,0.25,0.25,0.25,0.50
2,0.866,0.020153,0.25,0.25,0.25,0.25,0.25,0.75
3,0.867,0.019641,0.25,0.25,0.25,0.25,0.50,0.25
4,0.220,0.060471,0.25,0.25,0.25,0.25,0.50,0.50
...,...,...,...,...,...,...,...,...
674,0.868,0.021991,0.75,0.75,0.25,0.75,0.75,0.75
675,0.865,0.021101,0.75,0.75,0.50,0.25,0.25,0.25
676,0.213,0.060723,0.75,0.75,0.50,0.25,0.25,0.50
677,0.868,0.019078,0.75,0.75,0.50,0.25,0.25,0.75


In [188]:

numpy_input=df_spectra[["k6a1","k6a2","k11","k12","k9a1","k9a2"]].to_numpy()
numpy_energy_intensity=df_spectra[["Energy","Intensity"]].to_numpy()


In [208]:
def get_model(n_inputs, n_outputs):
    model = Sequential()
    model.add(Dense(20, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(25, activation='relu'))
    model.add(Dense(10, activation='relu'))

    model.add(Dense(n_outputs, activation='relu'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [215]:
def evaluate_model(X, y):
    results = list()
    n_inputs, n_outputs = X.shape[1], y.shape[1]
    # define evaluation procedure
    cv = RepeatedKFold(n_splits=2, n_repeats=1, random_state=1)
    # enumerate folds
    for train_ix, test_ix in cv.split(X):
        # prepare data
        X_train, X_test = X[train_ix], X[test_ix]
        y_train, y_test = y[train_ix], y[test_ix]
        
        print(X_train,y_train)
        # define model
        model = get_model(n_inputs, n_outputs)
        # fit model
        model.fit(X_train, y_train, verbose=0, epochs=8000)
        # make a prediction on the test set
        yhat = model.predict(X_test)
        # round probabilities to class labels
        yhat = yhat
        # calculate accuracy
        #print(y_test,yhat)
        #no idea
        clf = RandomForestRegressor(n_estimators=10)
        clf.fit(X_train, y_train)
        acc=clf.score(X_test, y_test)
        
        
        #acc = score(y_test, yhat)
        # store result
        print('>%.3f' % acc)
        results.append(acc)
    
    y_predict1=[yhat[i][0] for i in range(len(yhat))]
    y_predict2=[yhat[i][1] for i in range(len(yhat))]
    y_test1=[y_test[i][0] for i in range(len(y_test))]
    y_test2=[y_test[i][1] for i in range(len(y_test))]

    df_compare=pd.DataFrame({"predicted1":y_predict1,"predicted2":y_predict2, "Reference Energy":y_test1, "Reference Intensity":y_test2})

    return results,df_compare




In [None]:
results,compare = evaluate_model(numpy_input, numpy_energy_intensity)

[[0.25 0.25 0.25 0.25 0.25 0.5 ]
 [0.25 0.25 0.25 0.25 0.75 0.5 ]
 [0.25 0.25 0.25 0.5  0.25 0.5 ]
 ...
 [0.75 0.75 0.75 0.75 0.5  0.5 ]
 [0.75 0.75 0.75 0.75 0.75 0.5 ]
 [0.75 0.75 0.75 0.75 0.75 0.75]] [[2.13000000e-01 6.04876653e-02]
 [2.13000000e-01 6.03352854e-02]
 [9.61000000e-01 1.45894112e-01]
 [5.91000000e-01 6.94445602e-02]
 [5.91000000e-01 6.94319200e-02]
 [7.33000000e-01 6.94625128e-02]
 [8.64000000e-01 2.18520311e-02]
 [8.64000000e-01 2.11114439e-02]
 [2.21000000e-01 5.85569916e-02]
 [8.64000000e-01 2.17968878e-02]
 [2.12000000e-01 6.31091435e-02]
 [8.68000000e-01 2.19550364e-02]
 [8.65000000e-01 2.10939210e-02]
 [2.13000000e-01 6.07196599e-02]
 [1.49000000e-01 1.74026571e-02]
 [1.49000000e-01 1.74013446e-02]
 [6.00000000e-01 6.76882947e-02]
 [9.61000000e-01 1.44199737e-01]
 [9.61000000e-01 1.44756338e-01]
 [6.00000000e-01 6.71246529e-02]
 [1.49000000e-01 1.74024096e-02]
 [8.68000000e-01 1.90841095e-02]
 [8.65000000e-01 2.10925686e-02]
 [8.68000000e-01 2.19903235e-02]
 [2.



>0.814
[[0.25 0.25 0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25 0.25 0.75]
 [0.25 0.25 0.25 0.25 0.5  0.25]
 ...
 [0.75 0.75 0.75 0.75 0.25 0.75]
 [0.75 0.75 0.75 0.75 0.5  0.75]
 [0.75 0.75 0.75 0.75 0.75 0.25]] [[8.74000000e-01 1.99354751e-02]
 [8.66000000e-01 2.01526467e-02]
 [8.67000000e-01 1.96407851e-02]
 [2.20000000e-01 6.04711762e-02]
 [8.67000000e-01 1.96734450e-02]
 [8.66000000e-01 2.01568340e-02]
 [8.74000000e-01 2.00017725e-02]
 [7.55000000e-01 3.43193538e-02]
 [7.33000000e-01 6.98470869e-02]
 [9.86000000e-01 7.08146629e-02]
 [9.61000000e-01 1.46052855e-01]
 [7.55000000e-01 3.42133063e-02]
 [8.68000000e-01 2.19944234e-02]
 [2.12000000e-01 6.32058546e-02]
 [8.64000000e-01 2.11120033e-02]
 [8.68000000e-01 1.90769131e-02]
 [2.15000000e-01 5.52185383e-02]
 [8.68000000e-01 1.90990647e-02]
 [2.13000000e-01 6.07190130e-02]
 [8.65000000e-01 2.11018418e-02]
 [7.34000000e-01 6.85879306e-02]
 [7.34000000e-01 7.05798993e-02]
 [9.69000000e-01 1.30275415e-01]
 [7.34000000e-01 7.06091848e-0

In [None]:
print('Accuracy: %.3f (%.3f)' % (mean(results), std(results)))
compare.head(30)

In [None]:
Accuracy: 0.837 (0.008)