In [20]:
import numpy as np
import tensorflow 
import random

In [2]:
# Read the structure parameters from a whitespace-delimited text file.
structures = np.loadtxt(fname="structures_120k.txt")

In [3]:
# Load the spectra and restore the per-sample (61, 4) layout.
# The leading dimension is inferred (-1) instead of being hard-coded, so the
# cell keeps working when the file holds a different number of samples.
spectra = np.loadtxt("spectra_120k.dat")
spectra = spectra.reshape(-1, 61, 4)

In [4]:
# Scale every structure parameter by a fixed factor of 200
# (presumably to bring the values into [0, 1] — TODO confirm the raw range).
# NOTE: this rebinds `structures`, so re-running the cell divides a second time.
structures = np.true_divide(structures, 200)

In [5]:
# The first 110000 samples form the training pool; everything after is held out.
structures_train, structures_test = np.split(structures, [110000])
spec_train, spec_test = np.split(spectra, [110000])

In [6]:
# Sanity check: dimensions of the held-out spectra.
np.shape(spec_test)

(10000, 61, 4)

In [7]:
def euclidean_distance(a,b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    Parameters
    ----------
    a, b : array-like
        Points to compare. Plain sequences are accepted; shapes must be
        broadcast-compatible. Multi-dimensional inputs are treated as
        flattened vectors.

    Returns
    -------
    float
        The 2-norm of ``a - b``.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    # Promote integer/bool inputs to float so that subtraction of unsigned
    # values cannot wrap around and silently inflate the distance.
    if not np.issubdtype(a.dtype, np.inexact):
        a = a.astype(float)
    if not np.issubdtype(b.dtype, np.inexact):
        b = b.astype(float)
    return np.linalg.norm(a - b)

In [8]:
def greedy_selection(data, num_samples, start_index):
    """Greedily pick samples, each one the farthest from the previous pick.

    Starting from ``start_index``, every step selects, among the samples not
    chosen yet, the one with the largest Euclidean distance to the most
    recently selected sample. Only the *last* pick is considered at each step,
    not the whole selected set. Ties go to the lowest index.

    Parameters
    ----------
    data : array-like, shape (n_samples, ...)
        Samples to choose from; trailing axes are flattened per sample.
    num_samples : int
        Number of indices to return. The start index is always returned, so
        values below 1 still yield one index.
    start_index : int
        Row of the first selected sample, ``0 <= start_index < n_samples``.

    Returns
    -------
    numpy.ndarray
        Selected row indices in selection order, without duplicates.

    Raises
    ------
    ValueError
        If ``start_index`` is out of range or ``num_samples`` exceeds the
        number of available samples.
    """
    points = np.asarray(data)
    n_points = len(points)
    if not 0 <= start_index < n_points:
        raise ValueError(
            f"start_index {start_index} is out of range for {n_points} samples"
        )
    if num_samples > n_points:
        raise ValueError(
            f"cannot select {num_samples} samples from only {n_points}"
        )

    # Work on floats so integer inputs cannot overflow or wrap in the subtraction.
    if not np.issubdtype(points.dtype, np.inexact):
        points = points.astype(float)
    # One row per sample, so a single norm call handles any sample shape.
    points = points.reshape(n_points, -1)

    selected_indices = [start_index]
    available = np.ones(n_points, dtype=bool)
    available[start_index] = False

    while len(selected_indices) < num_samples:
        last_selected = points[selected_indices[-1]]
        # Distances from the last pick to every sample in one vectorized call.
        distances = np.linalg.norm(points - last_selected, axis=1)
        # Already chosen samples must never win the argmax.
        distances[~available] = -np.inf
        farthest = int(np.argmax(distances))
        selected_indices.append(farthest)
        available[farthest] = False

    return np.array(selected_indices)


In [9]:
# Toggle: True draws the start row at random, False always starts from row 0.
random_start = True
# NOTE: no seed is set in the cells shown here, so a random start differs between runs.
starting_index = random.randint(0, len(structures_train) - 1) if random_start else 0


In [10]:
# Record which start row was used for this run.
print(starting_index)

102203


In [11]:
# Choose 5000 training rows with the greedy farthest-from-last rule,
# then gather the matching structures and spectra.
selected_indices = greedy_selection(
    structures_train,
    num_samples=5000,
    start_index=starting_index,
)
selected_structures, selected_spectra = (
    pool[selected_indices] for pool in (structures_train, spec_train)
)


In [19]:
# Peek at the first five selected structures (in selection order).
print(selected_structures[:5])

[[0.745 0.94  0.65  0.81  0.935 0.77  0.7   0.555]
 [0.32  0.165 0.21  0.165 0.155 0.215 0.125 0.72 ]
 [0.97  0.825 0.97  0.745 0.99  0.965 0.99  0.215]
 [0.25  0.15  0.19  0.445 0.195 0.175 0.185 0.985]
 [0.86  0.75  0.995 0.865 0.96  0.99  0.9   0.155]]


In [21]:
def _dense_conv_stage(width, next_width):
    """Return the four layers of one stage: Reshape -> Conv1D -> Flatten -> Dense.

    A flat vector of `width` values is reshaped to (width // 8, 8), passed
    through a `width`-filter Conv1D, flattened again and projected to
    `next_width` units.
    """
    layers = tensorflow.keras.layers
    return [
        layers.Reshape((width // 8, 8)),
        layers.Conv1D(width, kernel_size=5, padding='same', activation='elu'),
        layers.Flatten(),
        layers.Dense(next_width, activation='elu'),
    ]


def _spectrum_network_layers():
    """List every layer of the structure -> spectrum network, in order."""
    layers = tensorflow.keras.layers
    stack = [
        layers.Input(shape=(8,)),
        layers.Dense(128, activation='elu'),
    ]
    # (conv width, width of the dense layer that follows) for the six stages
    for width, next_width in [(128, 256), (256, 128)] + [(128, 128)] * 4:
        stack += _dense_conv_stage(width, next_width)
    stack += [
        layers.Dense(128, activation='elu'),
        layers.Dense(244, activation='sigmoid'),  # 244 = 61 * 4 output values
        layers.Reshape((61, 4)),
    ]
    return stack


model = tensorflow.keras.Sequential(_spectrum_network_layers())

In [22]:
# Print the layer-by-layer overview of the network built above.
model.summary()

In [23]:
# Mean-squared-error regression trained with Adam at a learning rate of 1e-3.
model.compile(
    loss='mse',
    optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.001),
)

In [24]:
# Train on the greedily selected subset only.
# NOTE(review): Keras takes `validation_split` from the tail of the arrays before
# shuffling, i.e. from the samples that were selected last — confirm this is intended.
history = model.fit(
    x=selected_structures,
    y=selected_spectra,
    batch_size=256,
    epochs=100,
    validation_split=0.3,
)


Epoch 1/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - loss: 0.1041 - val_loss: 0.0318
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - loss: 0.0288 - val_loss: 0.0264
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - loss: 0.0256 - val_loss: 0.0255
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - loss: 0.0251 - val_loss: 0.0251
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 0.0243 - val_loss: 0.0245
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 0.0235 - val_loss: 0.0236
Epoch 7/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - loss: 0.0225 - val_loss: 0.0229
Epoch 8/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - loss: 0.0219 - val_loss: 0.0226
Epoch 9/100
[1m14/14[0m [32m━━━━━━━━━

In [25]:
# Predict spectra for the whole training pool, not just the selected subset.
predicted_spectra_train = model.predict(x=structures_train)

[1m3438/3438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step


In [27]:
# Mean squared error of the predictions over the full training pool.
mse = tensorflow.keras.losses.MeanSquaredError()
loss = mse(y_true=spec_train, y_pred=predicted_spectra_train).numpy()

print(f"The loss for 110k data of test1 is  {loss}")

The loss for 110k data of test1 is  0.019229425117373466


In [28]:
# Predict spectra for the held-out structures.
predicted_spectra_test = model.predict(x=structures_test)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step


In [29]:
# Mean squared error on the held-out test split.
# The loss object is created under the name that is actually called below, so
# this cell does not depend on an `mse` left over from an earlier cell.
mse = tensorflow.keras.losses.MeanSquaredError()
losses = mse(spec_test,predicted_spectra_test).numpy()

print(f"The loss for 10k data of test1 is  {losses}")

The loss for 10k data of test1 is  0.01922651007771492
