In [1]:
import numpy as np
import tensorflow 
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [2]:
# Read the structure table from a text file into a NumPy array
# (the next cell reports its shape as (120000, 8)).
structures = np.loadtxt("structures_120k.txt")

In [3]:
# Sanity check: display (number of samples, values per structure).
structures.shape

(120000, 8)

In [4]:
# Scale every structure value by 1/200.
# NOTE(review): 200 is presumably the maximum raw parameter value, which would map inputs into [0, 1] — confirm.
# Caution: this rebinds `structures` to the scaled copy, so running the cell twice divides by 200 twice.
structures = structures/200

In [5]:
# Preview the first five scaled structures.
structures[:5]

array([[0.935, 0.52 , 0.52 , 0.84 , 0.795, 0.86 , 0.575, 0.505],
       [0.995, 0.85 , 0.59 , 0.125, 0.365, 0.695, 0.99 , 0.21 ],
       [0.665, 0.205, 0.82 , 0.485, 0.44 , 0.315, 0.63 , 0.185],
       [0.515, 0.835, 0.6  , 0.725, 0.585, 0.795, 0.53 , 0.58 ],
       [0.255, 0.885, 0.565, 0.27 , 0.4  , 0.87 , 0.825, 0.725]])

In [6]:
# Read the spectra table from a text file; it is reshaped to one (61, 4) grid per sample in the next cell.
spectra = np.loadtxt("spectra_120k.dat")

In [7]:
# View each sample's spectrum as a (61, 4) grid. The sample count is taken from `structures`
# instead of being hard-coded, so other dataset sizes work, and a spectra file whose size
# does not match the structures file fails right here with a ValueError.
spectra = spectra.reshape(len(structures), 61, 4)

In [8]:
# Sanity check: display the shape after the reshape to (samples, 61, 4).
spectra.shape

(120000, 61, 4)

In [9]:
# Positional hold-out split: the first 110000 rows are for training, the remainder for testing.
# The same boundary is applied to both arrays so each structure stays paired with its spectrum.
structures_train, structures_test = structures[:110000], structures[110000:]
spec_train, spec_test = spectra[:110000], spectra[110000:]

In [10]:
# Sanity check: display the size of the held-out split (rows from index 110000 onwards).
spec_test.shape

(10000, 61, 4)

In [11]:
# Configuration of the cluster-based sub-sampling.
k = 500            # number of KMeans clusters
n_samples = 5_000  # total number of data points to select

In [12]:
# Partition the training structures into k clusters; the sampling loop further down draws
# points from each cluster. random_state pins the centroid initialisation so that a
# Restart & Run All reproduces the same clustering; change the value to get a different partition.
kmeans = KMeans(n_clusters=k, random_state=0)
kmeans.fit(structures_train)

In [13]:
# Cluster index assigned by the fitted KMeans model to each row of structures_train.
labels = kmeans.labels_

In [14]:
# Equal quota per cluster. Floor division: when n_samples is not a multiple of k,
# k * samples_per_cluster is smaller than n_samples.
samples_per_cluster = n_samples // k

In [15]:
# Per-cluster accumulators: the sampling loop appends one array per cluster to each list.
sampled_structures, sampled_spectra = [], []

In [16]:
# Start from empty accumulators so that re-running this cell never appends a second copy of
# the samples (and still works after the stacking cell has rebound these names to arrays).
sampled_structures = []
sampled_spectra = []

for cluster in range(k):
    # Row indices (into the training arrays) of the points assigned to this cluster.
    cluster_indices = np.where(labels == cluster)[0]

    # A cluster may hold fewer points than the quota. np.random.choice(..., replace=False)
    # raises ValueError in that case, so take every member of such a cluster instead.
    n_take = min(samples_per_cluster, cluster_indices.size)

    # Draw without replacement; deliberately unseeded, so each run picks a different subset.
    sampled_indices = np.random.choice(cluster_indices, n_take, replace=False)

    # Index both arrays with the same rows so each structure stays paired with its spectrum.
    sampled_structures.append(structures_train[sampled_indices])
    sampled_spectra.append(spec_train[sampled_indices])

In [17]:
# Stack the per-cluster chunks along the first axis, turning each list into a single array.
sampled_structures, sampled_spectra = map(np.vstack, (sampled_structures, sampled_spectra))

In [19]:
# Sanity check: display the shape of the stacked spectra subset (samples, 61, 4).
sampled_spectra.shape

(5000, 61, 4)

In [21]:
def _conv_dense_block(in_units, out_units):
    """Return the layers of one reshape -> Conv1D -> flatten -> Dense stage.

    in_units is the width of the Dense layer feeding the stage: its output is viewed as a
    sequence of in_units // 8 steps with 8 channels and convolved with in_units filters.
    out_units is the width of the Dense layer that closes the stage.
    """
    layers = tensorflow.keras.layers
    return [
        layers.Reshape((in_units // 8, 8)),
        layers.Conv1D(in_units, kernel_size=5, padding='same', activation='elu'),
        layers.Flatten(),
        layers.Dense(out_units, activation='elu'),
    ]


# (input width, output width) of the six stages, in network order.
_stage_widths = [(128, 256), (256, 128)] + [(128, 128)] * 4

# Maps an 8-value structure vector to a (61, 4) spectrum; the sigmoid keeps outputs in (0, 1).
model = tensorflow.keras.Sequential([
    tensorflow.keras.layers.Input(shape=(8,)),
    tensorflow.keras.layers.Dense(128, activation='elu'),
    *[layer for widths in _stage_widths for layer in _conv_dense_block(*widths)],
    tensorflow.keras.layers.Dense(128, activation='elu'),
    tensorflow.keras.layers.Dense(244, activation='sigmoid'),
    tensorflow.keras.layers.Reshape((61, 4)),
])

In [22]:
# Print the layer-by-layer summary of the network as a check on the architecture.
model.summary()

In [23]:
# Optimise mean squared error with Adam at a learning rate of 0.001.
model.compile(
    loss='mse',
    optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.001),
)

In [25]:
# Keras takes the validation_split fraction from the END of the arrays, before any shuffling.
# The sampled arrays are ordered by cluster, so without a shuffle the last 30% of clusters
# would be held out entirely and never seen during training. Permute the rows first so that
# the training and validation parts both draw from all clusters.
shuffle_order = np.random.permutation(len(sampled_structures))
history = model.fit(
    sampled_structures[shuffle_order],
    sampled_spectra[shuffle_order],
    epochs=100,
    batch_size=256,
    validation_split=0.3,
)


Epoch 1/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53ms/step - loss: 0.1107 - val_loss: 0.0316
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - loss: 0.0294 - val_loss: 0.0260
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - loss: 0.0258 - val_loss: 0.0253
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - loss: 0.0250 - val_loss: 0.0249
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - loss: 0.0253 - val_loss: 0.0249
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - loss: 0.0250 - val_loss: 0.0247
Epoch 7/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 0.0247 - val_loss: 0.0243
Epoch 8/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 0.0243 - val_loss: 0.0237
Epoch 9/100
[1m14/14[0m [32m━━━━━━━━━

In [26]:
# Predict spectra for the whole training split (this includes the sampled subset the model was fitted on).
predicted_spectra_train = model.predict(structures_train)


[1m3438/3438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step


In [28]:
# Mean squared error between the reference and predicted spectra of the training split.
mse = tensorflow.keras.losses.MeanSquaredError()
loss = mse(y_true=spec_train, y_pred=predicted_spectra_train).numpy()

print(f"The loss for 110k data of test1 is  {loss}")

The loss for 110k data of test1 is  0.016826007515192032


In [29]:
# Predict spectra for the held-out test split, which was never used for sampling or fitting.
predicted_spectra_test = model.predict(structures_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step


In [30]:
# Create the loss object under the name that is actually called below, so this cell runs on
# its own instead of relying on an `mse` left over from the training-loss cell.
mse = tensorflow.keras.losses.MeanSquaredError()
losses = mse(spec_test, predicted_spectra_test).numpy()

print(f"The loss for 10k data of test1 is  {losses}")

The loss for 10k data of test1 is  0.01693662442266941
