In [None]:
import tensorflow as tf
import tf_keras as keras
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from itertools import chain
from tqdm import tqdm
import tqdm.keras

# Print the version of tf_keras, which this notebook imports under the name `keras`.
print(keras.__version__)

In [None]:
def make_dataset(mini, maxi, maxsub, num):
    """Generate `num` random integer groups with per-element features and group sums.

    Every sample is a 1-D integer array whose length is drawn from [1, maxsub)
    and whose entries are drawn from [mini, maxi); both upper bounds are
    exclusive because the draws go through np.random.randint.

    Returns a tuple (features, y, numbers):
        features: list with one (2, k) array per sample; row 0 holds the square
            roots of the sample's entries, row 1 holds their squares.
        y: 1-D array with the sum of each sample.
        numbers: list of the raw integer samples.
    """
    def draw_sample():
        # The length is drawn first, then the values.
        length = np.random.randint(1, maxsub)
        return np.random.randint(mini, maxi, size=length)

    numbers = [draw_sample() for _ in range(num)]
    features = [np.array((np.sqrt(sample), sample**2)) for sample in numbers]
    y = np.array([sample.sum() for sample in numbers])
    return features, y, numbers

def make_easyset(num_instances, min_comps, max_comps):
    """Generate random instances made of integer pairs, with pair sums and totals.

    Each instance is an (n_pairs, 2) array of integers drawn from [1, 10).
    n_pairs is drawn from [min_comps, max_comps); the upper bound is exclusive
    because the draw goes through np.random.randint.

    Parameters:
        num_instances: number of instances to generate.
        min_comps: smallest number of pairs an instance may contain (inclusive).
        max_comps: upper bound on the number of pairs (exclusive).

    Returns a tuple (comps, sums, pair_sums):
        comps: list of (n_pairs, 2) integer arrays (the features).
        sums: 1-D array with the total of all entries of each instance (the target).
        pair_sums: list of 1-D arrays with the sum of each pair (the true
            per-pair contributions).

    Raises:
        ValueError: from np.random.randint when min_comps >= max_comps.
    """
    comps = []      # features
    pair_sums = []  # true per-pair contributions
    sums = []       # targets

    for _ in range(num_instances):
        # Honour min_comps: the lower bound used to be hard-coded to 1.
        n_pairs = np.random.randint(min_comps, max_comps)
        pairs = np.random.randint(1, 10, size=(n_pairs, 2))
        comps.append(pairs)
        pair_sums.append(pairs.sum(axis=-1))
        sums.append(pairs.sum())
    return comps, np.array(sums), pair_sums


In [None]:
class MLP(keras.Model):
    """Model that scores every element of a structure with one shared sub-network and sums the scores.

    The dense stack in `self.subnet` maps an `n_inputs`-wide feature vector to a
    single value. `call` applies it to each structure of the batch and adds the
    per-element values up along axis 1.
    """

    def __init__(self, n_inputs, layers, output_func="linear"):
        """Assemble the shared per-element sub-network.

        n_inputs: number of features per element (width of the subnet input).
        layers: list of hidden Keras layers placed between the input and the
            output layer (must be a list, it is concatenated with lists).
        output_func: activation of the final single-unit Dense layer.
        """
        super().__init__()
        entry = keras.Input(shape=(n_inputs,))
        # One output unit: the contribution of a single element.
        head = keras.layers.Dense(1, activation=output_func)
        self.subnet = keras.Sequential(layers=[entry] + layers + [head])
        self.num_features = n_inputs

    def call(self, inputs, training=False):
        """Forward pass: per-element contributions summed within each structure.

        inputs: batch of structures. The notebook passes a RaggedTensor built
            with ragged_rank=1 and inner_shape=(2,), i.e. a variable number of
            feature rows per structure.
        training: forwarded unchanged to the sub-network.

        Returns a one-element list holding the tensor of per-structure sums, so
        callers index `[0]` to reach the tensor itself.
        """
        # Run the shared subnet on one structure at a time; the output spec keeps
        # the per-structure results stackable even though their lengths differ.
        contributions = tf.map_fn(
            lambda struct: self.subnet(struct, training=training),
            inputs,
            fn_output_signature=tf.RaggedTensorSpec(ragged_rank=0),
        )
        # Collapse the element axis so each structure yields a single total.
        return [tf.reduce_sum(contributions, axis=1)]

class GlobalProgressBar(keras.callbacks.Callback):
    """Keras callback that shows one tqdm bar spanning the whole training run.

    The bar is created when the callback is constructed, advanced by one after
    every batch, and closed when training ends.
    """

    def __init__(self, total_epochs, total_batches):
        """Create the progress bar.

        total_epochs: number of epochs the run will last.
        total_batches: number of batches per epoch; the bar total is
            total_epochs * total_batches.
        """
        super().__init__()
        # Import the bar class under a private alias. At module level the name
        # `tqdm` ends up bound to the tqdm *package* (`import tqdm.keras` rebinds
        # it after `from tqdm import tqdm`), so calling `tqdm(...)` directly
        # raises "TypeError: 'module' object is not callable".
        from tqdm import tqdm as _tqdm_bar

        self.total_epochs = total_epochs
        self.total_batches = total_batches
        self.progress_bar = _tqdm_bar(total=total_epochs * total_batches, desc="Training Progress")

    def on_batch_end(self, batch, logs=None):
        """Advance the bar by one step after each batch."""
        self.progress_bar.update(1)  # Update per batch

    def on_train_end(self, logs=None):
        """Close the bar once training has finished."""
        self.progress_bar.close()

In [None]:
features, sums, trues = make_dataset(1, 20, 5, 1000)

# Split the raw structures 60/20/20 *before* scaling, so that the scaler
# statistics come from the training portion only. Fitting the scaler on every
# row would leak validation/test information into the features.
raw_train, raw_holdout, ytrain, yholdout, truetrain, trueholdout = train_test_split(
    features, sums, trues, test_size=0.4
)
raw_val, raw_test, yval, ytest, trueval, truetest = train_test_split(
    raw_holdout, yholdout, trueholdout, test_size=0.5
)

# Each structure is a (2, k) array; stacking along columns and transposing
# gives one row per element with the two features as columns.
stacked = np.hstack(features).T          # every element of every structure (kept for inspection)
stacked_train = np.hstack(raw_train).T   # only the rows the scaler is allowed to see

SSC = StandardScaler().fit(stacked_train)


def scale_structs(structs):
    """Turn each (2, k) structure into a scaled (k, 2) array using the train-fitted scaler."""
    return [SSC.transform(struct.T) for struct in structs]


# All structures in their original order, scaled with the training statistics.
scaled_features = scale_structs(features)

# Ragged tensors: a variable number of rows per structure, two features per row.
Xtrain = tf.ragged.constant(scale_structs(raw_train), ragged_rank=1, inner_shape=(2,))
Xval = tf.ragged.constant(scale_structs(raw_val), ragged_rank=1, inner_shape=(2,))
Xtest = tf.ragged.constant(scale_structs(raw_test), ragged_rank=1, inner_shape=(2,))

In [None]:
# Two hidden layers of 100 ReLU units on top of the 2 per-element features.
MLP1 = MLP(
    n_inputs=2,
    layers=[
        keras.layers.Dense(100, activation="relu"),
        keras.layers.Dense(100, activation="relu"),
    ],
)

MLP1.compile(
    optimizer='adam',
    loss="mean_squared_error"
)
print(Xtrain.shape)

epochs = 400
batch_size = 20
# Steps per epoch. A trailing partial batch is still one training step, so
# round up instead of flooring (floor undercounts whenever the number of
# samples is not a multiple of batch_size).
total_batches = (Xtrain.shape[0] + batch_size - 1) // batch_size

# Keras' own per-batch logging is silenced (verbose=0); progress is reported by
# the tqdm callback instead.
res = MLP1.fit(
    Xtrain, ytrain,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(Xval, yval),
    verbose=0,
    callbacks=[tqdm.keras.TqdmCallback()]
)


In [None]:
# Test-set residuals (target minus predicted total). The model returns a
# one-element list, hence the [0]. As a bare expression that was not the last
# line of the cell this value was silently discarded, so keep it in a name and
# print it.
residuals = ytest - MLP1.predict(Xtest)[0].flatten()
print(residuals)

# Sub-network output for the first training structure, next to the raw numbers
# that structure was generated from.
print(MLP1.subnet.predict(Xtrain)[0])
print(truetrain[0])

In [None]:
# X, y, y_contrib = make_dataset(0, 25, 54, 500)


In [None]:
import tensorflow as tf
# Stay on tf_keras, the Keras implementation the rest of this notebook uses
# under the name `keras`. Importing `keras` from tensorflow here would rebind
# that name, so re-running earlier cells afterwards would silently build their
# models with a different Keras.
import tf_keras as keras
from tf_keras import layers

# Variable-length sequences with 10 features per step, fed as ragged tensors.
input_layer = keras.Input(shape=(None, 10), ragged=True)

# Example: Use a Masking layer (optional, depending on the architecture)
# NOTE(review): ragged inputs carry no padding, so this layer is probably
# redundant here - confirm before removing it.
x = layers.Masking(mask_value=0.0)(input_layer)

# Example: Process with a LSTM layer
x = layers.LSTM(32)(x)

# Output layer: one sigmoid unit for binary classification
output = layers.Dense(1, activation="sigmoid")(x)

# Define and compile the model
model = keras.Model(inputs=input_layer, outputs=output)
model.compile(optimizer="adam", loss="binary_crossentropy")


In [None]:
import numpy as np

# Three example sequences with 3, 5 and 2 timesteps of 10 random features each,
# packed into one RaggedTensor (ragged_rank=1).
ragged_data = tf.ragged.constant(
    [np.random.rand(n_steps, 10) for n_steps in (3, 5, 2)],
    ragged_rank=1,
)

print(ragged_data.shape)

# One binary class label per example sequence.
labels = np.array([0, 1, 0])

# Nothing is trained in this cell: it only prints the result of unstacking the
# ragged batch along axis 1.
print(tf.unstack(ragged_data, axis=1))
