In [1]:
import mlflow
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.saving import load_model

2024-08-28 22:39:13.066432: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# MLFLOW SERVER

In [2]:
# Turn on MLflow autologging before any model is built or trained.
mlflow.autolog()

# Address of the tracking server (local instance on port 8080).
host = "127.0.0.1"
tracking_uri = "http://{}:8080".format(host)
mlflow.set_tracking_uri(uri=tracking_uri)

# Select the experiment under which subsequent runs are recorded.
experiment_name = "same_times_sampled_randomly"
mlflow.set_experiment(experiment_name)

2024/08/28 22:39:22 INFO mlflow.tracking.fluent: Autologging successfully enabled for keras.
2024/08/28 22:39:22 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


<Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1718116933780, experiment_id='0', last_update_time=1724884453613, lifecycle_stage='active', name='same_times_sampled_randomly', tags={}>

# DATA LOADING

In [169]:
# Open the .npz archive that holds the correlation matrices and both label sets.
dataset = np.load("/home/dynamix/datasets/correlations.npz")

# Pull the three arrays out of the archive by key, in a fixed order.
correlation_matrices, parameters, initial_values = (
    dataset[key] for key in ("correlation_matrices", "ab_labels", "xy_labels")
)

In [None]:
# Verify that the correlation matrices contain no NaN values.
# Truthiness cannot be used for this: bool(nan) is True, so a test built on
# bool(element) only flags zeros and never a NaN. np.isnan tests for NaN
# directly and is vectorised, avoiding a Python-level loop over every element.
has_nan = np.isnan(correlation_matrices).any()

# Prints True when the data is NaN-free, False otherwise.
print(not has_nan)

# DATASET SEGMENTATION

In [170]:
# Dataset dimensions ("iv" presumably stands for initial values -- confirm).
total_iv = 25
total_parameters = 231
matrices_per_parameter = 2000

# Derived counts: matrices in one iv chunk, then across every chunk.
matrices_per_iv = total_parameters * matrices_per_parameter
total_matrices = total_iv * matrices_per_iv

print(f"matrices per iv: {matrices_per_iv}")
print(f"total matrices: {total_matrices}")

matrices per iv: 462000
total matrices: 11550000


In [171]:
# Overall sizes of the 70/20/10 train/validation/test split.
total_train = round(total_matrices*0.7)
total_validation = round(total_matrices*0.2)
total_test = round(total_matrices*0.1)
print("total train:", total_train, "\ntotal validation:", total_validation, "\ntotal test:", total_test)

# Pre-allocate the destination arrays: 2x2 inputs and 2-component targets.
x_train, y_train = np.zeros(shape=(total_train, 2, 2)), np.zeros(shape=(total_train, 2))
x_validation, y_validation = np.zeros(shape=(total_validation, 2, 2)), np.zeros(shape=(total_validation, 2))
x_test, y_test = np.zeros(shape=(total_test, 2, 2)), np.zeros(shape=(total_test, 2))

# Sizes of the same 70/20/10 split inside a single chunk of matrices_per_iv rows.
train = round(matrices_per_iv*0.7)
validation = round(matrices_per_iv*0.2)
test = round(matrices_per_iv*0.1)
print("mini train:", train, "\nmini validation:", validation, "\nmini test:", test)

# The test slice below takes "everything after train+validation", so the three
# per-chunk sizes must add up to the chunk size or the assignment cannot fit.
assert train + validation + test == matrices_per_iv, "per-chunk split sizes do not sum to matrices_per_iv"

# Chunk boundaries in each destination array (t, v, tt) and in the source data (d).
t = np.arange(0, total_train+1, train)
v = np.arange(0, total_validation+1, validation)
tt = np.arange(0, total_test+1, test)
d = np.arange(0, total_matrices+1, matrices_per_iv)

# Walk over the source chunks; `d` holds one boundary more than there are chunks.
# (The loop previously referenced an undefined name, `data_indices`.)
for i in range(len(d)-1):
    chunk_x = correlation_matrices[d[i]:d[i+1]]
    chunk_y = parameters[d[i]:d[i+1]]

    # First 70% of the chunk -> training set.
    x_train[t[i]:t[i+1]] = chunk_x[:train]
    y_train[t[i]:t[i+1]] = chunk_y[:train]

    # Next 20% -> validation set.
    x_validation[v[i]:v[i+1]] = chunk_x[train:train+validation]
    y_validation[v[i]:v[i+1]] = chunk_y[train:train+validation]

    # Remaining 10% -> test set.
    x_test[tt[i]:tt[i+1]] = chunk_x[train+validation:]
    y_test[tt[i]:tt[i+1]] = chunk_y[train+validation:]

total train: 8085000 
total validation: 2310000 
total test: 1155000
mini train: 323400 
mini validation: 92400 
mini test: 46200


In [172]:
# Wrap each (inputs, targets) pair in a tf.data.Dataset, one element per sample.
train_dataset, validation_dataset, test_dataset = (
    tf.data.Dataset.from_tensor_slices(pair)
    for pair in (
        (x_train, y_train),
        (x_validation, y_validation),
        (x_test, y_test),
    )
)

# NEURAL NETWORK

In [176]:
# Small fully-connected network: the 2x2 input is flattened, passed through
# two 50-unit ReLU layers and mapped to 2 outputs with no activation.
# (The wider 300/250/200/150/100-unit hidden layers are disabled in this configuration.)
model = Sequential([
    tf.keras.Input(shape=(2, 2)),
    Flatten(),
    Dense(50, activation="relu"),
    Dense(50, activation="relu"),
    Dense(2),
])

model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_11 (Flatten)        (None, 4)                 0         
                                                                 
 dense_33 (Dense)            (None, 50)                250       
                                                                 
 dense_34 (Dense)            (None, 50)                2550      
                                                                 
 dense_35 (Dense)            (None, 2)                 102       
                                                                 
Total params: 2,902
Trainable params: 2,902
Non-trainable params: 0
_________________________________________________________________


In [177]:
# Training hyperparameters, read by the compile/fit cell.
params = dict(
    batch_size=64,
    epochs=20,
    learning_rate=0.0001,
    loss="mean_absolute_error",
)

In [178]:
# Configure training with Adam and the loss named in `params`.
# NOTE(review): "accuracy" is a classification metric; with a 2-unit output that
# has no activation and an absolute-error loss it is probably not meaningful --
# consider tracking a regression metric instead.
adam = Adam(learning_rate=params["learning_rate"])
model.compile(optimizer=adam, loss=params["loss"], metrics=["accuracy"])

# Train on the training split, evaluating on the validation split after each epoch.
history = model.fit(
    x_train,
    y_train,
    batch_size=params["batch_size"],
    epochs=params["epochs"],
    validation_data=(x_validation, y_validation),
    shuffle=True,
    verbose=1,
)

2024/08/29 04:17:17 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '770a253f1a644cfb8b71c6326a870508', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/20
  5354/126329 [>.............................] - ETA: 8:26 - loss: 1.3689 - accuracy: 0.9481

KeyboardInterrupt: 

In [66]:
# Ask for a file stem and store the model as "<stem>.keras".
savename = input("Enter savename for the model: ")
model.save(f"{savename}.keras")

# Run the model on the test inputs and show targets next to predictions.
predictions = model.predict(x_test, verbose=2)
print(y_test)
print(predictions)

Enter savename for the model:  best2_loss29_acc97


69/69 - 0s - 3ms/step
[[1.   1.  ]
 [1.2  1.44]
 [1.4  1.96]
 ...
 [2.6  6.76]
 [2.8  7.84]
 [3.   9.  ]]
[[1.1380543 1.1492687]
 [1.17018   1.257448 ]
 [1.3818299 1.9701506]
 ...
 [2.763996  7.3746567]
 [2.8304937 7.6391153]
 [2.6955729 7.1025405]]
