In [137]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from pathlib import Path

In [138]:
def give_me_x_and_y_per_node(df_general_results, name):
    #take data from the df
    df = df_general_results[df_general_results['Node Name'] == name]

    x_values = df['Data Size'].values
    y_values = df['Total-time'].values

    # filter out non unique values
    unique_x, unique_indices = np.unique(x_values, return_index=True)
    unique_y = y_values[unique_indices]

    #sort values (for easier fitting and interpolation)
    sorted_indices = np.argsort(unique_x)

    return unique_x[sorted_indices], unique_y[sorted_indices]


def build_big(name_list):
    result = pd.DataFrame({'Node Name': np.empty(1), 'Total-time': np.empty(1), 'Data Size': np.empty(1)})
    for path in name_list:
        test = pd.read_csv(path, sep='\t')
        test_general_results = test[['Node Name', 'Total-time', 'Data Size']]
        result = pd.concat([result, test_general_results])
    
    return result

In [139]:
path_finals = [Path("/Users/manuez42/Desktop/fonda/A2-job-granularity/MG-HIBF/general_results_fonda/general_results_viral500m.tsv"),
               Path("/Users/manuez42/Desktop/fonda/A2-job-granularity/MG-HIBF/general_results_fonda/general_results_human9g.tsv"),
               Path("/Users/manuez42/Desktop/fonda/A2-job-granularity/MG-HIBF/general_results_fonda/general_results_archea1.4g.tsv"),
               Path("/Users/manuez42/Desktop/fonda/A2-job-granularity/MG-HIBF/general_results_fonda/general_results_bacteria30g.tsv"),
               Path("/Users/manuez42/Desktop/fonda/A2-job-granularity/MG-HIBF/general_results_fonda/general_results_bacteria58g.tsv"),
               Path("/Users/manuez42/Desktop/fonda/A2-job-granularity/MG-HIBF/general_results_fonda/general_results_bacteria88g.tsv"),
               Path("/Users/manuez42/Desktop/fonda/A2-job-granularity/MG-HIBF/general_results_fonda/general_results_bacteria125g.tsv")]

def x_y(all_data, test_nodenames):
    x,y = [0], [0]
    while(len(x) < 10 or len(y) <10):
        name = np.random.choice(test_nodenames, 1)
        x,y = give_me_x_and_y_per_node(all_data, name[0])
    return x, y

all_data = build_big(path_finals)
test_nodenames = np.unique(all_data['Node Name'].values)
name = np.random.choice(test_nodenames, 1)

x,y = x_y(all_data, test_nodenames)

In [140]:
# test = pd.read_csv('../general_results_fonda/general_results_bacteria125g.tsv', sep='\t')
# test_general_results = test[['Node Name', 'Total-time', 'Data Size']]
# test_nodenames = np.unique(test['Node Name'].values)

model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(1,)))
model.add(tf.keras.layers.Dense(3, activation="relu", use_bias=True))
model.add(tf.keras.layers.Dense(3, activation="relu", use_bias=True))
model.add(tf.keras.layers.Dense(3, activation="relu", use_bias=True))
model.add(tf.keras.layers.Dense(1, activation="linear", use_bias=True))
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.RMSprop(20), metrics=['accuracy'])
model.fit(x, y, epochs=100, verbose=0, validation_split=0.2)

accuracy, loss = model.evaluate(x, y, verbose=0)
y_pred = model.predict(x)
mse = mean_squared_error(y, y_pred)

print("Accuracy for model:", "%0.3f" % accuracy)
print("Loss:", loss)
print("Mean Squared Error on Test Data:", mse)
print("X: ", x[:10], " len=", len(x))
print("y_pred: ",y_pred[:10], " len=", len(y_pred))
print("Y: ",y[:10], " len=", len(y))

Accuracy for model: 7784.562
Loss: 0.0
Mean Squared Error on Test Data: 522069.60011158895
X:  [ 1.6  4.8  6.8  7.3  7.6  7.7 11.  12.  13.  18. ]  len= 151
y_pred:  [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]  len= 151
Y:  [1.7647 3.4547 4.026  4.0773 4.1744 4.3086 6.0559 6.0429 9.2126 7.8885]  len= 151


In [141]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=1, input_shape=(1,), activation='linear', use_bias=True)
])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=50), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x, y, epochs=300, verbose=0, validation_split=0.2)

loss = model.evaluate(x, y, verbose=0)
y_pred = model.predict(x)
mse = mean_squared_error(y, y_pred)

# print("Accuracy for model:", "%0.3f" % accuracy)
print("Loss:", loss)
print("Mean Squared Error on Test Data:", mse)
print("X: ", x[:10], " len=", len(x))
print("y_pred: ",y_pred[:10], " len=", len(y_pred))
print("Y: ",y[:10], " len=", len(y))

Loss: [7784.56201171875, 0.0]
Mean Squared Error on Test Data: 4264860.233615223
X:  [ 1.6  4.8  6.8  7.3  7.6  7.7 11.  12.  13.  18. ]  len= 151
y_pred:  [[ -2.7416503]
 [ -8.224952 ]
 [-11.652015 ]
 [-12.5087805]
 [-13.02284  ]
 [-13.194192 ]
 [-18.848846 ]
 [-20.562378 ]
 [-22.27591  ]
 [-30.843567 ]]  len= 151
Y:  [1.7647 3.4547 4.026  4.0773 4.1744 4.3086 6.0559 6.0429 9.2126 7.8885]  len= 151


In [142]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=4, activation='relu', input_shape=(1,)),
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='relu')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x, y, epochs=1000, verbose=0, validation_split=0.2)

loss = model.evaluate(x, y, verbose=0)
y_pred = model.predict(x)
mse = mean_squared_error(y, y_pred)

# print("Accuracy for model:", "%0.3f" % accuracy)
print("Loss:", loss)
print("Mean Squared Error on Test Data:", mse)
print("X: ", x[:10], " len=", len(x))
print("y_pred: ",y_pred[:10], " len=", len(y_pred))
print("Y: ",y[:10], " len=", len(y))