In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import tensorflow_gnn as tfgnn
import tensorflow as tf
tf.get_logger().setLevel('ERROR')

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

from functools import partial

from model import *
from utils import *

In [2]:
negative_methods = ["same_degree_distribution", "most_close", "most_distant"]

k = 5

# define the search space
space = {
    'learning_rate': hp.loguniform('learning_rate', -6, -2),
    'optimizer': hp.choice('optimizer', ['sgd', 'adam', 'nadam']),
    'activation': hp.choice('activation', ['relu', 'sigmoid', 'tanh', 'leaky_relu'])
}

results = {}

In [3]:
# make and early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

In [4]:
from sklearn.metrics import accuracy_score

In [5]:
def objective(params, negative_method):
    # unpack the parameters
    learning_rate = params['learning_rate']
    optimizer_type = params['optimizer']
    activation = params['activation']



    # Create a graph of protein interactions for each method
    graphToTensor = GraphToTensor(negative_interaction_method=negative_method)
    graph_tensor = graphToTensor.graph_tensor

    dataset = create_dataset(graph_tensor, edge_batch_merge)

    graph_spec = dataset.element_spec[0]
    input_graph = tf.keras.layers.Input(type_spec=graph_spec)

    # Generate k-folds for each method
    kfolds = graphToTensor.generate_graph_tensors_for_k_folds(k_folds=k)

    # for storing y and yhat across all folds
    y_folds = []
    yhat_folds = []

    for i, (train_graph, test_graph) in enumerate(kfolds):

        # Create datasets for this fold
        train_dataset = create_dataset(train_graph, edge_batch_merge)
        test_dataset = create_dataset(test_graph, edge_batch_merge)

        model = create_model(input_graph, graph_updates=3, activation=activation)

        # choose the optimizer
        if optimizer_type == 'sgd':
            optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        elif optimizer_type == 'adam':
            optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        elif optimizer_type == 'nadam':
            optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate)

        # Compile the model
        model.compile(
            optimizer,
            loss='binary_crossentropy',
            metrics=['Accuracy']
        )
            
        # Fit the model for this fold
        model.fit(
            train_dataset.repeat(),
            epochs=1000,
            steps_per_epoch=10,
            validation_data=test_dataset.repeat(),
            validation_steps=10,
            callbacks=[early_stopping],
            verbose=0
        )

        # store y and yhat for this fold
        yhat = model.predict(test_dataset, verbose=0)
        y = test_dataset.map(lambda graph, labels: labels).unbatch()
        y = np.array(list(y.as_numpy_iterator()))
        yhat = np.array(list(yhat))

        y_folds.append(y)
        yhat_folds.append(yhat)

        # Delete the model to free up memory
        del model

    # get the predictions for all folds
    y = np.concatenate(y_folds)
    yhat = np.concatenate(yhat_folds)

    accuracy = accuracy_score(y, yhat.round())

    # use accuracy as the objective to maximize
    return {'loss': -accuracy, 'status': STATUS_OK}

In [6]:
best_params_per_method = {}

for method in negative_methods:
    print(f'Optimizing for {method}...')
    # create a partial function with the negative method as a parameter
    objective_partial = partial(objective, negative_method=method)

    # run the hyperparameter optimization
    trials = Trials()
    best = fmin(
        fn=objective_partial,
        space=space,
        algo=tpe.suggest,
        max_evals=10,
        trials=trials
    )

    # store the best parameters for this method
    best_params_per_method[method] = best

    # store the results
    results[method] = trials

Optimizing for most_distant...
 10%|█         | 1/10 [09:48<1:28:14, 588.30s/trial, best loss: -0.9541086865700421]


KeyboardInterrupt: 

: 

In [None]:
# print the best parameters for each method
for method, best_params in best_params_per_method.items():
    print(f'Best parameters for {method}: {best_params}')

Best parameters for random_pairs:
{'activation': 1, 'learning_rate': 0.08713927895359477, 'optimizer': 2}
Best parameters for same_degree_distribution:
{'activation': 0, 'learning_rate': 0.058642938686967135, 'optimizer': 1}
Best parameters for most_close:
{'activation': 3, 'learning_rate': 0.05637113155272348, 'optimizer': 1}
Best parameters for most_distant:
{'activation': 0, 'learning_rate': 0.054942454073653696, 'optimizer': 2}


In [None]:
# import seaborn as sns
# import pandas as pd
# import matplotlib.pyplot as plt

# # Prepare data for DataFrame
# data = []
# for method, metrics in results.items():
#     for metric, score in metrics.items():
#         data.append([method, metric, score])

# # Create DataFrame
# results_df = pd.DataFrame(data, columns=['Methods', 'Metrics', 'Score'])

# # Create the plot
# plt.figure(figsize=(12,8))
# bar_plot = sns.barplot(x='Methods', y='Score', hue='Metrics', data=results_df, palette='muted', saturation=0.9)

# # Add labels to the top of the bars
# for p in bar_plot.patches:
#     bar_plot.annotate(format(p.get_height(), '.2f'), 
#                       (p.get_x() + p.get_width() / 2., p.get_height()), 
#                       ha = 'center', 
#                       va = 'center', 
#                       xytext = (0, 10), 
#                       textcoords = 'offset points')

# # Move the legend outside the plot
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

# # Show the plot
# plt.show()