In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
import pandas as pd
import ast
from sklearn.model_selection import train_test_split

# Data Preparation Functions

In [2]:
def create_adjacency_matrices(df):
    df['adjacency_matrix'] = None
    # Step 3: Iterate through Rows and Populate the New Column
    for index, row in df.iterrows(): 
        # Convert String to a real list !
        edges = ast.literal_eval(row['edges'])
        # Determine the number of nodes
        num_nodes = max(max(edge) for edge in edges) + 1
        # Create an empty adjacency matrix
        adjacency_matrix = np.zeros((num_nodes, num_nodes), dtype=int)
        # Fill the adjacency matrix based on the edges
        for edge in edges:
            adjacency_matrix[edge[0], edge[1]] = 1
            adjacency_matrix[edge[1], edge[0]] = 1  # Assuming the graph is undirected

        # Assign the adjacency matrix to the new column
        df.at[index, 'adjacency_matrix'] = adjacency_matrix
        
    return df

In [3]:
def Transform_DictEdges_to_ArrayEdges(d):
    d = ast.literal_eval(d) 
    max_key = max(d.keys())
    result_array = [0] * (max_key + 1)
    for key, value in d.items():
        result_array[key] = value
    return result_array

## Padding

In [4]:
def add_padding_matrix(matrix,padding_position):
    n = len(matrix)
    padded_row = np.zeros((1, n))
    padded_col = np.zeros((n+1, 1))

    # Insert padded row at specified position
    matrix = np.concatenate((matrix[:padding_position], padded_row, matrix[padding_position:]), axis=0)
    
    # Insert padded column at specified position
    matrix = np.concatenate((matrix[:, :padding_position], padded_col, matrix[:, padding_position:]), axis=1)
    
    return matrix

In [5]:
def add_padding_array(array, position):
    return np.insert(array, position, 0)

In [6]:
def add_padding_to_dataframe(dataset, n):
    for index, row in dataset.iterrows():
        padding_position = np.random.randint(0, n)
        dataset.at[index, 'adjacency_matrix'] = add_padding_matrix(row['adjacency_matrix'], padding_position)
        dataset.at[index, 'node_assignment_array'] = add_padding_array(row['node_assignment_array'], padding_position)

## Apply Data Preparation 

In [10]:
# Reading the File 
df = pd.read_csv('Data_n_equal_7.csv')

In [11]:
create_adjacency_matrices(df)
# Convert dictionary values to list
df['node_assignment_array'] = df['node_assignment'].apply(lambda x: Transform_DictEdges_to_ArrayEdges(x))
add_padding_to_dataframe(df,7)

In [12]:
# Step 3: Split Data
X_train, X_temp, y_train, y_temp = train_test_split(df['adjacency_matrix'].values, df['node_assignment_array'].values, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [13]:
# Convert lists to numpy arrays
X_train = np.array(X_train.tolist()).astype('float32')
y_train = np.array(y_train.tolist()).astype('float32')
X_val = np.array(X_val.tolist()).astype('float32')
y_val = np.array(y_val.tolist()).astype('float32')
X_test = np.array(X_test.tolist()).astype('float32')
y_test = np.array(y_test.tolist()).astype('float32')

In [14]:
X_train.shape

(597, 8, 8)

# Build the GNN model 

In [15]:
# Define GNN model
class GNN(Model):
    def __init__(self):
        super(GNN, self).__init__()
        self.conv1 = layers.Conv1D(16, 1, activation='relu')  # Convolutional layer
        self.conv1 = layers.Conv1D(32, 3, activation='relu')  # Convolutional layer
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(64, activation='relu')  # Dense layer
        self.dense2 = layers.Dense(8)  # Output layer with 8 units
        self.built = True  # Set model as built

    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.flatten(x)
        x = self.dense1(x)
        return self.dense2(x)

In [16]:
# Initialize model
model = GNN()

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Compile model
model.compile(optimizer='adam', loss='mse')  # Using Mean Squared Error loss for regression task

# Print model summary
model.build((None, 8, 8))  # Manually build the model with input shape
model.summary()


# Train model
model.fit(X_train, y_train, epochs=70,validation_data=(X_val, y_val))

Model: "gnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           multiple                  800       
                                                                 
 flatten (Flatten)           multiple                  0         
                                                                 
 dense (Dense)               multiple                  12352     
                                                                 
 dense_1 (Dense)             multiple                  520       
                                                                 
Total params: 13672 (53.41 KB)
Trainable params: 13672 (53.41 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70


<keras.src.callbacks.History at 0x22d23bb0700>

In [None]:
# Evaluate model 
loss = model.evaluate(X_train, y_train)
print("Average Loss:", loss)

# Predict on X and y of n equal to 8 

In [46]:
df_8 = pd.read_csv('datacvxpy8.csv')

In [47]:
df_8

Unnamed: 0,n,edges,node_assignment
0,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]"
1,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]"
2,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]"
3,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]"
4,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]"
...,...,...,...
463,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, -1, -1, 1, -1, -1]"
464,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, -1, -1, -1, -1, -1]"
465,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, -1, -1, -1, -1, -1, 1]"
466,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, -1, -1, -1, -1, -1]"


In [49]:
create_adjacency_matrices(df_8)
df_8['node_assignment'] = df_8['node_assignment'].apply(ast.literal_eval)

In [50]:
X=df_8['adjacency_matrix'].values
y=df_8['node_assignment'].values

In [53]:
# Convert lists to numpy arrays
X = np.array(X.tolist()).astype('float32')
y = np.array(y.tolist()).astype('float32')

In [54]:
# Make predictions on the test set
predictions = model.predict(X)

# Define the threshold
threshold = 0.0  

# Apply the threshold to the predictions
binary_predictions = np.where(predictions > threshold, 1, -1)



# Strict Accuracy

Strict accuracy is a metric that measures the percentage of predictions that match the true values exactly. In other words, it evaluates whether all elements of a prediction match the corresponding elements of the true values. For example, if a prediction array [1, 0, 1, 1] is compared to the true array [1, 0, 1, 1], strict accuracy would be 1 (or 100%), indicating that the prediction is entirely correct. However, if any element of the prediction mismatches the true value, strict accuracy would be 0 (or 0%).

In [55]:
def calculate_strict_accuracy(binary_predictions, y):
    """
    Calculate strict accuracy between binary predictions and true values.

    Args:
        binary_predictions (list of arrays): List of binary predictions.
        y (list of arrays): List of true values.

    Returns:
        float: Strict accuracy.
    """
    count_matches = 0
    count_unmatches = 0

    for i in range(len(binary_predictions)):
        # Convert to numpy arrays if not already
        binary_prediction_arr = np.array(binary_predictions[i])
        y_arr = np.array(y[i])
        
        # Check if the elements are equal
        if np.array_equal(binary_prediction_arr, y_arr):
            count_matches += 1
        else:
            count_unmatches += 1
    # Calculate strict accuracy
    strict_accuracy = count_matches / (count_matches + count_unmatches) if (count_matches + count_unmatches) > 0 else 0
    
    return strict_accuracy


In [56]:
strict_accuracy=calculate_strict_accuracy(binary_predictions,y)

In [57]:
strict_accuracy

0.41025641025641024

# Average Accuracy

 Average accuracy, on the other hand, calculates the accuracy for eachindividual prediction separately and then averages these accuracies.This approach provides a more nuanced evaluation of the model's performance,as it considers the accuracy of each prediction independently. For instance,if a model makes three predictions [1, 0, 1], [0, 1, 0], and [1, 1, 1],and the true values are [1, 0, 0], [0, 1, 1], and [1, 1, 1], respectively,the average accuracy would be calculated by averaging the accuracies of these individual predictions. This allows for a more granular assessmentof the model's performance across multiple predictions.

In [27]:
def calculate_average_accuracy(binary_predictions, y):
    """
    Calculate accuracy between binary predictions and true values.

    Args:
        binary_predictions (list of arrays): List of binary predictions.
        y (list of arrays): List of true values.

    Returns:
        float: Average accuracy.
    """
    accuracies = []

    for i in range(len(binary_predictions)):
        # Convert to numpy arrays if not already
        binary_prediction_arr = np.array(binary_predictions[i])
        y_arr = np.array(y[i])
        
        # Check if the elements are equal
        accuracy = np.mean(binary_prediction_arr == y_arr)
        accuracies.append(accuracy)
        
    # Calculate average accuracy
    average_accuracy = np.mean(accuracies)
    
    return average_accuracy

In [58]:
average_accuracy=calculate_average_accuracy(binary_predictions,y)

In [59]:
average_accuracy

0.843482905982906