In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
import pandas as pd
import ast
from sklearn.model_selection import train_test_split

# Data Preparation Functions

In [2]:
def create_adjacency_matrices(df):
    df['adjacency_matrix'] = None
    # Step 3: Iterate through Rows and Populate the New Column
    for index, row in df.iterrows(): 
        # Convert String to a real list !
        edges = ast.literal_eval(row['edges'])
        # Determine the number of nodes
        num_nodes = max(max(edge) for edge in edges) + 1
        # Create an empty adjacency matrix
        adjacency_matrix = np.zeros((num_nodes, num_nodes), dtype=int)
        # Fill the adjacency matrix based on the edges
        for edge in edges:
            adjacency_matrix[edge[0], edge[1]] = 1
            adjacency_matrix[edge[1], edge[0]] = 1  # Assuming the graph is undirected

        # Assign the adjacency matrix to the new column
        df.at[index, 'adjacency_matrix'] = adjacency_matrix
        
    return df

## Apply Data Preparation 

In [3]:
# Reading the File 
df = pd.read_csv('datacvxpy8.csv')

In [4]:
create_adjacency_matrices(df)
df['node_assignment'] = df['node_assignment'].apply(ast.literal_eval)

In [5]:
df.head()

Unnamed: 0,n,edges,node_assignment,adjacency_matrix
0,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]","[[0, 1, 1, 1, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1, ..."
1,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]","[[0, 1, 1, 1, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1, ..."
2,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]","[[0, 1, 1, 1, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1, ..."
3,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]","[[0, 1, 1, 1, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1, ..."
4,8,"[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6...","[1, 1, 1, 1, -1, -1, -1, -1]","[[0, 1, 1, 1, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1, ..."


In [7]:
# Step 3: Split Data
X_train, X_temp, y_train, y_temp = train_test_split(df['adjacency_matrix'].values, df['node_assignment'].values, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [8]:
# Convert lists to numpy arrays
X_train = np.array(X_train.tolist()).astype('float32')
y_train = np.array(y_train.tolist()).astype('float32')
X_val = np.array(X_val.tolist()).astype('float32')
y_val = np.array(y_val.tolist()).astype('float32')
X_test = np.array(X_test.tolist()).astype('float32')
y_test = np.array(y_test.tolist()).astype('float32')

In [9]:
X_train.shape

(327, 8, 8)

# Build the GNN model 

In [10]:
# Define GNN model
class GNN(Model):
    def __init__(self):
        super(GNN, self).__init__()
        self.conv1 = layers.Conv1D(16, 1, activation='relu')  # Convolutional layer
        self.conv1 = layers.Conv1D(32, 3, activation='relu')  # Convolutional layer
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(64, activation='relu')  # Dense layer
        self.dense2 = layers.Dense(8)  # Output layer with 8 units
        self.built = True  # Set model as built

    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.flatten(x)
        x = self.dense1(x)
        return self.dense2(x)

In [11]:
# Initialize model
model = GNN()

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Compile model
model.compile(optimizer='adam', loss='mse')  # Using Mean Squared Error loss for regression task

# Print model summary
model.build((None, 8, 8))  # Manually build the model with input shape
model.summary()


# Train model
model.fit(X_train, y_train, epochs=200,validation_data=(X_val, y_val))

Model: "gnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           multiple                  800       
                                                                 
 flatten (Flatten)           multiple                  0         
                                                                 
 dense (Dense)               multiple                  12352     
                                                                 
 dense_1 (Dense)             multiple                  520       
                                                                 
Total params: 13672 (53.41 KB)
Trainable params: 13672 (53.41 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200

Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoc

Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.src.callbacks.History at 0x23c833ce190>

In [12]:
# Evaluate model 
loss = model.evaluate(X_train, y_train)
print("Average Loss:", loss)

Average Loss: 0.04388255625963211


# Predict on X and y of n equal to 8 

In [13]:
# Make predictions on the test set
predictions = model.predict(X_test)

# Define the threshold
threshold = 0.0  

# Apply the threshold to the predictions
binary_predictions = np.where(predictions > threshold, 1, -1)



# Strict Accuracy

Strict accuracy is a metric that measures the percentage of predictions that match the true values exactly. In other words, it evaluates whether all elements of a prediction match the corresponding elements of the true values. For example, if a prediction array [1, 0, 1, 1] is compared to the true array [1, 0, 1, 1], strict accuracy would be 1 (or 100%), indicating that the prediction is entirely correct. However, if any element of the prediction mismatches the true value, strict accuracy would be 0 (or 0%).

In [14]:
def calculate_strict_accuracy(binary_predictions, y):
    """
    Calculate strict accuracy between binary predictions and true values.

    Args:
        binary_predictions (list of arrays): List of binary predictions.
        y (list of arrays): List of true values.

    Returns:
        float: Strict accuracy.
    """
    count_matches = 0
    count_unmatches = 0

    for i in range(len(binary_predictions)):
        # Convert to numpy arrays if not already
        binary_prediction_arr = np.array(binary_predictions[i])
        y_arr = np.array(y[i])
        
        # Check if the elements are equal
        if np.array_equal(binary_prediction_arr, y_arr):
            count_matches += 1
        else:
            count_unmatches += 1
    # Calculate strict accuracy
    strict_accuracy = count_matches / (count_matches + count_unmatches) if (count_matches + count_unmatches) > 0 else 0
    
    return strict_accuracy


In [15]:
strict_accuracy=calculate_strict_accuracy(binary_predictions,y_test)

In [16]:
strict_accuracy

0.7887323943661971

# Average Accuracy

 Average accuracy, on the other hand, calculates the accuracy for eachindividual prediction separately and then averages these accuracies.This approach provides a more nuanced evaluation of the model's performance,as it considers the accuracy of each prediction independently. For instance,if a model makes three predictions [1, 0, 1], [0, 1, 0], and [1, 1, 1],and the true values are [1, 0, 0], [0, 1, 1], and [1, 1, 1], respectively,the average accuracy would be calculated by averaging the accuracies of these individual predictions. This allows for a more granular assessmentof the model's performance across multiple predictions.

In [17]:
def calculate_average_accuracy(binary_predictions, y):
    """
    Calculate accuracy between binary predictions and true values.

    Args:
        binary_predictions (list of arrays): List of binary predictions.
        y (list of arrays): List of true values.

    Returns:
        float: Average accuracy.
    """
    accuracies = []

    for i in range(len(binary_predictions)):
        # Convert to numpy arrays if not already
        binary_prediction_arr = np.array(binary_predictions[i])
        y_arr = np.array(y[i])
        
        # Check if the elements are equal
        accuracy = np.mean(binary_prediction_arr == y_arr)
        accuracies.append(accuracy)
        
    # Calculate average accuracy
    average_accuracy = np.mean(accuracies)
    
    return average_accuracy

In [18]:
average_accuracy=calculate_average_accuracy(binary_predictions,y_test)

In [19]:
average_accuracy

0.9630281690140845