In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import cv2
import numpy as np
import networkx as nx
from keras.datasets import mnist

def extract_sift_features(image):
    """Extract SIFT features from an image."""
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image, None)
    return keypoints, descriptors

def calculate_similarity(descriptor1, descriptor2, method='euclidean'):
    """Calculate similarity/distance between two SIFT descriptors."""
    if method == 'euclidean':
        return np.linalg.norm(descriptor1 - descriptor2)
    elif method == 'cosine':
        return np.dot(descriptor1, descriptor2) / (np.linalg.norm(descriptor1) * np.linalg.norm(descriptor2))
    # Add more similarity/distance methods as needed

def construct_graph(images, labels, similarity_method='euclidean', threshold=100):
    """Construct a graph using SIFT features and specified similarity method."""
    G = nx.Graph()
    for i in range(len(images)):
        image = images[i]
        label = labels[i]  # MNIST labels are single integers
        keypoints, descriptors = extract_sift_features(image)
        if descriptors is not None and len(descriptors) > 0:  # Check if descriptors are available
            concatenated_descriptors = np.concatenate(descriptors, axis=0)
            descriptor_length = concatenated_descriptors.shape[0]
            G.add_node((i, label))
            for j in range(len(images)):
                if i != j:
                    other_image = images[j]
                    other_label = labels[j]  # MNIST labels are single integers
                    other_keypoints, other_descriptors = extract_sift_features(other_image)
                    if other_descriptors is not None and len(other_descriptors) > 0:  # Check if descriptors are available
                        other_concatenated_descriptors = np.concatenate(other_descriptors, axis=0)
                        other_descriptor_length = other_concatenated_descriptors.shape[0]
                        if descriptor_length == other_descriptor_length:
                            # Calculate similarity between descriptors
                            similarity = calculate_similarity(concatenated_descriptors, other_concatenated_descriptors, method=similarity_method)
                            # Add edge if similarity is above threshold
                            if similarity < threshold:
                                G.add_edge((i, label), (j, other_label), weight=similarity)
    return G




# Load MNIST dataset
(train_images, train_labels), (_, _) = mnist.load_data()

# Choose a subset of images for demonstration
num_images = 100
subset_images = train_images[:num_images]
subset_labels = train_labels[:num_images]

# Example usage:
graph = construct_graph(subset_images, subset_labels, similarity_method='cosine', threshold=0.9)
print("Number of nodes:", graph.number_of_nodes())
print("Number of edges:", graph.number_of_edges())


Number of nodes: 98
Number of edges: 338


In [None]:
pip install grakel

Collecting grakel
  Downloading GraKeL-0.1.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: grakel
Successfully installed grakel-0.1.10


In [None]:
import numpy as np
from grakel.kernels import WeisfeilerLehman
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load adjacency matrices from MUTAG_A.txt
adj_matrices = np.loadtxt("/content/drive/MyDrive/D22180_ACV/Group01_Nandani_Kajal_Assignment-3&4/Q6/MUTAG/MUTAG_A.txt", delimiter=',', dtype=int)

# Load graph indicator and graph labels
graph_indicator = np.loadtxt("/content/drive/MyDrive/D22180_ACV/Group01_Nandani_Kajal_Assignment-3&4/Q6/MUTAG/MUTAG_graph_indicator.txt", dtype=int)
graph_labels = np.loadtxt("/content/drive/MyDrive/D22180_ACV/Group01_Nandani_Kajal_Assignment-3&4/Q6/MUTAG/MUTAG_graph_labels.txt", dtype=int)

# Create a list of graphs
graphs = []
labels = []

# Split the adjacency matrices into individual graphs based on graph indicators
unique_graphs = np.unique(graph_indicator)
for graph_id in unique_graphs:
    graph_indices = np.where(graph_indicator == graph_id)[0]
    graph = adj_matrices[np.ix_(graph_indices, graph_indices)]  # Extract submatrix for the current graph
    graphs.append(graph)
    labels.append(graph_labels[graph_id - 1])  # Subtract 1 to account for 0-based indexing

# Convert labels to numpy array
labels = np.array(labels)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(graphs, labels, test_size=0.2, random_state=42)

# Create and initialize the WL kernel
wl_kernel = WeisfeilerLehman(n_iter=5, normalize=True)

# Compute kernel matrices for training and testing data
K_train = wl_kernel.fit_transform(X_train)
K_test = wl_kernel.transform(X_test)

# Train SVM classifier
clf = SVC(kernel='precomputed')
clf.fit(K_train, y_train)

# Evaluate classifier
y_pred = clf.predict(K_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


IndexError: index 2 is out of bounds for axis 1 with size 2

In [None]:
%cd /content/drive/MyDrive/D22180_ACV/Group01_Nandani_Kajal_Assignment-3&4/Q6/grakel

In [None]:
!wget https://storage.googleapis.com/download.tensorflow.org/data/mutag.zip
!unzip mutag.zip

In [None]:
pip install -q tensorflow-gnn

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m838.4/838.4 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.8/14.8 MB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m773.9/773.9 kB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.2/61.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m46.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.7/89.7 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
os.environ["TF_USE_LEGACY_KERAS"] = "1"  # For TF2.16+.

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_gnn as tfgnn

print(f'Running TF-GNN {tfgnn.__version__} with TensorFlow {tf.__version__}.')

"""### Download the MUTAG dataset
We have created a version of the MUTAG Dataset in TF-GNN's file format to use as an example in this colab.

Citation: [Morris, Christopher, et al. Tudataset: A collection of benchmark datasets for learning with graphs. arXiv preprint arXiv:2007.08663. 2020.](https://chrsmrrs.github.io/datasets/)
"""

# Download and unzip dataset.
#!wget https://storage.googleapis.com/download.tensorflow.org/data/mutag.zip
#!unzip mutag.zip

train_path = os.path.join(os.getcwd(), 'mutag', 'train.tfrecords')
val_path = os.path.join(os.getcwd(), 'mutag', 'val.tfrecords')
!ls -l {train_path} {val_path}

graph_tensor_spec = tfgnn.GraphTensorSpec.from_piece_specs(
    context_spec=tfgnn.ContextSpec.from_field_specs(features_spec={
                  'label': tf.TensorSpec(shape=(1,), dtype=tf.int32)
    }),
    node_sets_spec={
        'atoms':
            tfgnn.NodeSetSpec.from_field_specs(
                features_spec={
                    tfgnn.HIDDEN_STATE:
                        tf.TensorSpec((None, 7), tf.float32)
                },
                sizes_spec=tf.TensorSpec((1,), tf.int32))
    },
    edge_sets_spec={
        'bonds':
            tfgnn.EdgeSetSpec.from_field_specs(
                features_spec={
                    tfgnn.HIDDEN_STATE:
                        tf.TensorSpec((None, 4), tf.float32)
                },
                sizes_spec=tf.TensorSpec((1,), tf.int32),
                adjacency_spec=tfgnn.AdjacencySpec.from_incident_node_sets(
                    'atoms', 'atoms'))
    })


def decode_fn(record_bytes):
  graph = tfgnn.parse_single_example(
      graph_tensor_spec, record_bytes, validate=True)

  # extract label from context and remove from input graph
  context_features = graph.context.get_features_dict()
  label = context_features.pop('label')
  new_graph = graph.replace_features(context=context_features)

  return new_graph, label

train_ds = tf.data.TFRecordDataset([train_path]).map(decode_fn)
val_ds = tf.data.TFRecordDataset([val_path]).map(decode_fn)

"""### Look at one example from the dataset"""

g, y = train_ds.take(1).get_single_element()

"""#### Node features

Node features represent the 1-hot encoding of the atom type (0=C, 1=N, 2=O, 3=F,
4=I, 5=Cl, 6=Br).
"""

print(g.node_sets['atoms'].features[tfgnn.HIDDEN_STATE])

"""#### Bond Edges

In this example, we consider the bonds between atoms undirected edges. To encode
them in the GraphsTuple, we store the undirected edges as pairs of directed
edges in both directions.

`adjacency.source` contains the source node indices, and `adjacency.target` contains the corresponding target node indices.
"""

g.edge_sets['bonds'].adjacency.source

g.edge_sets['bonds'].adjacency.target

"""#### Edge features

Edge features represent the bond type as one-hot encoding.
"""

g.edge_sets['bonds'].features[tfgnn.HIDDEN_STATE]

"""### Label
The label is binary, indicating the mutagenicity of the molecule. It's either 0 or 1.
"""

y

"""#### Batch the datasets"""

batch_size = 32
train_ds_batched = train_ds.batch(batch_size=batch_size).repeat()
val_ds_batched = val_ds.batch(batch_size=batch_size)

Running TF-GNN 1.0.2 with TensorFlow 2.15.0.
/bin/bash: line 1: 4/Q1/mutag/val.tfrecords: No such file or directory
/bin/bash: line 1: 4/Q1/mutag/train.tfrecords: No such file or directory
ls: cannot access '/content/drive/MyDrive/D22180_ACV/Group01_Nandani_Kajal_Assignment-3': No such file or directory
tf.Tensor(
[[1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]], shape=(14, 7), dtype=float32)


In [None]:
import os
os.environ["TF_USE_LEGACY_KERAS"] = "1"  # For TF2.16+.

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_gnn as tfgnn
import grakel
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

print(f'Running TF-GNN {tfgnn.__version__} with TensorFlow {tf.__version__}.')

"""### Download the MUTAG dataset
We have created a version of the MUTAG Dataset in TF-GNN's file format to use as an example in this colab.

Citation: [Morris, Christopher, et al. Tudataset: A collection of benchmark datasets for learning with graphs. arXiv preprint arXiv:2007.08663. 2020.](https://chrsmrrs.github.io/datasets/)
"""

# Download and unzip dataset.
#!wget https://storage.googleapis.com/download.tensorflow.org/data/mutag.zip
#!unzip mutag.zip

train_path = os.path.join(os.getcwd(), 'mutag', 'train.tfrecords')
val_path = os.path.join(os.getcwd(), 'mutag', 'val.tfrecords')
!ls -l {train_path} {val_path}

graph_tensor_spec = tfgnn.GraphTensorSpec.from_piece_specs(
    context_spec=tfgnn.ContextSpec.from_field_specs(features_spec={
                  'label': tf.TensorSpec(shape=(1,), dtype=tf.int32)
    }),
    node_sets_spec={
        'atoms':
            tfgnn.NodeSetSpec.from_field_specs(
                features_spec={
                    tfgnn.HIDDEN_STATE:
                        tf.TensorSpec((None, 7), tf.float32)
                },
                sizes_spec=tf.TensorSpec((1,), tf.int32))
    },
    edge_sets_spec={
        'bonds':
            tfgnn.EdgeSetSpec.from_field_specs(
                features_spec={
                    tfgnn.HIDDEN_STATE:
                        tf.TensorSpec((None, 4), tf.float32)
                },
                sizes_spec=tf.TensorSpec((1,), tf.int32),
                adjacency_spec=tfgnn.AdjacencySpec.from_incident_node_sets(
                    'atoms', 'atoms'))
    })


def decode_fn(record_bytes):
  graph = tfgnn.parse_single_example(
      graph_tensor_spec, record_bytes, validate=True)

  # extract label from context and remove from input graph
  context_features = graph.context.get_features_dict()
  label = context_features.pop('label')
  new_graph = graph.replace_features(context=context_features)

  return new_graph, label

train_ds = tf.data.TFRecordDataset([train_path]).map(decode_fn)
val_ds = tf.data.TFRecordDataset([val_path]).map(decode_fn)

"""### Look at one example from the dataset"""

g, y = train_ds.take(1).get_single_element()

"""#### Node features

Node features represent the 1-hot encoding of the atom type (0=C, 1=N, 2=O, 3=F,
4=I, 5=Cl, 6=Br).
"""

print(g.node_sets['atoms'].features[tfgnn.HIDDEN_STATE])

"""#### Bond Edges

In this example, we consider the bonds between atoms undirected edges. To encode
them in the GraphsTuple, we store the undirected edges as pairs of directed
edges in both directions.

`adjacency.source` contains the source node indices, and `adjacency.target` contains the corresponding target node indices.
"""

g.edge_sets['bonds'].adjacency.source

g.edge_sets['bonds'].adjacency.target

"""#### Edge features

Edge features represent the bond type as one-hot encoding.
"""

g.edge_sets['bonds'].features[tfgnn.HIDDEN_STATE]

"""### Label
The label is binary, indicating the mutagenicity of the molecule. It's either 0 or 1.
"""

y

"""#### Batch the datasets"""

batch_size = 32
train_ds_batched = train_ds.batch(batch_size=batch_size).repeat()
val_ds_batched = val_ds.batch(batch_size=batch_size)

# Convert TensorFlow datasets to GraKeL-compatible format
X_train, y_train = [], []
for graph, label in train_ds_batched:
    # Convert graph to GraKeL-compatible format
    # Append to X_train and y_train
    X_train.append(graph)
    y_train.append(label)

X_val, y_val = [], []
for graph, label in val_ds_batched:
    # Convert graph to GraKeL-compatible format
    # Append to X_val and y_val
    X_val.append(graph)
    y_val.append(label)

# Step 3: Experiment with Graph Kernels
# Example: Use WL kernel
wl_kernel = grakel.WeisfeilerLehman(n_iter=5, normalize=True)
K_train = wl_kernel.fit_transform(X_train)
K_val = wl_kernel.transform(X_val)

# Step 4: Train and Evaluate Models
svm_classifier = SVC(kernel='precomputed')
svm_classifier.fit(K_train, y_train)
predictions = svm_classifier.predict(K_val)

# Step 5: Compare Performance
accuracy = accuracy_score(y_val, predictions)
print("Accuracy:", accuracy)

# Step 6: Present Results
# Visualize and present the results as needed


Running TF-GNN 1.0.2 with TensorFlow 2.15.0.
/bin/bash: line 1: 4/Q1/mutag/val.tfrecords: No such file or directory
/bin/bash: line 1: 4/Q1/mutag/train.tfrecords: No such file or directory
ls: cannot access '/content/drive/MyDrive/D22180_ACV/Group01_Nandani_Kajal_Assignment-3': No such file or directory
tf.Tensor(
[[1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]], shape=(14, 7), dtype=float32)


#WL kernel

In [6]:
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.datasets import mnist

# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Normalize pixel values
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Reshape images to (28, 28)
X_train = X_train.reshape(-1, 28, 28)
X_test = X_test.reshape(-1, 28, 28)

# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Define function to convert image to graph
def image_to_graph(image):
    # Convert image to binary
    threshold = 0.5
    binary_image = (image > threshold).astype(int)

    # Create graph
    G = nx.grid_2d_graph(28, 28)

    # Add nodes and edges
    for i in range(28):
        for j in range(28):
            if binary_image[i, j] == 1:
                G.nodes[(i, j)]['label'] = 1  # Pixel is white
                if i > 0 and binary_image[i-1, j] == 1:
                    G.add_edge((i, j), (i-1, j))
                if i < 27 and binary_image[i+1, j] == 1:
                    G.add_edge((i, j), (i+1, j))
                if j > 0 and binary_image[i, j-1] == 1:
                    G.add_edge((i, j), (i, j-1))
                if j < 27 and binary_image[i, j+1] == 1:
                    G.add_edge((i, j), (i, j+1))

    return G

# Convert images to graphs
X_train_graphs = [image_to_graph(image) for image in X_train[:1000]]  # Taking subset for demonstration
X_test_graphs = [image_to_graph(image) for image in X_test[:1000]]    # Taking subset for demonstration


In [7]:
# Compute WL kernel matrix
kernel_matrix_train = wl_kernel(X_train_graphs, iterations=3)
kernel_matrix_test = wl_kernel(X_test_graphs, iterations=3)

In [9]:
from sklearn import svm
from sklearn.metrics import accuracy_score

# Train SVM classifier
clf = svm.SVC(kernel='precomputed')
clf.fit(kernel_matrix_train, y_train_encoded[:1000])  # Using subset of training labels for demonstration

# Predict labels for test data
y_pred = clf.predict(kernel_matrix_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test_encoded[:1000], y_pred)  # Using subset of test labels for demonstration
print("Accuracy:", accuracy)


Accuracy: 0.099


WL-Kernel

In [11]:
# Define WL kernel function
def wl_kernel(graphs, iterations):
    # Initialize kernel matrix
    n = len(graphs)
    kernel_matrix = np.zeros((n, n))

    for i, G1 in enumerate(graphs):
        # Initialize node labels
        node_labels = {node: 0 for node in G1.nodes()}

        for _ in range(iterations):
            # Update node labels based on neighbor labels
            new_labels = {}
            for node in G1.nodes():
                neighbors = G1.neighbors(node)
                neighbor_labels = tuple(sorted(node_labels[n] for n in neighbors))
                new_labels[node] = neighbor_labels

            # Update node labels
            node_labels = new_labels

        # Compute kernel values
        for j, G2 in enumerate(graphs):
            kernel_matrix[i, j] = len([node for node in G1.nodes() if node_labels[node] == node_labels[node]])

    return kernel_matrix

# Compute WL kernel matrix
kernel_matrix_train = wl_kernel(X_train_graphs, iterations=10)
kernel_matrix_test = wl_kernel(X_test_graphs, iterations=10)


In [10]:
from sklearn import svm
from sklearn.metrics import accuracy_score

# Train SVM classifier
clf = svm.SVC(kernel='precomputed')
clf.fit(kernel_matrix_train, y_train_encoded[:1000])  # Using subset of training labels for demonstration

# Predict labels for test data
y_pred = clf.predict(kernel_matrix_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test_encoded[:1000], y_pred)  # Using subset of test labels for demonstration
print("Accuracy:", accuracy)

Accuracy: 0.099


#MUTAG

In [12]:
pip install grakel


Collecting grakel
  Downloading GraKeL-0.1.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: grakel
Successfully installed grakel-0.1.10


In [None]:
import numpy as np
from scipy.io import loadmat
from grakel.datasets import fetch_dataset
from grakel.kernels import ShortestPath

# Load MUTAG dataset
mutag_dataset = loadmat('mutag.mat')
graphs = mutag_dataset['MUTAG'][0]

# Convert dataset to a format compatible with GraKeL
# Assuming graphs is a list of NetworkX graphs

# Define graph kernel
kernel = ShortestPath()

# Compute graph kernel matrix
kernel_matrix = kernel.fit_transform(graphs)

# Now you can use kernel_matrix as input for machine learning models
