In [1]:
import torch 
from torch_geometric.datasets import TUDataset

In [2]:
""" 
MUTAG is a collection of nitroaromatic compounds and the goal is to predict their mutagenicity on Salmonella typhimurium. 
""" 

# Load the MUTAG graph-classification dataset under the shared raw-data root.
dataset = TUDataset(root='../data/raw/TUDataset', name='MUTAG')

# Dataset-wide summary: one item per output line.
print(
    f'Dataset: {dataset}: ',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Inspect the first graph of the dataset as a sample.
data = dataset[0]

# Graph repr, a separator, then statistics of that single graph.
# Average degree is reported as num_edges / num_nodes.
print(
    data,
    '============================================',
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip


Dataset: MUTAG(188): 
Number of graphs: 188
Number of features: 7
Number of classes: 2
Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])
Number of nodes: 17
Number of edges: 38
Average node degree: 2.24
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Extracting ../data/raw/TUDataset/MUTAG/MUTAG.zip
Processing...
Done!


In [2]:
"""
PROTEINS is a dataset of proteins that are classified as enzymes or non-enzymes. Nodes represent the amino acids and two nodes are connected by an edge if they are less than 6 Angstroms apart.
"""
# Load the PROTEINS graph-classification dataset under the shared raw-data root.
dataset = TUDataset(root='../data/raw/TUDataset', name='PROTEINS')

# Dataset-wide summary: one item per output line.
print(
    f'Dataset: {dataset}: ',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Inspect the first graph of the dataset as a sample.
data = dataset[0]

# Graph repr, a separator, then statistics of that single graph.
# Average degree is reported as num_edges / num_nodes.
print(
    data,
    '============================================',
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

Downloading https://www.chrsmrrs.com/graphkerneldatasets/PROTEINS.zip
Extracting ../data/raw/TUDataset/PROTEINS/PROTEINS.zip
Processing...


Dataset: PROTEINS(1113): 
Number of graphs: 1113
Number of features: 3
Number of classes: 2
Data(edge_index=[2, 162], x=[42, 3], y=[1])
Number of nodes: 42
Number of edges: 162
Average node degree: 3.86
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!


In [3]:
"""
ENZYMES is a dataset of 600 protein tertiary structures obtained from the BRENDA enzyme database. Each structure is labeled with one of 6 enzyme classes.
"""
# Load the ENZYMES graph-classification dataset under the shared raw-data root.
dataset = TUDataset(root='../data/raw/TUDataset', name='ENZYMES')

# Dataset-wide summary: one item per output line.
print(
    f'Dataset: {dataset}: ',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Inspect the first graph of the dataset as a sample.
data = dataset[0]

# Graph repr, a separator, then statistics of that single graph.
# Average degree is reported as num_edges / num_nodes.
print(
    data,
    '============================================',
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Extracting ../data/raw/TUDataset/ENZYMES/ENZYMES.zip
Processing...


Dataset: ENZYMES(600): 
Number of graphs: 600
Number of features: 3
Number of classes: 6
Data(edge_index=[2, 168], x=[37, 3], y=[1])
Number of nodes: 37
Number of edges: 168
Average node degree: 4.54
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!


In [4]:
"""
REDDIT-MULTI-5K is a collection of roughly 5,000 Reddit graphs, each labeled with one of 5 classes.
"""
# Load the REDDIT-MULTI-5K graph-classification dataset under the shared raw-data root.
dataset = TUDataset(root='../data/raw/TUDataset', name='REDDIT-MULTI-5K')

# Dataset-wide summary: one item per output line.
print(
    f'Dataset: {dataset}: ',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Inspect the first graph of the dataset as a sample.
data = dataset[0]

# Graph repr, a separator, then statistics of that single graph.
# Average degree is reported as num_edges / num_nodes.
print(
    data,
    '============================================',
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

Downloading https://www.chrsmrrs.com/graphkerneldatasets/REDDIT-MULTI-5K.zip
Extracting ../data/raw/TUDataset/REDDIT-MULTI-5K/REDDIT-MULTI-5K.zip
Processing...


Dataset: REDDIT-MULTI-5K(4999): 
Number of graphs: 4999
Number of features: 0
Number of classes: 5
Data(edge_index=[2, 3776], y=[1], num_nodes=1593)
Number of nodes: 1593
Number of edges: 3776
Average node degree: 2.37
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!


In [5]:
"""
GitHub Stargazers is a collection of graphs, each labeled with one of 2 classes.
"""
# Load the github_stargazers graph-classification dataset under the shared raw-data root.
dataset = TUDataset(root='../data/raw/TUDataset', name='github_stargazers')

# Dataset-wide summary: one item per output line.
print(
    f'Dataset: {dataset}: ',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Inspect the first graph of the dataset as a sample.
data = dataset[0]

# Graph repr, a separator, then statistics of that single graph.
# Average degree is reported as num_edges / num_nodes.
print(
    data,
    '============================================',
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

Downloading https://www.chrsmrrs.com/graphkerneldatasets/github_stargazers.zip
Extracting ../data/raw/TUDataset/github_stargazers/github_stargazers.zip
Processing...


Dataset: github_stargazers(12725): 
Number of graphs: 12725
Number of features: 0
Number of classes: 2
Data(edge_index=[2, 156], y=[1], num_nodes=71)
Number of nodes: 71
Number of edges: 156
Average node degree: 2.20
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!
