In [1]:
pip install --pre deepchem

Note: you may need to restart the kernel to use updated packages.


In [7]:
!pip install dgllife

Collecting dgllife
  Downloading dgllife-0.3.2-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.1/226.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting hyperopt
  Downloading hyperopt-0.2.7-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting future
  Downloading future-1.0.0-py3-none-any.whl (491 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.3/491.3 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting cloudpickle
  Downloading cloudpickle-3.1.0-py3-none-any.whl (22 kB)
Collecting py4j
  Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.5/200.5 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: py4j, future, cloudpickle, hyperopt, dgllife
Successfully

In [None]:
import deepchem as dc
import pandas as pd
import numpy as np
from deepchem.models import GCNModel
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
#import iris dataset
df = pd.read_csv('data/iris.csv', header=None)
df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
species_map = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
df['species'] = df['species'].map(species_map)

#standardize features
scaler = StandardScaler()
features = df.iloc[:, :-1].values
features_scaled = scaler.fit_transform(features)
labels = df['species'].values

In [None]:
#split data with indices
indices = np.arange(len(features_scaled))
train_idx, test_idx = train_test_split(indices, test_size=0.2, random_state=42)

#split features and labels
train_features = features_scaled[train_idx]
test_features = features_scaled[test_idx]
train_labels = labels[train_idx]
test_labels = labels[test_idx]

# Cell 4: Create graph structures
def create_graph(features):
    n_samples = len(features)
    k = 5  #nb neighbors
    edge_index = []
    
    for i in range(n_samples):
        distances = []
        for j in range(n_samples):
            if i != j:
                dist = np.linalg.norm(features[i] - features[j])
                distances.append((j, dist))
        
        nearest_neighbors = sorted(distances, key=lambda x: x[1])[:k]
        for neighbor, _ in nearest_neighbors:
            edge_index.append([i, neighbor])
            edge_index.append([neighbor, i])
    
    edge_index = np.array(edge_index, dtype=np.int32).T
    return dc.feat.graph_data.GraphData(
        node_features=features.astype(np.float32),
        edge_index=edge_index,
        num_nodes=n_samples,
        num_edges=len(edge_index)
    )

#separation train and test
train_graph = create_graph(train_features)
test_graph = create_graph(test_features)

In [15]:
#create deepchem datasets train/test
train_dataset = dc.data.NumpyDataset([train_graph], train_labels.reshape(-1, 1))
test_dataset = dc.data.NumpyDataset([test_graph], test_labels.reshape(-1, 1))

n_features = features_scaled.shape[1] 

In [None]:
model = GCNModel(mode='classification', n_tasks=1,
                 batch_size=16, learning_rate=0.001, number_atom_features=n_features)

#train model
model.fit(train_dataset, nb_epoch=5)

0.04103921949863434