In [None]:
!pip install deepchem

In [None]:
!pip install --pre deepchem

In [14]:
!pip install pydotplus
!pip install graphviz



In [10]:
!pip install -q dgl-cu113 -f https://data.dgl.ai/wheels/repo.html

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m58.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# List the compute devices TensorFlow can see on this runtime.
# The bare call on the last line is displayed as the cell's output.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 6794474462320641371
 xla_global_id: -1]

In [5]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from deepchem.feat import ConvMolFeaturizer, WeaveFeaturizer, DMPNNFeaturizer, MolGraphConvFeaturizer
from deepchem.feat.mol_graphs import ConvMol
import torch
import deepchem as dc

Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


In [7]:
# Optional import of DeepChem's torch graph models.
# If the import raises ImportError, a warning is logged instead of failing the
# cell; in that case the names below are left undefined.
import logging
logger = logging.getLogger(__name__)

try:
    # TODO We should clean up DMPNN and remove torch_geometric dependency during import
    from deepchem.models.torch_models import MEGNetModel
    from deepchem.models.torch_models import DMPNN, DMPNNModel, GNNModular, MXMNet
except ImportError as e:
    # The caught exception is interpolated into the message so the missing
    # dependency is visible in the log.
    logger.warning(
        f'Skipped loading modules with pytorch-geometric dependency, missing a dependency. {e}'
    )



In [16]:
import dgl
from dgl import DGLGraph
from dgl.nn import GATConv

FileNotFoundError: Cannot find DGL C++ graphbolt library at /usr/local/lib/python3.10/dist-packages/dgl/graphbolt/libgraphbolt_pytorch_2.4.0.so

In [None]:
# Load the training table (target and SMILES columns) and the test SMILES column
train = pd.read_csv('/content/train.csv', index_col=False)[['IC50_nM', 'Smiles']]
test = pd.read_csv('/content/test.csv')['Smiles']

# Extract the features and the target variable
X = MolGraphConvFeaturizer().featurize(train['Smiles'])
y = train['IC50_nM'].values

# Split the dataset into a training part and a held-out part
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Convert the featurized molecules into DGL graphs
def create_dgl_graphs(features):
    """Build one DGL graph per featurized molecule.

    Args:
        features: Iterable of featurizer outputs that expose
            ``to_dgl_graph(self_loop=...)`` (DeepChem ``GraphData`` objects,
            which is what ``MolGraphConvFeaturizer`` is expected to return).

    Returns:
        list: One graph per input entry, in input order. Each graph carries
        its node features under ``ndata['features']``, the key the training
        and evaluation loops read.

    Raises:
        ValueError: If an entry has no ``to_dgl_graph`` method (for example a
            molecule that could not be featurized). Raising here keeps the
            graphs aligned with the target array instead of silently
            dropping a sample.
    """
    graphs = []
    for idx, feature in enumerate(features):
        to_dgl = getattr(feature, 'to_dgl_graph', None)
        if to_dgl is None:
            raise ValueError(
                f'features[{idx}] is not a graph featurization '
                f'(got {type(feature).__name__}); remove or fix that sample first'
            )
        # Self-loops give every node at least one incoming edge, which
        # GATConv requires by default.
        g = to_dgl(self_loop=True)
        # Expose the node features under the key used by the rest of the notebook.
        g.ndata['features'] = g.ndata['x']
        graphs.append(g)
    return graphs

# Convert both splits, training part first and then the held-out part.
train_graphs, test_graphs = (
    create_dgl_graphs(split) for split in (X_train, X_test)
)

# MPNN model definition
class MPNN(nn.Module):
    """Two graph-attention layers, a mean-over-nodes readout and a linear head.

    Despite the class name, the message passing is done with DGL ``GATConv``
    layers. The model outputs one scalar per graph.

    Args:
        in_feats: Width of the input node-feature vectors.
        hidden_size: Per-head output width of the first attention layer.
        out_feats: Output width of the second attention layer and the readout.
        num_heads: Number of attention heads in the first layer (default 8,
            the value that was previously hard-coded).

    Note:
        ``GATConv`` rejects graphs containing zero-in-degree nodes by default,
        so input graphs should include self-loops.
    """

    def __init__(self, in_feats, hidden_size, out_feats, num_heads=8):
        super().__init__()
        self.conv1 = GATConv(in_feats, hidden_size, num_heads=num_heads)
        # The first layer's heads are concatenated, hence hidden_size * num_heads.
        self.conv2 = GATConv(hidden_size * num_heads, out_feats, num_heads=1)
        self.fc = nn.Linear(out_feats, 1)

    def forward(self, g, features):
        """Predict one value per graph.

        Args:
            g: A DGL graph (single or batched).
            features: Node-feature tensor of shape ``(num_nodes, in_feats)``.

        Returns:
            Tensor of shape ``(num_graphs, 1)``.
        """
        # GATConv returns (num_nodes, num_heads, feats); concatenate the heads
        # so the result matches the input width conv2 was built with.
        h = self.conv1(g, features).flatten(1)
        h = nn.functional.elu(h)
        # Reduce the head axis of the second layer -> (num_nodes, out_feats).
        h = self.conv2(g, h).mean(dim=1)
        # mean_nodes reads a named node feature from the graph; local_scope
        # keeps the temporary 'h' entry from leaking into the caller's graph.
        with g.local_scope():
            g.ndata['h'] = h
            readout = dgl.mean_nodes(g, 'h')
        return self.fc(readout)

# Model initialisation
# Seed torch so the initial weights are reproducible (same seed as the split).
torch.manual_seed(42)

# Take the input width from the featurized data instead of hard-coding it, so
# the first layer always matches what the featurizer actually produced.
node_feat_dim = X_train[0].node_features.shape[1]

model = MPNN(in_feats=node_feat_dim, hidden_size=128, out_feats=64)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Model training
num_epochs = 100

# Build the target once, flattened to shape (N,). It is compared against
# flattened predictions so MSELoss never broadcasts (N, 1) against (N,).
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).reshape(-1)

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Full-batch training: one forward pass per graph, one optimizer step per epoch.
    predictions = [model(g, g.ndata['features']) for g in train_graphs]

    loss = criterion(torch.cat(predictions).reshape(-1), y_train_tensor)
    loss.backward()
    optimizer.step()

    # Report the training loss every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

# Evaluation on the held-out graphs
model.eval()
test_predictions = []
with torch.no_grad():  # inference only, no gradient tracking
    for g in test_graphs:
        pred = model(g, g.ndata['features'])
        test_predictions += [pred]

# Show the predictions for the held-out graphs
print("Test Predictions:", test_predictions)