In [1]:
!pip install torch torch_geometric pymatgen matminer



In [2]:
from matminer.datasets import load_dataset
from pymatgen.core import Structure
from sklearn.model_selection import train_test_split

df = load_dataset("castelli_perovskites")
df = df[df["gap gllbsc"].notnull()].reset_index(drop=True)

structures = df["structure"].tolist()
targets = df["gap gllbsc"].values

struct_train, struct_test, y_train, y_test = train_test_split(structures, targets, test_size=0.2, random_state=42)

In [3]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn.models.schnet import SchNet

def structure_to_graph(structure: Structure, cutoff=5.0):
    positions = torch.tensor([site.coords for site in structure], dtype=torch.float)
    atomic_numbers = torch.tensor([site.specie.number for site in structure], dtype=torch.long)

    edge_index = []
    edge_attr = []

    for i, site_i in enumerate(structure.sites):
        for j, site_j in enumerate(structure.sites):
            if i != j:
                dist = site_i.distance(site_j)
                if dist < cutoff:
                    edge_index.append([i, j])
                    edge_attr.append([dist])

    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    edge_attr = torch.tensor(edge_attr, dtype=torch.float)

    return Data(
        x=atomic_numbers.unsqueeze(1),  # optional, for other models
        z=atomic_numbers,               # ← これを追加！SchNet用
        pos=positions,
        edge_index=edge_index,
        edge_attr=edge_attr
    )

# グラフ変換
train_graphs = [structure_to_graph(s) for s in struct_train]
test_graphs = [structure_to_graph(s) for s in struct_test]
for i, g in enumerate(train_graphs):
    g.y = torch.tensor([y_train[i]], dtype=torch.float)


  positions = torch.tensor([site.coords for site in structure], dtype=torch.float)


In [4]:
# PyTorch/torch-geometric対応バージョンに応じてインストール（2024年5月版）
!pip install torch-scatter torch-sparse torch-cluster -f https://data.pyg.org/whl/torch-2.0.0+cpu.html

Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html


In [5]:
from torch_geometric.loader import DataLoader
import torch.nn.functional as F

train_loader = DataLoader(train_graphs, batch_size=16)
test_loader = DataLoader(test_graphs, batch_size=16)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = SchNet(hidden_channels=64, num_filters=64, num_interactions=3, cutoff=5.0).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 学習ループ
for epoch in range(1, 21):
    model.train()
    total_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch.z, batch.pos, batch.batch)
        loss = F.l1_loss(out.view(-1), batch.y.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch:02d}, MAE: {total_loss / len(train_loader):.4f}")


ImportError: 'radius_graph' requires 'torch-cluster'

In [6]:
import torch
print(torch.__version__)


2.6.0+cu124
