# 1. 准备环境：

In [3]:
import os
import sys
import time
import warnings
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from random import sample
from sklearn import metrics
from torch.autograd import Variable
from cgcnn.data import CIFData, collate_pool, get_train_val_test_loader
from cgcnn.model import CrystalGraphConvNet
from sklearn.metrics import mean_absolute_error

# Run configuration for this notebook. 'cuda' is auto-detected from the local
# PyTorch build; every other entry is a hand-set hyperparameter or path.
args = dict(
    cuda=torch.cuda.is_available(),
    task='regression',  # example value; set as per your requirements
    batch_size=256,
    train_ratio=None,
    workers=4,  # number of data-loading workers; adjust for your system
    epochs=30,
    lr=0.01,
    lr_milestones=[100],
    momentum=0.9,
    weight_decay=0.0,
    print_freq=10,
    resume='',  # leave empty when there is no checkpoint to resume from
    optim='SGD',
    atom_fea_len=64,
    n_conv=3,
    h_fea_len=128,
    n_h=1,
    data_options=['./data/material-data'],  # replace with the actual data path
)

# Report whether a CUDA device was detected.
print("CUDA is available." if args['cuda'] else "CUDA is not available.")


CUDA is available.


# 2. 数据加载：

In [6]:
# Build the CIF dataset from the configured path(s) and split it into
# train / validation / test data loaders.
dataset = CIFData(*args['data_options'])
collate_fn = collate_pool  # project-provided batching function for the loaders

train_loader, val_loader, test_loader = get_train_val_test_loader(
    dataset=dataset,
    collate_fn=collate_fn,
    # Loader mechanics.
    batch_size=args['batch_size'],
    num_workers=args['workers'],
    pin_memory=args['cuda'],
    # Split fractions: validation and test are fixed at 10% each here.
    train_ratio=args['train_ratio'],
    val_ratio=0.1,
    test_ratio=0.1,
    # Absolute split sizes are left unset.
    train_size=None,
    val_size=None,
    test_size=None,
    # Ask for the test loader as a third return value.
    return_test=True,
)




# 3. 构建模型：

In [8]:
# Read the input feature widths off the first dataset entry: the last
# dimension of its first two structure arrays.
structures, _, _ = dataset[0]
orig_atom_fea_len, nbr_fea_len = (structures[k].shape[-1] for k in (0, 1))

# Assemble the network from the measured widths and the hyperparameters in args.
model = CrystalGraphConvNet(
    orig_atom_fea_len=orig_atom_fea_len,
    nbr_fea_len=nbr_fea_len,
    atom_fea_len=args['atom_fea_len'],
    n_conv=args['n_conv'],
    h_fea_len=args['h_fea_len'],
    n_h=args['n_h'],
    classification=(args['task'] == 'classification'),
)

# Put the model on the GPU when CUDA was detected.
if args['cuda']:
    model.cuda()

# Optional: show the layer structure.
print(model)


CrystalGraphConvNet(
  (embedding): Linear(in_features=92, out_features=64, bias=True)
  (convs): ModuleList(
    (0-2): 3 x ConvLayer(
      (fc_full): Linear(in_features=169, out_features=128, bias=True)
      (sigmoid): Sigmoid()
      (softplus1): Softplus(beta=1.0, threshold=20.0)
      (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (softplus2): Softplus(beta=1.0, threshold=20.0)
    )
  )
  (conv_to_fc): Linear(in_features=64, out_features=128, bias=True)
  (conv_to_fc_softplus): Softplus(beta=1.0, threshold=20.0)
  (fc_out): Linear(in_features=128, out_features=1, bias=True)
)


# 4. 定义损失函数和优化器：

In [9]:
# Loss: negative log-likelihood for classification, mean squared error otherwise.
criterion = nn.NLLLoss() if args['task'] == 'classification' else nn.MSELoss()

# Optimizer: only the two names below are accepted; anything else is rejected
# before an optimizer is constructed.
if args['optim'] not in ('SGD', 'Adam'):
    raise ValueError('Only SGD or Adam is allowed as --optim')

if args['optim'] == 'SGD':
    optimizer = optim.SGD(
        model.parameters(),
        lr=args['lr'],
        momentum=args['momentum'],
        weight_decay=args['weight_decay'],
    )
else:
    optimizer = optim.Adam(
        model.parameters(),
        lr=args['lr'],
        weight_decay=args['weight_decay'],
    )


# 5. 恢复模型（如果有检查点）：

In [10]:
# Restore model and optimizer state when args['resume'] names an existing
# file; report a miss when it names a path that is not a file; do nothing when
# it is empty.
if args['resume'] and os.path.isfile(args['resume']):
    print("=> Loading checkpoint '{}'".format(args['resume']))
    checkpoint = torch.load(args['resume'])
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> Loaded checkpoint '{}'".format(args['resume']))
elif args['resume']:
    print("=> No checkpoint found at '{}'".format(args['resume']))


# 6. 训练循环：

In [None]:
# Learning-rate schedule: scale the LR by gamma=0.1 at every epoch index listed
# in args['lr_milestones'].
scheduler = MultiStepLR(optimizer, milestones=args['lr_milestones'], gamma=0.1)

# Best validation score seen so far. It has to exist before the first
# comparison inside the loop: regression keeps the smallest value, so it starts
# at +inf; any other task keeps the largest value, so it starts at 0.
best_mae_error = float('inf') if args['task'] == 'regression' else 0.0

# NOTE(review): train, validate and save_checkpoint are not imported by this
# section's import cell; a later cell imports them from cgcnn.unit and calls
# them with extra normalizer / args arguments -- confirm the signatures match
# before running this loop.
for epoch in range(args['epochs']):
    # One pass over the training data.
    train(train_loader, model, criterion, optimizer, epoch)

    # Score the current weights on the validation split.
    mae_error = validate(val_loader, model, criterion)
    if mae_error != mae_error:  # NaN is the only value not equal to itself
        print('Exit due to NaN')
        sys.exit(1)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    # Update the running best and note whether this epoch produced it
    # (lower is better for regression, higher is better otherwise).
    if args['task'] == 'regression':
        is_best = mae_error < best_mae_error
        best_mae_error = min(mae_error, best_mae_error)
    else:
        is_best = mae_error > best_mae_error
        best_mae_error = max(mae_error, best_mae_error)

    # Persist everything needed to resume, flagged with is_best.
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_mae_error': best_mae_error,
        'optimizer': optimizer.state_dict(),
        'args': args
    }, is_best)


In [None]:
# Test best model: reload the weights stored in 'model_best.pth.tar' into the
# model and evaluate them on the test loader.
# NOTE(review): presumably save_checkpoint writes 'model_best.pth.tar' when it
# is called with is_best true -- confirm against its implementation.
print('---------Evaluate Model on Test Set---------------')
best_checkpoint = torch.load('model_best.pth.tar')
model.load_state_dict(best_checkpoint['state_dict'])
# Left as the last expression so the notebook displays the returned value.
validate(test_loader, model, criterion, test=True)


# main()

In [1]:
# import importlib
# # import model
# # importlib.reload(model)
# import unit
# importlib.reload(unit)
import os
import torch
import torch.nn as nn
import torch.optim as optim
import warnings
from torch.optim.lr_scheduler import MultiStepLR
from random import sample
import random
import argparse

from cgcnn.data import CIFData, collate_pool, get_train_val_test_loader
from cgcnn.model import CrystalGraphConvNet
from cgcnn.unit import train, validate, save_checkpoint, AverageMeter,Normalizer

# Suppress the CIF data-precision warning: ignore warnings whose message
# starts with the text below.
import warnings
warnings.filterwarnings("ignore", message="Issues encountered while parsing CIF")

# Seed Python's and PyTorch's random number generators from one value so that
# results are reproducible.
random_num = 42
random.seed(random_num)
torch.manual_seed(random_num)

<torch._C.Generator at 0x18499e2f330>

In [2]:
# Parameter definitions, set directly in code.
args = dict(
    data_options=['./data/material-data'],  # make sure to replace this with your data path
    task='regression',  # either 'classification' or 'regression'
    batch_size=256,
    train_ratio=0.8,
    val_ratio=0.1,
    test_ratio=0.1,
    cuda=torch.cuda.is_available(),
    optim='Adam',  # 'SGD' or 'Adam'
    lr=0.001,
    lr_milestones=[50, 100],  # learning-rate decay milestones
    momentum=0.9,
    weight_decay=0.0005,
    epochs=200,
    start_epoch=0,
    atom_fea_len=64,
    n_conv=3,
    h_fea_len=128,
    n_h=1,
    resume='',  # path to a pretrained model / checkpoint, if there is one
    train_size=None,  # None or an actual value
    val_size=None,  # None or an actual value
    test_size=None,  # None or an actual value
    print_freq=90,  # print frequency
)

# Convert the dictionary into a Namespace object (attribute-style access).
# This is a snapshot: reassigning a key on `args` afterwards does not change
# the corresponding attribute here.
args_Namespace = argparse.Namespace(**args)

# Best validation score so far; starts at infinity.
best_mae_error = float('inf')


In [3]:
# Load the CIF dataset from the configured path(s) and split it into
# train / validation / test data loaders. `args` is read as an ordinary
# module-level name; no `global` declaration is needed outside a function.
dataset = CIFData(*args['data_options'])
collate_fn = collate_pool
train_loader, val_loader, test_loader = get_train_val_test_loader(
    dataset=dataset,
    collate_fn=collate_fn,
    batch_size=args['batch_size'],
    train_ratio=args['train_ratio'],
    num_workers=4,  # adjust to suit your machine
    val_ratio=args['val_ratio'],
    test_ratio=args['test_ratio'],
    pin_memory=args['cuda'],
    train_size=args['train_size'],
    val_size=args['val_size'],
    test_size=args['test_size'],
    return_test=True
)

In [4]:
# Build the normalizer for the target values.
if args['task'] != 'classification':
    # Fit the normalizer on the targets of up to 500 dataset entries.
    if len(dataset) >= 500:
        # Large enough: draw 500 distinct indices at random.
        sample_data_list = [dataset[i] for i in sample(range(len(dataset)), 500)]
    else:
        # Small dataset: warn and use every entry.
        warnings.warn('Dataset has less than 500 data points. '
                        'Lower accuracy is expected. ')
        sample_data_list = [dataset[i] for i in range(len(dataset))]
    # Batch the chosen entries; only the middle element (the targets) is used.
    _, sample_target, _ = collate_pool(sample_data_list)
    normalizer = Normalizer(sample_target)
else:
    # Classification: construct from a placeholder tensor, then set the state
    # explicitly to mean 0 and std 1.
    normalizer = Normalizer(torch.zeros(2))
    normalizer.load_state_dict({'mean': 0., 'std': 1.})

# Build the model. The two input feature widths are the last dimension of the
# first two structure arrays of the first dataset entry.
structures, _, _ = dataset[0]
orig_atom_fea_len, nbr_fea_len = (structures[k].shape[-1] for k in (0, 1))
model = CrystalGraphConvNet(
    orig_atom_fea_len=orig_atom_fea_len,
    nbr_fea_len=nbr_fea_len,
    atom_fea_len=args['atom_fea_len'],
    n_conv=args['n_conv'],
    h_fea_len=args['h_fea_len'],
    n_h=args['n_h'],
    classification=(args['task'] == 'classification'),
)
# Put the model on the GPU when CUDA was detected.
if args['cuda']:
    model.cuda()

# Define the loss function: negative log-likelihood for classification,
# mean squared error otherwise.
if args['task'] == 'classification':
    criterion = nn.NLLLoss()
else:
    criterion = nn.MSELoss()

# Define the optimizer. Only 'SGD' and 'Adam' are supported; momentum is used
# by SGD only.
if args['optim'] == 'SGD':
    optimizer = optim.SGD(model.parameters(), args['lr'],
                            momentum=args['momentum'],
                            weight_decay=args['weight_decay'])
elif args['optim'] == 'Adam':
    optimizer = optim.Adam(model.parameters(), args['lr'],
                            weight_decay=args['weight_decay'])
else:
    # An unsupported setting is a bad value, not an unresolved name, so it is
    # reported as ValueError.
    raise ValueError('Only SGD or Adam is allowed as --optim')

# Optionally resume from a checkpoint: when args['resume'] names an existing
# file, restore the start epoch, the best score, and the model / optimizer /
# normalizer state from it; when it names something that is not a file, report
# that; when it is empty, do nothing.
if args['resume'] and os.path.isfile(args['resume']):
    print(f"=> loading checkpoint '{args['resume']}'")
    checkpoint = torch.load(args['resume'])
    # Bookkeeping values.
    args['start_epoch'] = checkpoint['epoch']
    best_mae_error = checkpoint['best_mae_error']
    # Stateful objects.
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    normalizer.load_state_dict(checkpoint['normalizer'])
    print(f"=> loaded checkpoint '{args['resume']}' (epoch {checkpoint['epoch']})")
elif args['resume']:
    print(f"=> no checkpoint found at '{args['resume']}'")

# Learning-rate schedule: scale the LR by gamma=0.1 at each listed milestone epoch.
scheduler = MultiStepLR(
    optimizer,
    milestones=args['lr_milestones'],
    gamma=0.1,
)

In [5]:
# sys is needed for the NaN exit below and is not imported by this section's
# import cell, so it is imported here.
import sys

# Train from args['start_epoch'] up to (not including) args['epochs'],
# validating and checkpointing after every epoch.
for epoch in range(args['start_epoch'], args['epochs']):
    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch, normalizer,args_Namespace)

    # evaluate on validation set
    mae_error = validate(val_loader, model, criterion, normalizer,args_Namespace)

    if mae_error != mae_error:  # NaN is the only value not equal to itself
        print('Exit due to NaN')
        sys.exit(1)

    # advance the learning-rate schedule once per epoch
    scheduler.step()

    # remember the best mae_error and save checkpoint
    # (lower is better for regression, higher is better otherwise)
    if args['task'] == 'regression':
        is_best = mae_error < best_mae_error
        best_mae_error = min(mae_error, best_mae_error)
    else:
        is_best = mae_error > best_mae_error
        best_mae_error = max(mae_error, best_mae_error)
    # the saved keys are the ones the resume cell reads back
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_mae_error': best_mae_error,
        'optimizer': optimizer.state_dict(),
        'normalizer': normalizer.state_dict(),
        'args': args
    }, is_best)

# test best model: reload the weights stored in 'model_best.pth.tar' and
# evaluate them on the test loader
# NOTE(review): presumably save_checkpoint writes 'model_best.pth.tar' when
# is_best is true -- confirm against its implementation.
print('---------Evaluate Model on Test Set---------------')
best_checkpoint = torch.load('model_best.pth.tar')
model.load_state_dict(best_checkpoint['state_dict'])
# left as the last expression so the notebook displays the returned value
validate(test_loader, model, criterion, normalizer, args_Namespace,test=True)

Epoch: [0][0/95]	Time 12.291 (12.291)	Data 11.917 (11.917)	Loss 1.2570 (1.2570)	MAE 0.746 (0.746)
Epoch: [0][90/95]	Time 0.055 (1.486)	Data 0.000 (1.394)	Loss 0.0514 (0.1506)	MAE 0.126 (0.201)
Test: [0/12]	Time 12.957 (12.957)	Loss 0.0410 (0.0410)	MAE 0.125 (0.125)
 * MAE 0.137
Epoch: [1][0/95]	Time 12.864 (12.864)	Data 12.804 (12.804)	Loss 0.0977 (0.0977)	MAE 0.189 (0.189)
Epoch: [1][90/95]	Time 0.069 (1.117)	Data 0.000 (1.052)	Loss 0.0457 (0.0543)	MAE 0.130 (0.134)
Test: [0/12]	Time 11.486 (11.486)	Loss 0.0704 (0.0704)	MAE 0.172 (0.172)
 * MAE 0.186
Epoch: [2][0/95]	Time 11.789 (11.789)	Data 11.722 (11.722)	Loss 0.0818 (0.0818)	MAE 0.187 (0.187)
Epoch: [2][90/95]	Time 0.062 (1.046)	Data 0.000 (0.982)	Loss 0.0198 (0.0535)	MAE 0.078 (0.134)
Test: [0/12]	Time 11.296 (11.296)	Loss 0.0360 (0.0360)	MAE 0.115 (0.115)
 * MAE 0.109
Epoch: [3][0/95]	Time 11.483 (11.483)	Data 11.395 (11.395)	Loss 0.0460 (0.0460)	MAE 0.135 (0.135)
Epoch: [3][90/95]	Time 0.063 (1.049)	Data 0.000 (0.983)	Loss 0.01

  best_checkpoint = torch.load('model_best.pth.tar')


Test: [0/12]	Time 12.358 (12.358)	Loss 0.0060 (0.0060)	MAE 0.045 (0.045)
 ** MAE 0.052


tensor(0.0517)