In [11]:
import os
import subprocess

# Fetch the tutorial archive into the home directory, unpack it, and make the
# unpack location the working directory for the remaining cells.

# Define the dataset URL and the paths
dataset_url = "https://bohrium-api.dp.tech/ds-dl/DeePMD-kit-Tutorial-a8z5-v1.zip"
zip_file_name = "DeePMD-kit-Tutorial-a8z5-v1.zip"
dataset_directory = "DeePMD-kit_Tutorial"
home_directory = os.path.expanduser("~")  # Use home directory
local_zip_path = os.path.join(home_directory, zip_file_name)
# Base directory the archive is unpacked into (also the final working directory).
extract_path = os.path.join(home_directory, dataset_directory)
# Directory whose presence marks the dataset as already extracted.
dataset_path = os.path.join(extract_path, dataset_directory)

# Ensure the base directory exists
if not os.path.exists(extract_path):
    os.makedirs(extract_path)
    print(f"Created directory: {extract_path}")

# Check if the dataset directory exists to avoid re-downloading and re-extracting
if not os.path.isdir(dataset_path):
    # `wget -O` creates the output file even when the transfer fails, so a
    # zero-byte archive left by an earlier failed run is treated as missing.
    zip_is_usable = os.path.isfile(local_zip_path) and os.path.getsize(local_zip_path) > 0
    if not zip_is_usable:
        print("Downloading dataset...")
        try:
            # Argument list (no shell): paths containing spaces are passed intact,
            # and check=True turns a non-zero wget exit status into an exception.
            subprocess.run(["wget", "-q", "-O", local_zip_path, dataset_url], check=True)
        except (OSError, subprocess.CalledProcessError) as exc:
            # Remove the partial/empty file so the next run retries the download.
            if os.path.isfile(local_zip_path):
                os.remove(local_zip_path)
            raise FileNotFoundError(f"Failed to download the file from {dataset_url}") from exc

    # Extract the dataset (-n: never overwrite files that already exist)
    print("Extracting dataset...")
    unzip_result = subprocess.run(["unzip", "-q", "-n", local_zip_path, "-d", extract_path])
    # Exit status 1 from unzip means "completed with warnings"; anything else
    # non-zero is a real failure that must not be silently ignored.
    if unzip_result.returncode not in (0, 1):
        raise RuntimeError(
            f"Failed to extract {local_zip_path} (unzip exit status {unzip_result.returncode})"
        )
else:
    print("Dataset is already downloaded and extracted.")

# Change the current working directory
os.chdir(extract_path)
print(f"Current path is: {os.getcwd()}")


Dataset is already downloaded and extracted.
Current path is: /Users/brian/DeePMD-kit_Tutorial


In [24]:
# List the entries directly under the tutorial folder (depth limited to 1)
!tree -L 1 DeePMD-kit_Tutorial

[01;34mDeePMD-kit_Tutorial[0m
├── [01;34m00.data[0m
├── [01;34m01.train[0m
├── [01;34m01.train.finished[0m
├── [01;34m02.lmp[0m
└── [01;34m02.lmp.finished[0m

6 directories, 0 files


In [25]:
# List the entries directly under 00.data (depth limited to 1)
!tree -L 1 DeePMD-kit_Tutorial/00.data

[01;34mDeePMD-kit_Tutorial/00.data[0m
├── [01;34mabacus_md[0m
├── [01;34mtraining_data[0m
└── [01;34mvalidation_data[0m

4 directories, 0 files


In [28]:
import dpdata
import numpy as np

# Split the labeled frames into a validation subset and a training subset,
# then write both in DeePMD-kit's npy format.

# load data of abacus/md format
data = dpdata.LabeledSystem("DeePMD-kit_Tutorial/00.data/abacus_md", fmt="abacus/md")
n_frames = len(data)  # use the real frame count instead of a hard-coded 201
print("# the data contains %d frames" % n_frames)

# randomly choose 40 distinct frame indices for validation_data;
# the fixed seed makes the split identical on every run
n_validation = 40
rng = np.random.default_rng(42)
index_validation = rng.choice(n_frames, size=n_validation, replace=False)

# all other indices are training_data; setdiff1d returns them sorted, so the
# training frames keep their original order
index_training = np.setdiff1d(np.arange(n_frames), index_validation).tolist()
data_training = data.sub_system(index_training)
data_validation = data.sub_system(index_validation)

# all training data put into directory:"training_data"
data_training.to_deepmd_npy("DeePMD-kit_Tutorial/00.data/training_data")

# all validation data put into directory:"validation_data"
data_validation.to_deepmd_npy("DeePMD-kit_Tutorial/00.data/validation_data")

print("# the training data contains %d frames" % len(data_training))
print("# the validation data contains %d frames" % len(data_validation))

# the data contains 201 frames
# the training data contains 161 frames
# the validation data contains 40 frames


In [29]:
# List the entries directly under 00.data/ again (depth limited to 1)
!tree -L 1 DeePMD-kit_Tutorial/00.data/

[01;34mDeePMD-kit_Tutorial/00.data/[0m
├── [01;34mabacus_md[0m
├── [01;34mtraining_data[0m
└── [01;34mvalidation_data[0m

4 directories, 0 files


In [30]:
# List the entries directly under training_data (depth limited to 1)
!tree -L 1 DeePMD-kit_Tutorial/00.data/training_data

[01;34mDeePMD-kit_Tutorial/00.data/training_data[0m
├── [01;34mset.000[0m
├── [00mtype.raw[0m
└── [00mtype_map.raw[0m

2 directories, 2 files


In [31]:
# Print the contents of type.raw from the training set
!cat DeePMD-kit_Tutorial/00.data/training_data/type.raw

0
0
0
0
1


In [32]:
# Print the contents of type_map.raw from the training set
!cat DeePMD-kit_Tutorial/00.data/training_data/type_map.raw

H
C


In [33]:
# Check dargs version and install it if it is missing.
# pip is run through the kernel's own interpreter (sys.executable) so the
# package lands in the environment this notebook imports from; a bare `pip`
# is resolved through PATH and may belong to a different environment.
import sys
!"{sys.executable}" -m pip show dargs || "{sys.executable}" -m pip install --upgrade dargs

[0mCollecting dargs
  Downloading dargs-0.4.10-py3-none-any.whl.metadata (11 kB)
Collecting typeguard>=4 (from dargs)
  Downloading typeguard-4.4.1-py3-none-any.whl.metadata (3.7 kB)
Collecting typing-extensions>=4.10.0 (from typeguard>=4->dargs)
  Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Downloading dargs-0.4.10-py3-none-any.whl (27 kB)
Downloading typeguard-4.4.1-py3-none-any.whl (35 kB)
Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Installing collected packages: typing-extensions, typeguard, dargs
  Attempting uninstall: typing-extensions
    Found existing installation: typing_extensions 4.9.0
    Uninstalling typing_extensions-4.9.0:
      Successfully uninstalled typing_extensions-4.9.0
Successfully installed dargs-0.4.10 typeguard-4.4.1 typing-extensions-4.12.2


In [11]:
# Display the training input file together with the argument definitions
# returned by gen_args()
from deepmd.utils.argcheck import gen_args
from dargs.notebook import JSON

input_json_path = "./DeePMD-kit_Tutorial/01.train/input.json"
with open(input_json_path) as input_file:
    input_json_text = input_file.read()
    JSON(input_json_text, gen_args())

In [12]:
# Run `dp train` on input.json from inside the 01.train directory.
# Time warning: 120 secs on C32_CPU; 13 mins on C2_CPU.
!cd DeePMD-kit_Tutorial/01.train/ && dp train input.json

2025-01-07 15:13:02.633268: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Instructions for updating:
non-resource variables are not supported in the long term
DEEPMD INFO    Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
DEEPMD INFO    training data with min nbor dist: 1.0459205686110267
DEEPMD INFO    training data with max nbor size: [4 1]
DEEPMD INFO     _____               _____   __  __  _____           _     _  _   
DEEPMD INFO    |  __ \             |  __ \ |  \/  ||  __ \         | |   (_)| |  
DEEPMD INFO    | |  | |  ___   ___ | |__) || \  / || |  