In [None]:
import ase

In [2]:
import pandas as pd

# Load the structures table from the CSV file into a pandas DataFrame
struct_file = pd.read_csv('structures.csv')

molecule_name : 분자 이름 <br>
atom_index : 원자 순서 인덱스 <br>
atom : 원자기호 <br>
x : 원자의 수평 위치를 나타내는 좌표 값 <br>
y : 원자의 수직 위치를 나타내는 좌표 값 <br>
z : 원자의 깊이(높이) 위치를 나타내는 좌표 값

In [3]:
# Display the DataFrame to confirm that the file was loaded correctly
struct_file

Unnamed: 0,molecule_name,atom_index,atom,x,y,z
0,dsgdb9nsd_000001,0,C,-0.012698,1.085804,0.008001
1,dsgdb9nsd_000001,1,H,0.002150,-0.006031,0.001976
2,dsgdb9nsd_000001,2,H,1.011731,1.463751,0.000277
3,dsgdb9nsd_000001,3,H,-0.540815,1.447527,-0.876644
4,dsgdb9nsd_000001,4,H,-0.523814,1.437933,0.906397
...,...,...,...,...,...,...
2358870,dsgdb9nsd_133885,11,H,-1.454004,-0.967309,1.459246
2358871,dsgdb9nsd_133885,12,H,0.277779,-2.697872,0.195770
2358872,dsgdb9nsd_133885,13,H,2.515854,-1.151784,0.527369
2358873,dsgdb9nsd_133885,14,H,0.013699,1.199431,-1.680192


In [4]:
import random

# Pick one molecule at random.
# Series.unique() returns the distinct values of the molecule_name column,
# and random.choice() draws a single element from that collection.
random_molecule = random.choice(struct_file['molecule_name'].unique())

# Keep only the rows that belong to the chosen molecule and store them
# in the DataFrame `molecule`.
molecule = struct_file.loc[struct_file['molecule_name'].eq(random_molecule)]
display(molecule)

Unnamed: 0,molecule_name,atom_index,atom,x,y,z
1591897,dsgdb9nsd_092247,0,C,-0.117966,1.560443,-0.047271
1591898,dsgdb9nsd_092247,1,C,0.070507,0.063025,0.052138
1591899,dsgdb9nsd_092247,2,O,1.174903,-0.439922,-0.766595
1591900,dsgdb9nsd_092247,3,C,1.639013,-1.34394,0.26982
1591901,dsgdb9nsd_092247,4,C,2.913995,-0.844509,0.993768
1591902,dsgdb9nsd_092247,5,C,2.00882,-0.032018,1.964461
1591903,dsgdb9nsd_092247,6,C,1.394408,-0.787576,3.172679
1591904,dsgdb9nsd_092247,7,O,0.15886,-1.068091,2.448378
1591905,dsgdb9nsd_092247,8,C,0.735101,-0.56478,1.264489
1591906,dsgdb9nsd_092247,9,H,0.798685,2.089188,0.229031


In [5]:
# Get atomic coordinates
# Select the coordinate columns by label instead of by position, so the
# result does not depend on the column order of the loaded CSV, then
# convert the selection (all rows, columns x/y/z) to a NumPy array.
atoms = molecule[['x', 'y', 'z']].to_numpy()
print(atoms)

[[-0.11796602  1.56044273 -0.04727146]
 [ 0.07050729  0.0630245   0.05213847]
 [ 1.17490284 -0.43992204 -0.76659496]
 [ 1.63901314 -1.34394033  0.26982023]
 [ 2.91399542 -0.84450907  0.99376821]
 [ 2.00882003 -0.03201773  1.96446114]
 [ 1.39440806 -0.78757617  3.17267853]
 [ 0.1588599  -1.06809118  2.448378  ]
 [ 0.73510114 -0.56478013  1.26448893]
 [ 0.79868477  2.08918766  0.22903145]
 [-0.38384154  1.84500001 -1.06987837]
 [-0.92692621  1.88290953  0.61670694]
 [-0.85951137 -0.46400199 -0.21210741]
 [ 1.53358651 -2.39558796 -0.01035656]
 [ 3.51139109 -1.62899718  1.46895041]
 [ 3.56096876 -0.23887678  0.35540267]
 [ 2.21390206  1.02621349  2.12416231]
 [ 1.91030751 -1.71044185  3.46401847]
 [ 1.19478136 -0.18312579  4.06271681]]


In [6]:
# Get atomic symbols
# Select the 'atom' column by label instead of by position (all rows),
# and convert it to a NumPy array.
symbols = molecule['atom'].to_numpy()
print(symbols)

['C' 'C' 'O' 'C' 'C' 'C' 'C' 'O' 'C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'H'
 'H']


In [11]:
from ase import Atoms
import ase.visualize

# Build an ASE Atoms object from the element symbols and their coordinates
system = Atoms(symbols=symbols, positions=atoms)

# Render the structure with ASE's "x3d" viewer
ase.visualize.view(system, viewer="x3d")

In [8]:
def view(molecule):
    """Render one molecule from ``struct_file`` with ASE's "x3d" viewer.

    Parameters
    ----------
    molecule
        Molecule name to look up in the ``molecule_name`` column of the
        module-level ``struct_file`` DataFrame.

    Returns
    -------
    Whatever ``ase.visualize.view(system, viewer="x3d")`` returns.

    Raises
    ------
    ValueError
        If no row of ``struct_file`` has the given molecule name.
    """
    # Select the rows that belong to the requested molecule
    mol = struct_file[struct_file['molecule_name'] == molecule]
    if mol.empty:
        # Fail loudly instead of silently building an empty structure
        raise ValueError(f'Unknown molecule name: {molecule!r}')

    # Get atomic coordinates; columns are selected by label so the result
    # does not depend on the column order of the loaded table
    xcart = mol[['x', 'y', 'z']].to_numpy()

    # Get atomic symbols (also by label)
    symbols = mol['atom'].to_numpy()

    # Display molecule
    system = Atoms(positions=xcart, symbols=symbols)
    print(f'Molecule Name: {molecule}.')
    return ase.visualize.view(system, viewer="x3d")

# Draw another molecule name at random from the distinct molecule_name values
# and render it with view()
random_molecule = random.choice(struct_file['molecule_name'].unique())
view(random_molecule)

Molecule Name: dsgdb9nsd_123045.
