In [1]:
import pywindow as pw
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Let's use _pywindow_ to calculate different descriptors for all 74 cages

In [2]:
# Read the cage identifiers as strings. ndmin=1 keeps the result a 1-D array
# even when the file lists a single cage, so the loop over `cages` still works
# (without it np.loadtxt returns a 0-d array that cannot be iterated).
cages = np.loadtxt("all_cages/all_cages.txt", dtype=str, ndmin=1)

In [3]:
# Create (or overwrite) the CSV that collects the per-cage descriptors and
# write its header row. The handle is left open here: the loop in the next
# cell writes the data rows and closes the file.
csv_header = "cage,molecule_diameter_A,pore_diameter_A,nb_windows\n"
property_file = open("pywindow_cage_descriptors.csv", mode="w")
property_file.write(csv_header)

52

In [4]:
# Compute the size descriptors of every cage and write one CSV row per cage.
# try/finally guarantees that the file opened in the previous cell is flushed
# and closed even if pywindow fails on one of the structures.
try:
    for cage in cages:
        print("Cage ", cage)
        molsys = pw.MolecularSystem.load_file("all_cages/%s_aligned.xyz" % cage)
        mol = molsys.system_to_molecule()
        # These calls fill mol.properties, which is read back when writing the row.
        mol.calculate_maximum_diameter()
        mol.calculate_pore_diameter()
        # Per the pywindow docs, calculate_windows() returns None when no
        # window is found; count that as zero windows instead of crashing.
        windows = mol.calculate_windows()
        nb_windows = 0 if windows is None else len(windows)
        property_file.write("%s,%f,%f,%d\n" % (cage,
                           mol.properties['maximum_diameter']['diameter'],
                           mol.properties['pore_diameter']['diameter'],
                           nb_windows)
                           )
finally:
    property_file.close()

Cage  A11
Cage  B11
Cage  B13
Cage  B15
Cage  B18
Cage  B1
Cage  B23
Cage  B24
Cage  B25
Cage  B26
Cage  B2
Cage  B4
Cage  B5
Cage  B6
Cage  B8
Cage  B9
Cage  C11
Cage  C13
Cage  C15
Cage  C18
Cage  C1
Cage  C20
Cage  C21
Cage  C23
Cage  C24
Cage  C25
Cage  C26
Cage  C2
Cage  C4
Cage  C5
Cage  C6
Cage  C8
Cage  C9
Cage  CB5
Cage  CB6
Cage  CB7
Cage  CC10
Cage  CC1
Cage  CC2
Cage  CC3
Cage  CC4
Cage  CC5
Cage  CC9
Cage  CD1
Cage  CD2
Cage  CD3
Cage  CP1
Cage  CP3
Cage  CP4
Cage  CP5
Cage  DC1
Cage  GC1
Cage  HC1
Cage  IC1
Cage  IC2
Cage  MC1
Cage  MC2
Cage  MC3
Cage  MC4
Cage  MC5
Cage  MC6
Cage  MC7
Cage  NC1
Cage  NC2
Cage  RCC1a
Cage  RCC1b
Cage  RCC1c
Cage  RCC1d
Cage  RCC3a
Cage  RCC3b
Cage  WC1
Cage  WC2
Cage  WC3
Cage  WC4


In [5]:
# Load the descriptor table that was just written to disk and display it.
descriptor_csv = "pywindow_cage_descriptors.csv"
df = pd.read_csv(descriptor_csv)
df

Unnamed: 0,cage,molecule_diameter_A,pore_diameter_A,nb_windows
0,A11,20.231662,10.775757,4
1,B11,20.908240,6.236137,4
2,B13,24.710859,3.411158,4
3,B15,25.352257,9.821863,4
4,B18,28.057421,11.415718,4
5,B1,15.162314,2.062071,3
6,B23,22.291666,9.486308,6
7,B24,25.045377,8.621005,6
8,B25,29.485939,11.513237,6
9,B26,32.689931,12.512314,6


# Let's look at the window diameters of **CC3**

For the following code to run, please extract `flexible_cages.tar.gz`, located in `latent_cage_space/all_cages/`.
On Linux, the following command will do the trick: `tar -xvzf flexible_cages.tar.gz`

In [6]:
# Read the list of CC3 structure files as strings. ndmin=1 keeps the result a
# 1-D array even when the list has a single entry, so it can always be iterated.
cc3_cages = np.loadtxt("all_cages/flexible_cages/flexible_cc3_files.txt", dtype=str, ndmin=1)

In [None]:
# Collect the window diameters of every CC3 structure into one flat list
# (one entry per window, so a structure may contribute several values).
window_diameters = []
for cage in cc3_cages:
    molsys = pw.MolecularSystem.load_file("%s" % cage)
    mol = molsys.system_to_molecule()
    windows = mol.calculate_windows()
    # Per the pywindow docs, calculate_windows() returns None when no window
    # is found; skip such structures instead of failing on iteration.
    if windows is not None:
        window_diameters.extend(windows)

In [None]:
# Histogram of the window diameters gathered in `window_diameters`, saved as PNG.
# NOTE(review): window_diameters holds one entry per window, so the counts on
# the y-axis are windows rather than configurations — confirm the label.
fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(window_diameters, bins=100)
ax.set_xlabel('Window diameter (Å)')
ax.set_ylabel('Number of configurations')
ax.set_xlim(2.2, 5.2)
fig.savefig('CC3_histogram.png', dpi=300, format='png')

# Let's find descriptors for the fluctuating cages

In [None]:
# Read the list of flexible-cage structure files as strings. ndmin=1 keeps the
# result a 1-D array even when the list has a single entry, so it can always
# be iterated.
flexible_cages = np.loadtxt("all_cages/flexible_cages/all_flexible_files.txt", dtype=str, ndmin=1)

In [None]:
# Pore diameter of every structure listed in `flexible_cages`, in file order.
pore_diameters = []

for cage in flexible_cages:
    molsys = pw.MolecularSystem.load_file("%s" % cage)
    mol = molsys.system_to_molecule()
    # The call stores its result under mol.properties['pore_diameter'].
    mol.calculate_pore_diameter()
    pore_info = mol.properties['pore_diameter']
    pore_diameters += [pore_info['diameter']]


In [None]:
# Overlaid pore-diameter histograms, one per cage, saved as PNG.
fig, ax = plt.subplots(figsize=(10, 8))
maximum_pore = np.max(pore_diameters)
bin_size = 0.10
# np.arange excludes its stop value, so extend the range by one bin; otherwise
# the last bin edge lies below the maximum and the largest pore diameters are
# silently dropped from the histograms.
bin_edges = np.arange(0, maximum_pore + bin_size, bin_size)

# pore_diameters is sliced in consecutive runs of 400 values per cage, in this
# order — assumes the file list is ordered that way; TODO confirm.
configs_per_cage = 400
cage_styles = [
    ('CC2', '#42d4f4'),
    ('CC3', '#f58231'),
    ('CC4', '#911eb4'),
    ('CC5', '#800000'),
]
for cage_index, (cage_label, cage_color) in enumerate(cage_styles):
    first = cage_index * configs_per_cage
    ax.hist(pore_diameters[first:first + configs_per_cage], bins=bin_edges,
            color=cage_color, alpha=0.65, label=cage_label)

ax.set_xlabel('Pore diameter [Å]')
ax.set_ylabel('Number of configurations')
ax.set_xlim(2.2, 10.8)
ax.legend()
fig.savefig('pore_diameter_histogram.png', dpi=300, format='png')