In [1]:
import pandas as pd
import requests
from io import StringIO

class AtomicDataLoader:
    """Load atomic coordinates from a whitespace-delimited text source.

    ``source`` is either a local file path or an ``http://`` / ``https://``
    URL. The first line of the source is skipped. Every following non-blank
    line must start with five whitespace-separated fields: an element label,
    an integer index, and three floats (x, y, z). Any further fields on a
    line are ignored.

    Parameters
    ----------
    source : str
        File path or URL of the data to load.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get`` when ``source`` is a
        URL, so an unresponsive server cannot hang the notebook forever.
    """

    # Column names of the DataFrame returned by load_data().
    COLUMNS = ['Element', 'Index', 'X', 'Y', 'Z']

    def __init__(self, source, timeout=30):
        self.source = source
        self.timeout = timeout

    def _read_lines(self):
        """Return the raw lines of the source (from the network or from disk)."""
        if self.source.startswith(('http://', 'https://')):
            response = requests.get(self.source, timeout=self.timeout)
            # Fail loudly on 4xx/5xx responses: otherwise the body of an
            # error page would be parsed as if it were atomic data.
            response.raise_for_status()
            return response.text.split('\n')
        with open(self.source, 'r') as file:
            return file.readlines()

    def load_data(self):
        """Parse the source into a DataFrame.

        Returns
        -------
        pandas.DataFrame
            One row per data line, with columns
            ``Element``, ``Index``, ``X``, ``Y``, ``Z``.

        Raises
        ------
        requests.HTTPError
            If ``source`` is a URL and the server answers with an error status.
        IndexError
            If a non-blank data line has fewer than five fields.
        ValueError
            If the index or a coordinate field is not numeric.
        """
        records = []
        for line in self._read_lines()[1:]:  # the first line is not atom data
            parts = line.split()
            if not parts:  # blank or whitespace-only line
                continue
            records.append([
                parts[0],
                int(parts[1]),
                float(parts[2]),
                float(parts[3]),
                float(parts[4]),
            ])

        return pd.DataFrame(records, columns=self.COLUMNS)

# Alternative usage: load from a local file path instead of a URL.
# (Kept commented out; uncomment and adjust the path to use it.)
# file_path = '/mnt/data/sio2-cart.chem3d'
# loader = AtomicDataLoader(file_path)
# df = loader.load_data()

# Usage example with a URL: download and parse the SiO2 sample file
url = 'https://isaacs.sourceforge.io/tests/sio2-cart.chem3d'
loader = AtomicDataLoader(url)
df = loader.load_data()
# Report the total number of rows and the per-element row counts
print(f"Number of rows: {len(df)}, number of silicon atoms: {len(df[df['Element'] == 'Si'])}, number of oxygen atoms: {len(df[df['Element'] == 'O'])}")
# Last expression of the cell: display the first rows of the table
df.head()

Number of rows: 3000, number of silicon atoms: 1000, number of oxygen atoms: 2000


Unnamed: 0,Element,Index,X,Y,Z
0,O,1,5.043,-16.116,-4.787
1,O,2,7.184,-16.066,-3.85
2,O,3,-8.529,-1.029,-0.326
3,O,4,2.347,5.238,-6.673
4,O,5,-3.873,13.915,7.447


In [2]:
import plotly.express as px

# Reduce the number of points by only keeping atoms whose Euclidean
# distance from the origin is below MAX_DISTANCE (same units as X, Y, Z).
MAX_DISTANCE = 16
df['Distance'] = (df['X']**2 + df['Y']**2 + df['Z']**2)**0.5
df = df[df['Distance'] < MAX_DISTANCE].reset_index(drop=True)
print(f"Number of rows: {len(df)}, number of silicon atoms: {len(df[df['Element'] == 'Si'])}, number of oxygen atoms: {len(df[df['Element'] == 'O'])}")

# Use plotly to visualize the data. Oxygen atoms are red, silicon atoms are blue.
# The colours are pinned explicitly: without color_discrete_map, plotly express
# hands out its default palette in order of appearance, which does not
# guarantee this mapping.
fig = px.scatter_3d(
    df, x='X', y='Y', z='Z', color='Element', title='SiO2',
    color_discrete_map={'O': 'red', 'Si': 'blue'},
)
fig.update_traces(marker_size=3)
fig.show()

Number of rows: 1130, number of silicon atoms: 373, number of oxygen atoms: 757


In [3]:
# Upper row: coordinates of every atom left in df (both elements), as an array
points_upper = df[['X', 'Y', 'Z']].values
# Lower row: the silicon atoms are the ones to remove; store their index labels
deletion_list = df[df['Element'] == 'Si'].index
# Get one-parameter persistence information for the upper row
from commutazzio.persistence import PD_Points3D
from cpes import Points3D
SiO2_persistence=PD_Points3D(Points3D(points_upper))
# Largest second entry over the (b, d, dim) tuples in diagram_1_r.
# NOTE(review): presumably (birth, death, dimension) for homology dimension one,
# and the library prints "The radius is squared." -- confirm whether this value
# is a radius or a squared radius.
max_radius = max([d for (b,d,dim) in SiO2_persistence.diagram_1_r])
print(f"Max radius: {max_radius}")
# show the persistence diagrams at dimension one before and after thinning
# NOTE(review): the return value of this first plot_1D call is discarded and the
# same diagram is plotted again below with plotrange=(0,5); it looks redundant
# -- confirm before removing.
SiO2_persistence.plot_1D(plotrange=(0,4))
# Same construction restricted to the oxygen atoms only (silicon rows dropped)
OxygenOnly_persistence=PD_Points3D(Points3D(df[df['Element'] == 'O'][['X', 'Y', 'Z']].values))
fig1=SiO2_persistence.plot_1D(plotrange=(0,5))
fig2=OxygenOnly_persistence.plot_1D(plotrange=(0,5))
# display both figures (two separate show() calls, all atoms first, oxygen-only second)
fig1.show()
fig2.show()

Alpha complex is of dimension 3
The radius is squared.
Max radius: 10.395887562852284
Alpha complex is of dimension 3
The radius is squared.


In [4]:
# IMPORTANT:
# Building chromatic alpha complexes on 3D point clouds requires an alpha version of chromatic_tda.
# Either follow the manual installation instructions below,
# or run the next cell to install the package automatically.
"""
1. Clone the repository from the specific branch:
    ```bash
    git clone --branch general-radius-function --recursive https://github.com/OnDraganov/chromatic-tda.git
    ``` 
2. Navigate to the cloned repository:
    ```bash
    cd chromatic-tda
    ```
3. Install the package:
    ```bash
    pip install .
    ```
4. Restart the kernel.

Ignore the pip error about dependency on chromatic_tda.
"""


'\n1. Clone the repository from the specific branch:\n    ```bash\n    git clone --branch general-radius-function --recursive https://github.com/OnDraganov/chromatic-tda.git\n    ``` \n2. Navigate to the cloned repository:\n    ```bash\n    cd chromatic-tda\n    ```\n3. Install the package:\n    ```bash\n    pip install .\n    ```\n4. Restart the kernel.\n\nIgnore the pip error about dependency on chromatic_tda.\n'

In [5]:
# Run this cell to install the alpha version of chromatic_tda
import chromatic_tda
if 'a' in chromatic_tda.__version__:
    print("Alpha version of chromatic_tda already installed.")
else:
    !git clone --branch general-radius-function --recursive https://github.com/OnDraganov/chromatic-tda.git
    !cd chromatic-tda && pip install .
    print("Alpha version of chromatic_tda installed.")
    # remove the cloned repository
    !rm -rf chromatic-tda
    print("\n" + "-"*20)
    # Use ANSI escape codes to make the text red and bold
    print("\033[1;31mPLEASE RESTART THE KERNEL TO USE THE ALPHA VERSION OF CHROMATIC_TDA.\033[0m")
    print("-"*20)


Alpha version of chromatic_tda already installed.


In [6]:
from commutazzio.filtration import pointCloud2Filtration, points_to_clfiltration_chro

# Radii at which the connected persistence diagram is sampled:
# 51 evenly spaced values running from 0 up to max_radius (50 steps).
plot_radii = [step * max_radius / 50 for step in range(51)]

# One label per point: 1 when its position is in deletion_list (added at layer 1),
# 0 otherwise (added at layer 0).
labels = [
    int(position in deletion_list)
    for position in range(len(points_upper))
]

filtration = points_to_clfiltration_chro(points_upper, labels, 2, plot_radii)
# Alternative constructor, kept for reference:
# filtration = pointCloud2Filtration(points_upper, deletion_list, plot_radii, max_simplex_dim=2)

Creating chromatic alpha filtration...


In [7]:
# Compute and plot the connected persistence diagram of the filtration built above.
# ConnectedPersistenceDiagram and Visualizer are imported but not used in this cell.
from commutazzio.compute import ConnectedPersistenceDiagram
from commutazzio.compute import CLInvariants
from commutazzio.plot import ComplementaryTrianglesPlot as Visualizer
from commutazzio.plot import OverlappingTrianglesPlot as Visualizer2
# Wrap the filtration and run the computation at homology dimension 1
# (this is the long-running step; it prints its own progress log)
inv=CLInvariants(filtration)
inv.cPD_computation(homology_dim=1)
# Take the first entry of inv.cPDs
# NOTE(review): presumably the diagram just computed -- confirm against the library
cPD=inv.cPDs[0]
# Render with the overlapping-triangles plot; this rebinds `fig`, which an
# earlier cell used for the 3D scatter plot
fig=Visualizer2(cPD=cPD,title='SiO2').render()
fig.show()

''
Computing connected persistence diagram at homology dimension 1
Using new file path ./filtration/20240607_230242_da37721be2.fltr
Constructed all 318903 interval representations
Building the difference list of all paths...
Difference list building complete.
Received 52824 simplices
Upper layer barcode computation complete!
Received 14730 simplices
Received 18758 simplices
Progress: 3.57％ Received 25288 simplices
Progress: 7.14％ Received 21984 simplices
Progress: 10.71％ Received 30484 simplices
Progress: 14.29％ Received 36604 simplices
Progress: 17.86％ Received 46430 simplices
Progress: 21.43％ Received 58530 simplices
Progress: 25.00％ Received 70944 simplices
Progress: 28.57％ Received 26756 simplices
Progress: 32.14％ Received 32876 simplices
Progress: 35.71％ Received 42702 simplices
Progress: 39.29％ Received 54802 simplices
Progress: 42.86％ Received 67216 simplices
Progress: 46.43％ Received 30488 simplices
Progress: 50.00％ Received 40314 simplices
Progress: 53.57％ Received 52414 simpl