# Library Import

In [2]:
# Ignore warnings

import pandas as pd
import numpy as np
from pathlib import Path

import analyzer
import neural_network as nn
import convolutional_neural_network as cnn
import visualizer

# Training Data Set Preparation

In [None]:
# Grid resolution used for every dataset; keep this value the same throughout the file.
resolution = [250, 250]

In [None]:

# Each training dataset is a standalone file, so every one is loaded on its own.
training_paths = ("MORE_DATA/db_Y_0027.okc", "MORE_DATA/db_Y_0030.okc")
data1, data2 = [analyzer.Data(okc_path, resolution) for okc_path in training_paths]


# Training The Model

In [None]:
# Run this section if you want to use the NN model

# Arrange both training datasets (data1 first, then data2) into the NN input layout.
nn_inputs = [nn.data_arranger(dataset.df) for dataset in (data1, data2)]
(
    (array1, headers1, non_nan_indices1, num_grids1),
    (array2, headers2, non_nan_indices2, num_grids2),
) = nn_inputs

# The learning rate, batch size, and epochs below are known to work.
nn_model = nn.model_create_compile(headers1, 0.05)

# Train the same model on each dataset in turn; loss_hist keeps the last run's history.
for train_array in (array1, array2):
    nn_model, loss_hist = nn.model_train(nn_model, train_array, 1000, 10)

In [None]:
# Run this section if you want to use the CNN model

# Arrange both training datasets (data1 first, then data2) into the CNN input layout.
cnn_inputs = [cnn.data_arranger(dataset.df, resolution) for dataset in (data1, data2)]
(array1, headers1, indices1), (array2, headers2, indices2) = cnn_inputs

# The learning rate and epochs below are known to work.
cnn_model = cnn.model_2D_create_compile(headers1, 0.05, resolution)

# Train the same model on each dataset in turn; loss_hist keeps the last run's history.
for train_array in (array1, array2):
    cnn_model, loss_hist = cnn.model_2D_train(cnn_model, train_array, 3)

# Data Classification

Use the three cells below to classify a few individual data files: run the loading cell, then the cell that matches your model (NN or CNN).

In [None]:
# Load the single file that will be classified by the trained model.
classify_path = "MORE_DATA/db_Y_0049.okc"
data3 = analyzer.Data(classify_path, resolution)

In [None]:
# Run this code if your model choice is NN
nn_input3 = nn.data_arranger(data3.df)
array3, headers3, non_nan_indices3, num_grids3 = nn_input3
# Replace data3.df with the frame returned by the classifier.
data3.df = nn.model_classification(
    nn_model, array3, non_nan_indices3, num_grids3, data3.df, False
)

In [None]:
# Run this code if your model choice is CNN
cnn_input3 = cnn.data_arranger(data3.df, resolution)
array3, headers3, indices3 = cnn_input3
# Replace data3.df with the frame returned by the classifier.
data3.df = cnn.model_2D_classification(
    cnn_model, array3, indices3, data3.df, False
)

Use the three cells below to classify a series of data files: run the loading cell, then the cell that matches your model (NN or CNN).

In [None]:
# Paths of the files to classify: "MORE_DATA/db_Y_0000.okc" through
# "MORE_DATA/db_Y_0098.okc" (range(99) yields 0..98).
list_paths_classify = [f"MORE_DATA/db_Y_{i:04d}.okc" for i in range(99)]

# One analyzer.Data object per path.
# NOTE(review): analyzer.Data is called here with three arguments (path, the full
# path list, resolution), while the other cells call it with two (path, resolution)
# - confirm which signature analyzer.Data actually expects.
Data_classify = [analyzer.Data(path, list_paths_classify, resolution) for path in list_paths_classify] 

In [None]:
# Run this code if your model choice is NN
for data in Data_classify:
    arranged = nn.data_arranger(data.df)
    array, headers, non_nan_indices, num_grids = arranged
    # Replace each dataset's frame with the one returned by the classifier.
    data.df = nn.model_classification(
        nn_model, array, non_nan_indices, num_grids, data.df, False
    )

In [None]:
# Run this code if your model choice is CNN
for data in Data_classify:
    arranged = cnn.data_arranger(data.df, data.resolution)
    array, headers, indices = arranged
    # Replace each dataset's frame with the one returned by the classifier.
    data.df = cnn.model_2D_classification(
        cnn_model, array, indices, data.df, False
    )

# Data Export

Choose from the two cells below to export the classification result of an individual data file, either as an image or as a CSV file.

In [None]:
# Run this code to export the image
image_path = 'classification.png'
visualizer.plot_2D_df(data3.df, 'is_boundary', image_path)

In [None]:
# Run this code to export the csv file
csv_path = 'classification.csv'
data3.df.to_csv(csv_path, index=False)

# Temporary Code Zone
---

### Merge 3D CSVs

will remove empty rows

In [None]:
import os
import csv

# Merge every CSV in `path` into a single `output_file`.
# The header is taken from the first non-empty input file and written once;
# empty rows are dropped while copying.
path = '3D_DATA/small_db_result'
output_file = '3D_DATA/small_db.csv'

# os.listdir() returns names in arbitrary order; sort so the merge is reproducible.
files = sorted(file for file in os.listdir(path) if file.endswith('.csv'))

with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
    writer = csv.writer(outfile)
    header_written = False

    for index, filename in enumerate(files):
        print(f"merging file: {index + 1}/{len(files)}", flush=True)

        # newline='' lets the csv module handle line endings inside quoted fields.
        with open(os.path.join(path, filename), 'r', newline='', encoding='utf-8') as infile:
            reader = csv.reader(infile)

            # A zero-byte file has no header line; skip it instead of raising StopIteration.
            header = next(reader, None)
            if header is None:
                continue

            if not header_written:
                writer.writerow(header)
                header_written = True

            # Drop empty rows: blank lines (parsed as []) and rows whose fields are all blank.
            writer.writerows(
                row for row in reader if any(field.strip() for field in row)
            )

print("All files merged successfully!")


### Plot Streamline 2D

In [1]:
from visualizer import plot_3D_to_2D_slice_streamline

# Keyword arguments for the 2D slice streamline plot, gathered in one place.
slice_plot_options = {
    'input_file': "3D_DATA/ra10e7_result/ra10e7_100.csv",
    'output_file': "streamlines.html",
    'direction': 'y',
    'seed_points_resolution': [20, 20],
    'max_time': 0.2,
    'cmap': 'viridis',
    'axis_limits': [-0.5, 0.5, 0, 1],
}

plot_3D_to_2D_slice_streamline(**slice_plot_options)

### Plot Streamline 3D

✅ from a manually made velocity field, plot streamline (for test)

✅ generate a csv file from a set velocity field (for test)

✅ read from csv

In [None]:
import pandas as pd
import numpy as np
import pyvista as pv

# Read the CSV file into a DataFrame.
df = pd.read_csv('test.csv')

# Put the rows into a known order: x varies fastest, then y, then z.
# PyVista flattens the coordinate arrays of a StructuredGrid in Fortran order
# (first axis fastest), so with this ordering point i of the grid is row i of
# the DataFrame and the velocity rows can be attached without any permutation.
df = df.sort_values(['z', 'y', 'x'], kind='stable').reset_index(drop=True)

points = df[['x', 'y', 'z']].values
velocities = df[['x_velocity', 'y_velocity', 'z_velocity']].values

# Extract unique coordinate values for each axis (ensure they are sorted)
x_vals = np.sort(df['x'].unique())
y_vals = np.sort(df['y'].unique())
z_vals = np.sort(df['z'].unique())

# Determine grid dimensions
nx, ny, nz = len(x_vals), len(y_vals), len(z_vals)

# The reshape below is only meaningful for a complete regular grid.
if len(df) != nx * ny * nz:
    raise ValueError(
        f"test.csv has {len(df)} rows, expected nx*ny*nz = {nx * ny * nz} for a full regular grid"
    )

# Reshape coordinates in Fortran order so that x[i, j, k] is row i + nx*j + nx*ny*k
x = df['x'].values.reshape((nx, ny, nz), order='F')
y = df['y'].values.reshape((nx, ny, nz), order='F')
z = df['z'].values.reshape((nx, ny, nz), order='F')

# Create the StructuredGrid
grid = pv.StructuredGrid(x, y, z)

# Add the velocity vectors (same order as the grid points, see above)
grid.point_data['velocity'] = velocities

# 3 x 3 x 3 lattice of streamline seed points
seed_x, seed_y, seed_z = np.meshgrid(
    np.linspace(-0.5, 0.5, 3), # min, max, num
    np.linspace(-0.5, 0.5, 3),
    np.linspace(-0.5, 0.5, 3)
    )
seed_x = seed_x.ravel()
seed_y = seed_y.ravel()
seed_z = seed_z.ravel()

seed_points = np.column_stack((seed_x, seed_y, seed_z))
seed = pv.PolyData(seed_points)

streamlines = grid.streamlines_from_source(
    source=seed,
    vectors='velocity',
    integration_direction='both',
    max_time=10,
    initial_step_length=0.01,
    terminal_speed=1e-3
)

# Speed along each streamline, used to colour the tubes
velocity_vectors = streamlines['velocity']
velocity_magnitude = np.linalg.norm(velocity_vectors, axis=1)
streamlines['velocity_magnitude'] = velocity_magnitude

# Visualize and export streamlines as HTML
plotter = pv.Plotter(off_screen=True)
plotter.add_mesh(grid.outline(), color='k')


plotter.add_mesh(
    streamlines.tube(radius=0.01),
    scalars='velocity_magnitude',
    cmap='viridis',
    scalar_bar_args={'title': 'Velocity Magnitude'}
)

plotter.view_isometric()
# Show grid with axis labels
plotter.show_grid(
    xtitle='X',
    ytitle='Y',
    ztitle='Z',
    grid='front'  # Display the grid in front of the scene
)

plotter.export_html('output_file.html')

dealing with empty lines

In [1]:
import pandas as pd
import numpy as np

def fill_empty_rows(file_path:str) -> pd.DataFrame:
    '''
    Fill in the empty rows of a CSV file that holds a Y-slice of 2D grid data.

    The data is expected to be a sequence of equally sized "sandwich" blocks:
        [empty rows] + [non-empty data rows] + [empty rows]
    with the same number of empty rows on both sides. The empty rows come from a
    truncated `x` range. The rows of one non-empty block share the same z
    coordinate and differ in x. The data already lies on a regular grid, so the
    missing x values are extrapolated with the mean x spacing of the first block
    and no interpolation is done. Filled rows copy y and z from the neighbouring
    data row and get zero velocity.

    The block layout (number of leading empty rows, number of data rows) is
    measured on the first block and assumed to hold for the whole file. Rows
    after the last complete block are left untouched.

    Args:
        file_path: Path to the input CSV file containing possibly incomplete 2D slice data.

    Returns:
        pd.DataFrame:
            A DataFrame with the same number of rows as the input, restricted to
            the columns ['x', 'y', 'z', 'x_velocity', 'y_velocity', 'z_velocity'].
            It is returned unchanged when the file has no rows, no empty rows,
            only empty rows, or no leading empty rows (nothing can be filled).

    Raises:
        KeyError: If one of the required columns is missing from the file.
        ValueError: If the first data block has fewer than two rows, so the x
            spacing cannot be determined.
    '''

    # Step 1: read the file and keep only the essential columns
    required_columns = ['x', 'y', 'z', 'x_velocity', 'y_velocity', 'z_velocity']
    df = pd.read_csv(file_path)[required_columns]
    df_filled = df.copy()

    # True for rows in which every required column is missing
    empty = df.isna().all(axis=1).to_numpy()
    non_empty_positions = np.flatnonzero(~empty)

    # Nothing to fill (no empty rows) or nothing to fill from (no data rows)
    if non_empty_positions.size == 0 or non_empty_positions.size == empty.size:
        return df_filled

    # Step 2: count the leading empty rows
    first_non_empty_idx = int(non_empty_positions[0])
    num_empty_rows = first_non_empty_idx
    if num_empty_rows == 0:
        # Without leading padding the sandwich layout has no rows to fill
        return df_filled

    # Step 3: count the non-empty rows of the first block; the block may run to
    # the end of the file, so never index past the last row
    later_empty = np.flatnonzero(empty[first_non_empty_idx:])
    if later_empty.size:
        block_end = first_non_empty_idx + int(later_empty[0])
    else:
        block_end = len(empty)
    num_non_empty_rows = block_end - first_non_empty_idx

    if num_non_empty_rows < 2:
        raise ValueError(
            "Cannot determine the x spacing: the first data block has fewer than two rows"
        )

    # Step 4: mean x spacing of the first block
    x_vals = df_filled['x'].to_numpy()[first_non_empty_idx:block_end]
    x_diff = np.diff(x_vals).mean()

    # Step 5: number of complete sandwiches in the file
    group_size = num_empty_rows * 2 + num_non_empty_rows
    total_groups = len(df_filled) // group_size

    # x offsets of the filled rows relative to the first / last data row
    offsets_above = np.arange(num_empty_rows, 0, -1) * x_diff
    offsets_below = np.arange(1, num_empty_rows + 1) * x_diff
    zeros = np.zeros(num_empty_rows)

    # Step 6: fill both sides of every sandwich
    for i in range(total_groups):
        start_idx = i * group_size
        mid_idx = start_idx + num_empty_rows
        end_idx = mid_idx + num_non_empty_rows

        block = df_filled.iloc[mid_idx:end_idx]
        if block['x'].isna().all():
            continue  # This block has no valid data, skip

        first_row = block.iloc[0]
        last_row = block.iloc[-1]

        # Columns follow the order of required_columns
        rows_above = np.column_stack([
            first_row['x'] - offsets_above,
            np.full(num_empty_rows, first_row['y']),
            np.full(num_empty_rows, first_row['z']),
            zeros, zeros, zeros,
        ])
        rows_below = np.column_stack([
            last_row['x'] + offsets_below,
            np.full(num_empty_rows, last_row['y']),
            np.full(num_empty_rows, last_row['z']),
            zeros, zeros, zeros,
        ])

        df_filled.iloc[start_idx:mid_idx] = rows_above
        df_filled.iloc[end_idx:end_idx + num_empty_rows] = rows_below

    return df_filled

read from multiple files

In [2]:
import pandas as pd
import numpy as np
import pyvista as pv
import glob
import os
import re
# in such/a/folder/name_result, the CSVs will be like name_1.csv, name_2.csv, ...

# Read and concatenate all CSV files in the folder
folder_path = '3D_DATA/ra10e7_result'
# Extract the last part of the path and remove 'result'
base_name = os.path.basename(folder_path).replace('result', '')
# Create the search pattern
pattern = f"{base_name}*.csv"


def _slice_index(csv_path):
    """Sort key: the trailing integer of the file name (name_12.csv -> 12)."""
    stem = os.path.splitext(os.path.basename(csv_path))[0]
    match = re.search(r'(\d+)$', stem)
    # Files without a numeric suffix go last, ordered by path
    return (0, int(match.group(1)), csv_path) if match else (1, 0, csv_path)


# glob returns matches in arbitrary order and a plain sort would be
# lexicographic (name_10 before name_2), so order the files by their numeric
# suffix to keep the slices in sequence.
csv_files = sorted(glob.glob(os.path.join(folder_path, pattern)), key=_slice_index)
if not csv_files:
    raise FileNotFoundError(
        f"No CSV files matching {pattern!r} found in {folder_path!r}"
    )

df_list = [fill_empty_rows(f) for f in csv_files]
# ignore_index gives the combined frame one continuous row index
df = pd.concat(df_list, ignore_index=True)

In [15]:
points = df[['x', 'y', 'z']].values
velocities = df[['x_velocity', 'y_velocity', 'z_velocity']].values

# Grid dimensions are fixed here instead of being derived from
# df['x'].unique(), df['y'].unique() and df['z'].unique().
# Assumed row layout of the concatenated slices (confirm against the data):
# x varies fastest within a slice file, then z, and each file is one y slice.
nx = 200  # samples along x (fastest-varying)
nz = 200  # samples along z within one slice file
ny = 198  # number of y slices (files)

if len(df) != nx * nz * ny:
    raise ValueError(
        f"df has {len(df)} rows, expected nx*nz*ny = {nx * nz * ny}"
    )

# Reshape coordinates.
# PyVista flattens the coordinate arrays of a StructuredGrid in Fortran order
# (first axis fastest). Reshaping in Fortran order therefore keeps grid point i
# equal to DataFrame row i, so the velocity rows below line up with the points.
x = df['x'].values.reshape((nx, nz, ny), order='F')
y = df['y'].values.reshape((nx, nz, ny), order='F')
z = df['z'].values.reshape((nx, nz, ny), order='F')

# Create the StructuredGrid
grid = pv.StructuredGrid(x, y, z)

# Add the velocity vectors (row order matches the grid point order, see above)
grid.point_data['velocity'] = velocities

In [16]:
x_lo, x_hi, y_lo, y_hi, z_lo, z_hi = grid.bounds

# Lay out a 10 x 10 x 10 lattice of seed points spanning the grid bounds
axis_ticks = [
    np.linspace(lower, upper, 10)
    for lower, upper in ((x_lo, x_hi), (y_lo, y_hi), (z_lo, z_hi))
]
lattice = np.meshgrid(*axis_ticks, indexing='ij')

# One row per seed point: (x, y, z)
seed_points = np.column_stack([axis.ravel() for axis in lattice])
seed = pv.PolyData(seed_points)

# Trace streamlines forward from the seeds through the 'velocity' field
streamline_options = {
    'vectors': 'velocity',
    'integration_direction': 'forward',
    'max_time': 0.1,
    'initial_step_length': 0.01,
    'terminal_speed': 1e-3,
}
streamlines = grid.streamlines_from_source(source=seed, **streamline_options)

In [9]:
# Store the speed (vector norm) on the streamlines so the tubes can be coloured by it
streamlines['velocity_magnitude'] = np.linalg.norm(streamlines['velocity'], axis=1)

# Off-screen scene: domain outline plus the streamline tubes
plotter = pv.Plotter(off_screen=True)
domain_outline = grid.outline()
plotter.add_mesh(domain_outline, color='k')

streamline_tubes = streamlines.tube(radius=0.005)
plotter.add_mesh(
    streamline_tubes,
    scalars='velocity_magnitude',
    cmap='viridis',
    scalar_bar_args={'title': 'Velocity Magnitude'},
)

plotter.view_isometric()

# Labelled axes grid, drawn in front of the scene
plotter.show_grid(xtitle='X', ytitle='Y', ztitle='Z', grid='front')

# Export the scene as HTML
plotter.export_html('output_file.html')

Test

In [11]:
# Take the block of 40000 consecutive rows that starts at row 40000 * 99
rows_per_block = 40000
block_number = 99
df_test = df.iloc[rows_per_block * block_number : rows_per_block * (block_number + 1)]

In [18]:
# Regular 200 x 200 grid in the x-z plane at y = 0
x_values = np.linspace(-0.5, 0.5, 200)
z_values = np.linspace(0, 1, 200)

# Mean grid spacing along each axis
dx = np.diff(x_values).mean()
dz = np.diff(z_values).mean()
mean_spacing = 0.5 * (dx + dz)

grid_x, grid_z = np.meshgrid(x_values, z_values, indexing='ij')
grid_y = np.zeros_like(grid_x)
grid = pv.StructuredGrid(grid_x, grid_y, grid_z)

# In-plane velocity from df_test; the y component is set to zero
vx = df_test['x_velocity'].to_numpy()
vz = df_test['z_velocity'].to_numpy()
velocity = np.column_stack((vx, np.zeros_like(vx), vz))
grid.point_data['velocity'] = velocity

# 10 x 10 seed points spread over the plane
x_seed, z_seed = np.meshgrid(
    np.linspace(x_values[0], x_values[-1], 10),
    np.linspace(z_values[0], z_values[-1], 10),
)
x_seed, z_seed = x_seed.ravel(), z_seed.ravel()
y_seed = np.zeros_like(x_seed)

seed_points = np.column_stack((x_seed, y_seed, z_seed))
seed = pv.PolyData(seed_points)

# Integrate in both directions, starting with a step of one mean grid spacing
streamlines = grid.streamlines_from_source(
    source=seed,
    vectors='velocity',
    integration_direction='both',
    max_time=10,
    initial_step_length=mean_spacing,
    terminal_speed=1e-3,
)

# Store the speed on the streamlines so the tubes can be coloured by it
streamlines['velocity_magnitude'] = np.linalg.norm(streamlines['velocity'], axis=1)

plotter = pv.Plotter(off_screen=True)
plotter.add_mesh(grid.outline(), color='k')

# Tube radius is half of the mean grid spacing
streamline_tubes = streamlines.tube(radius=mean_spacing * 0.5)
plotter.add_mesh(
    streamline_tubes,
    scalars='velocity_magnitude',
    cmap='viridis',
    scalar_bar_args={'title': 'Velocity Magnitude'},
)
plotter.view_xz()

# Labelled axes grid, drawn in front of the scene
plotter.show_grid(xtitle='X', ytitle='Y', ztitle='Z', grid='front')

# Fully transparent flat box covering the whole x-z domain at y = 0
domain_bounds = (
    -0.5, 0.5,  # x bounds
    0, 0,       # y bounds (2D plane at y=0)
    0, 1,       # z bounds
)
box = pv.Box(bounds=domain_bounds)
plotter.add_mesh(box, opacity=0.0, show_edges=False)

plotter.export_html("streamlines.html")