
# Transform zebrafish 3d data for visualisation in Unity
 created 7 Mar 2025, last updated 4 Apr 2025

In [2]:
import numpy as np
import h5py
import csv
from scipy.io import loadmat
from scipy.io.matlab._mio5_params import mat_struct
import pandas as pd
import regex as re

In [7]:
# first get position data from fish 68 and save it to a csv file
# remember to refactor to use scipy for consistency ... once it works
matfile = "Calcium data/Hom/raw_fish_std_fmt_68.mat"
csv_filename = 'ZF.calcium_position_data.csv'

# get the position data from the matlab file 
header = ['SWC Index', 'SWC Type', 'xpos', 'ypos', 'zpos', 'radius', 'SWC Parent','Label', 'Region', 'Subregion']
fishData = ExtractPositionData(matfile)

# set up columns to add region details
fishData['Label'] = ""
fishData['Region']  = [[] for _ in range(fishData.shape[0])] # add empty list for region
fishData['Subregion'] = [[] for _ in range(fishData.shape[0])] # add empty list for subregion

# next get region breakdown for each neuron in fish 68
file = "Calcium data/region_roi_information/regioninfo_68.mat"

# Extract regions and subregions as a dataframe 
regionsdf = Extract_Regions_and_Subregions(file)

# create a list of wrong indicies
wrongix = []

for index, row in regionsdf.iterrows():
        thisRegion = row['Region']
     
        if (thisRegion is not None) and (row['Idx'] is not None):
            for thisstr in row['Idx']:
                    # Ensure thisstr is a string before applying regex
                if isinstance(thisstr, str):
                    indicies = [int(num) for num in re.findall(r'\d+', thisstr)]
                elif isinstance(thisstr, int):  # Handle single integers
                    indicies = [thisstr]
                else:
                    indicies = []  # Handle unexpected cases

                for ix in indicies:
                    if 0 <= ix < len(fishData):
                        # region is 8th column
                        # add this region to the list of regions
                        fishData.iloc[ix, 8].append(thisRegion)
                    else:
                        print(f"Index {ix} is out of bounds for fishData.")
                        wrongix.append(ix)
            
print("these are the wrong indicies")
print(wrongix)

regions_filename = 'ZF.region_and_subregion_data.csv'
regionsdf.to_csv(regions_filename, index=False)

# create a simplifed version with just the regions (not broken down by subregion)
simple_df = regionsdf.groupby('Region', as_index=False).agg({
        'Idx': lambda x: [item for sublist in x if sublist is not None for item in sublist]})
simple_csv_filename = 'ZF.simple_region_data.csv'
simple_df.to_csv(simple_csv_filename, index=False)

# save fishdata with regions and subregions
fishData.to_csv(csv_filename, index=False, header=header)




Index 17261 is out of bounds for fishData.
Index 17261 is out of bounds for fishData.
Index 17261 is out of bounds for fishData.
these are the wrong indicies
[17261, 17261, 17261]


In [130]:
fishData.iloc[17260:,]

Unnamed: 0,SWC Index,SWC Type,xpos,ypos,zpos,radius,SWC Parent,Label,Region,Subregion
17260,17260,0,334.241,526.155,126.973,1,0,,"[Diencephalon, Diencephalon]",[]


In [3]:

# get the ROI breakdown and save to CSV file
def ExtractROIBreakdown(f, saveFName):
       print("Extracting ROI breakdown data...")

def Extract_Regions_and_Subregions(file):

    try:
        regionData = loadmat(file, struct_as_record=False, squeeze_me=True)
        # keys are
        #  ['__header__', '__version__', '__globals__', 'fish_ROI_centroids', 'perRegion']
    except Exception as e:
        print(f"Error reading file: {e}")

    region_struct = regionData['perRegion']
    #Attributes in perRegion:
    #'_fieldnames': ['Diencephalon', 'Ganglia', 'Mesencephalon', 'Rhombencephalon', 'Spinal', 'Telencephalon'], 'Diencephalon'

    regions = []
    subregions = []
    indices = []  # To store the idx values

    # Iterate through the top-level regions
    for region_name in region_struct._fieldnames:
        if region_name is not None:  
                subregion_data = getattr(region_struct, region_name)
                # Check if subregion_data is a MATLAB struct
                if isinstance(subregion_data, mat_struct):
                    # Iterate through the fields of the subregion struct
                    for subregion_name in subregion_data._fieldnames:
                        subregion = getattr(subregion_data, subregion_name)
                        regions.append(region_name)
                        subregions.append(subregion_name)

                        # Check if the subregion has an 'idx' field
                        if hasattr(subregion, 'idx'):
                            idx_value = subregion.idx
                            # Ensure idx_value is a list
                            if isinstance(idx_value, int):
                                indices.append([idx_value])  # Wrap single int in a list
                            elif isinstance(idx_value, np.ndarray):
                                indices.append(idx_value.tolist())  # Convert NumPy array to list
                            else:
                                indices.append(idx_value)  # Assume it's already a list
                        else:
                            indices.append([])  # Append an empty list if 'idx' is missing
                elif isinstance(subregion_data, (np.ndarray, list)):
                    # If subregion_data is iterable, iterate through it
                    for subregion in subregion_data:
                        regions.append(region)
                        subregions.append(subregion)
                        indices.append([])  # Append an empty list if 'idx' is not applicable
                else:
                    # If it's a single subregion, add it directly
                    regions.append(region)
                    subregions.append(subregion_data)
                    indices.append([])  # Append an empty list if 'idx' is not applicable

    # Create a DataFrame to store the results
    region_df = pd.DataFrame({
        'Region': regions,
        'Subregion': subregions,
        'Idx': indices
    })

    return region_df


# get the position (ROI centroids) and save to CSV file
def ExtractPositionData(thisfile):
        
        f = h5py.File(thisfile,'r')

        yxzpos = f.get('fish_ROI_centroids')
        swc_data = []

        # Check if the data is a MATLAB cell array
        if yxzpos.attrs.get('MATLAB_class') == b'double':
            # Iterate through the elements of the cell array
            nbrOfCols = len( yxzpos[0])
            for j in range(0,nbrOfCols):
                    #if j < 3:
                        # print(test[i])
                    SWCIndex = j;
                    SWCType = 0;
                    SWCRadius = 1;
                    SWCParent = 0;
                    ypos = yxzpos[0][j]
                    xpos = yxzpos[1][j]
                    zpos = yxzpos[2][j]
                    swc_data.append([SWCIndex, SWCType, xpos, ypos, zpos, SWCRadius, SWCParent])
                    radius = 1   
        else:
            print("\n!!!!Non matlab cell array Shape:", yxzpos.shape)
 
        # convert list to dataframe
        swc_df = pd.DataFrame(swc_data, columns=['SWC Index', 'SWC Type', 'xpos', 'ypos', 'zpos', 'radius', 'SWC Parent'])

        return swc_df



In [None]:
# 


In [None]:

# this code needs more work
# get the fish stim trains normalised signal data
signals = f.get('fish_stim_trains')

all_signals = []

# Check if the data is a MATLAB cell array
if signals.attrs.get('MATLAB_class') == b'cell':
    nbrOfCols = len(signals)
    # Iterate through the elements of the cell array
    for i in range(nbrOfCols):
        cell_element = signals[i]
        # Dereference the HDF5 object reference
        dereferenced_element = f[cell_element[0]]
        cell_data = np.array(dereferenced_element)

        
        neuron_index_column = np.full((cell_data.shape[0], 1), i)
        cell_data_with_index = np.hstack((neuron_index_column, cell_data))
        print(cell_data_with_index)

        all_signals.append(cell_data_with_index)

else:
    # Convert to a NumPy array and print the shape and data type
    signals = np.array(signals)
    print("\n!!!!Non matlab cell array Shape:", signals.shape)
    print("Data type:", signals.dtype)
    print(signals)
    all_signals.append(signals)

# Convert the list of signals to a NumPy array
all_signals_array = np.vstack(all_signals)

# Print the resulting NumPy array
print("\nAll Signals Array:")
print(all_signals_array.shape)
#print(all_signals_array)


# Extract the first column (neuron IDs) and get the unique IDs
neuron_ids = all_signals_array[:, 0]
unique_neuron_ids = np.unique(neuron_ids)

# Print the unique neuron IDs
print("\nUnique Neuron IDs:")
print(unique_neuron_ids)

[[ 0.          0.02115322  0.03495911 ...  0.01705254  0.01064011
   0.00040685]
 [ 0.          0.02772463  0.05486758 ...  0.01479071  0.00439129
   0.00624719]
 [ 0.          0.03011667  0.08399945 ...  0.0164024   0.01734564
   0.01085587]
 ...
 [ 0.          0.02041504  0.01531125 ... -0.00708327  0.00158612
  -0.00681418]
 [ 0.          0.01391963  0.01590708 ...  0.0051722   0.00316357
   0.00900438]
 [ 0.          0.01685828  0.00661121 ...  0.00389796 -0.00510769
  -0.00437695]]

All Signals Array:
(4200, 17262)

Unique Neuron IDs:
[0.]


In [None]:
signals = f.get('fish_stim_trains')