In [45]:
import pandas as pd
import numpy as np

In [46]:
# Input recording and the CSV file the decoded table is later written to
bin_name = '1mbitps4.bin'
csv_name = "1mbitps4.csv"

# Load the whole recording into memory as a single bytes object
with open(bin_name, mode="rb") as bin_file:
    byte_data = bin_file.read()

In [47]:
# Render the raw bytes as one hex string (two characters per byte) and
# preview the first 28*8 characters
hex_string_ori = byte_data.hex()
print('----raw data', hex_string_ori[:28*8], sep='\n')

# hex_string = hex_string_ori[6+8*12:]


----raw data
bdbc0bb9c1bc0cedc0bc0d410100007ddfe7013dc1bc02a9babf03f7c2bc04cebebc05f1c00000aaaaaaaa12bfbc0860bebc09fdbebc0af0bdbc0bc2c1bc013dc1bc0261b9bf03f9c2bc04d1bebc05f2c00000aaaaaaaa12bfbc085cbebc09ffbebc0af2bdbc0bcac1bc0cf4c0bc0d37


In [50]:
def find_indicator_indices(long_string, substring):
    """Return the start offsets of every non-overlapping match of ``substring``.

    The search runs left to right; after a match at offset ``k`` it resumes at
    ``k + len(substring)``, so overlapping occurrences are not reported.

    Args:
        long_string: The string to search in.
        substring: The non-empty pattern to look for.

    Returns:
        A list of integer offsets into ``long_string`` in ascending order
        (empty when there is no match).

    Raises:
        ValueError: If ``substring`` is empty. An empty pattern matches at the
            current position without advancing it, which previously made the
            loop spin forever.
    """
    if not substring:
        raise ValueError("substring must not be empty")

    indices = []
    step = len(substring)
    start = long_string.find(substring)
    while start != -1:
        indices.append(start)
        # Resume right after the current match
        start = long_string.find(substring, start + step)
    return indices








In [51]:
# Quality control of the marker spacing.
# A complete frame holds nCh words of nBits hex characters each, so two
# adjacent 'aaaaaaaa' markers must be exactly nCh * nBits characters apart.
# Every marker that touches a differently sized interval is reported.
display_flag = 0
nCh = 14
nBits = 8
full_width = int(nCh * nBits)
half_width = int(nCh / 2 * nBits)

indicator_indices = find_indicator_indices(hex_string_ori, 'aaaaaaaa')

# Distance from each marker to the next one
diff_ind_indices = [nxt - cur for cur, nxt in zip(indicator_indices, indicator_indices[1:])]

if display_flag:
    print(indicator_indices[:100])
    print('The intervals between two adjacent aaaaaaaa:')
    print(diff_ind_indices[:100])

# Interval k lies between markers k and k + 1; flag both ends of a bad interval
not_norm_indices = sorted({
    marker
    for pos, gap in enumerate(diff_ind_indices) if gap != full_width
    for marker in (pos, pos + 1)
})
print(not_norm_indices)

if not_norm_indices:
    print('Some missing/redundant data exist!:')
    for ind in not_norm_indices:
        print(f'{indicator_indices[ind]}')
else:
    print('The data passed the QA check!')

# Markers whose previous AND next interval both have the expected width.
# Starting at 1 skips the first marker; the last marker has no next interval.
norm_diff_indices = [
    k for k in range(1, len(diff_ind_indices))
    if diff_ind_indices[k - 1] == full_width and diff_ind_indices[k] == full_width
]

print('Check the first 10 indices where the intervals are 112 from the previous and the next aaaaaaaa:')
print(norm_diff_indices[:10])



[0, 1]
Some missing/redundant data exist!:
78
166
Check the first 10 indices where the intervals are 112 from the previous and the next aaaaaaaa:
[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]


In [52]:
# full_width

In [54]:
# Translate marker ordinals into character offsets within hex_string_ori.
# Only the "norm" markers are used for voltage decoding; the flagged ones
# are kept separately as discarded.
selected_indicator_indices = [indicator_indices[k] for k in norm_diff_indices]
discarded_indicator_indices = [indicator_indices[k] for k in not_norm_indices]

print('Check the first 10 indices in the original data string which will be discarded:',
      discarded_indicator_indices[:10], sep='\n')
print('Check the first 10 indices in the original data string which are robust:',
      selected_indicator_indices[:10], sep='\n')

Check the first 10 indices in the original data string which will be discarded:
[78, 166]
Check the first 10 indices in the original data string which are robust:
[278, 390, 502, 614, 726, 838, 950, 1062, 1174, 1286]


In [55]:
# Cut one frame per selected marker: half_width characters on either side of
# the marker offset, clamped to the bounds of the hex string.
# Each chunk belongs to a single timestamp.
chunks = [
    hex_string_ori[max(0, idx - half_width):min(len(hex_string_ori), idx + half_width)]
    for idx in selected_indicator_indices
]

print(f'length of chunks (timestamps): {len(chunks)}')
print(chunks[:10])

length of chunks (timestamps): 15320
['37010000dee4e70139c1bc0219b7bf03fbc2bc04d1bebc05f3c00000aaaaaaaa11bfbc085dbebc09f8bebc0aedbdbc0bbdc1bc0cefc0bc0d', '420100003fe7e70137c1bc02dbb3bf03efc2bc04cbbebc05f4c00000aaaaaaaa06bfbc0851bebc09eebebc0aebbdbc0bbac1bc0cefc0bc0d', '4601000093e9e70132c1bc0268b2bf03edc2bc04c6bebc05f5c00000aaaaaaaa05bfbc0853bebc09eabebc0ae5bdbc0bb9c1bc0cecc0bc0d', '3f01000005ece70132c1bc024db2bf03f2c2bc04c9bebc05f6c00000aaaaaaaa0cbfbc085abebc09f3bebc0ae5bdbc0bbbc1bc0ceac0bc0d', '3f01000079eee7013bc1bc0214b2bf03f4c2bc04cebebc05f7c00000aaaaaaaa0fbfbc085dbebc09f4bebc0ae6bdbc0bbec1bc0cf0c0bc0d', '4b01000002f1e70140c1bc022db2bf03f6c2bc04d1bebc05f8c00000aaaaaaaa0abfbc0857bebc09f2bebc0ae5bdbc0bb9c1bc0cf2c0bc0d', '5201000084f3e7014bc1bc026fb2bf030bc3bc04e1bebc05f9c00000aaaaaaaa22bfbc0871bebc0906bfbc0afabdbc0bd6c1bc0c00c1bc0d', '4e0100006af6e70141c1bc02dcb2bf0303c3bc04d3bebc05fac00000aaaaaaaa17bfbc0864bebc0902bfbc0af5bdbc0bcec1bc0cfac0bc0d', '450100005df9e7014ec1bc0261b3bf030

In [56]:
# Reverse the order of the two-character groups of a hex string
def swap_adjacent_pairs(hex_str):
    """Split ``hex_str`` into two-character groups and join them in reverse order.

    The characters inside each group keep their order, e.g. ``"12bfbc08"``
    becomes ``"08bcbf12"``. For an odd-length input the final group holds a
    single character and therefore ends up first in the result.
    """
    pairs = []
    for pos in range(0, len(hex_str), 2):
        pairs.append(hex_str[pos:pos + 2])
    pairs.reverse()
    return "".join(pairs)


def hex_to_signed_int(hex_str, bits=24):
    """Interpret a hex string as a two's-complement signed integer.

    Args:
        hex_str: Hexadecimal digits (no ``0x`` prefix required), most
            significant digit first.
        bits: Width of the two's-complement word. Defaults to 24, which is
            the width this function was previously hard-coded to.

    Returns:
        The signed integer value. Values with the sign bit (bit ``bits - 1``)
        set are mapped to negative numbers by subtracting ``2**bits``.

    Raises:
        ValueError: If ``hex_str`` is not valid hexadecimal (raised by ``int``).

    Note:
        The input is expected to fit in ``bits`` bits. Wider values are not
        masked; they simply have ``2**bits`` subtracted once, exactly as the
        original 24-bit-only version did.
    """
    int_value = int(hex_str, 16)
    if int_value >= 1 << (bits - 1):  # sign bit set -> negative in two's complement
        int_value -= 1 << bits
    return int_value

def cal_vol_from_digit(chunk):
    """Convert one channel word (hex string) into a scaled voltage value.

    The first two hex characters of ``chunk`` are skipped and the rest is
    decoded with ``hex_to_signed_int``. The signed code is then scaled so
    that the largest positive 24-bit code (2**23 - 1) maps to 5.00.
    """
    full_scale_code = 2**(24-1) - 1
    raw_code = hex_to_signed_int(chunk[2:])
    return raw_code * 5.00 / full_scale_code



In [59]:
# Decode every marker-delimited frame into one table row.
# Row k corresponds to indicator_indices[k]:
#   * selected (robust) marker -> timestamp + 12 voltages
#   * discarded marker         -> timestamp only, voltages stay NaN
#   * neither                  -> whole row stays NaN and a message is printed
columns = ['timestamp', 'ch0', 'ch1', 'ch2', 'ch3', 'ch4', 'ch5',
           'ch8', 'ch9', 'cha', 'chb', 'chc', 'chd']

# Word positions inside a frame that carry voltages, in the same order as
# columns[1:]. Position 6 is the timestamp word; position 7 is the
# 'aaaaaaaa' marker itself and is skipped.
voltage_slots = [0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13]
timestamp_slot = 6

ch_size = 8  # 8 hex characters = 32 bits

# Fill a preallocated float array and build the DataFrame once at the end.
# Writing row by row through df.loc is far slower and yields the same
# all-float64 table.
num_rows = len(indicator_indices)
table = np.full((num_rows, len(columns)), np.nan)

# Convert to sets just for lookup
selected_ind_set = set(selected_indicator_indices)
discarded_ind_set = set(discarded_indicator_indices)

for num, idx in enumerate(indicator_indices):
    if num%5000==0:
        print(f'processing {num}/{len(indicator_indices)}')

    if idx in selected_ind_set:
        # Crop the full frame around the marker. Selected markers have a
        # full-width interval on both sides, so the clamps are not expected
        # to trigger here.
        start = max(0, idx - half_width)
        end = min(len(hex_string_ori), idx + half_width)
        chunk = hex_string_ori[start:end]

        # Split into words and reverse the byte order of each one
        words = [swap_adjacent_pairs(chunk[i:i+ch_size])
                 for i in range(0, len(chunk), ch_size)]

        # The timestamp word is decoded as a plain unsigned integer.
        # Going through hex_to_signed_int would apply a 24-bit sign wrap and
        # turn any counter value >= 2**23 into a negative number.
        # NOTE(review): assumes the whole 32-bit word is the counter - confirm
        # against the hardware frame format.
        table[num, 0] = int(words[timestamp_slot], 16)
        table[num, 1:] = [cal_vol_from_digit(words[slot]) for slot in voltage_slots]

    elif idx in discarded_ind_set:
        # Only the word directly ahead of the marker (the timestamp) is used;
        # the voltages of a misaligned frame stay NaN.
        if idx >= ch_size:
            ts_word = hex_string_ori[idx - ch_size:idx]
            table[num, 0] = int(swap_adjacent_pairs(ts_word), 16)
        # else: fewer than ch_size characters precede the marker, so there is
        # no complete timestamp word to decode; the row stays NaN.

    else:
        print(f"idx-{idx} not in the previous two conditions: maybe a bug or the last chunk of the data")

df = pd.DataFrame(table, columns=columns)

processing 0/15323
processing 5000/15323
processing 10000/15323
processing 15000/15323
idx-1716118 not in the previous two conditions: maybe a bug or the last chunk of the data


In [60]:
# Preview the first ten decoded rows
df.head(10)

Unnamed: 0,timestamp,ch0,ch1,ch2,ch3,ch4,ch5,ch8,ch9,cha,chb,chc,chd
0,49393.0,,,,,,,,,,,,
1,49394.0,,,,,,,,,,,,
2,49395.0,0.000185,-0.94164,-2.626767,-2.511124,-2.626499,-2.627134,-2.627096,-2.627203,-2.627111,-2.62727,-2.626688,-2.626811
3,49396.0,0.000192,-0.941277,-2.626768,-2.511619,-2.626506,-2.627138,-2.627102,-2.62721,-2.627117,-2.627271,-2.62669,-2.626811
4,49397.0,0.000194,-0.940922,-2.626771,-2.51184,-2.626507,-2.627141,-2.627103,-2.627209,-2.627119,-2.627275,-2.626691,-2.626813
5,49398.0,0.00019,-0.940549,-2.626771,-2.511856,-2.626504,-2.627139,-2.627099,-2.627205,-2.627114,-2.627275,-2.626689,-2.626814
6,49399.0,0.00019,-0.940175,-2.626766,-2.51189,-2.626503,-2.627136,-2.627097,-2.627203,-2.627113,-2.627274,-2.626688,-2.62681
7,49400.0,0.000197,-0.939788,-2.626763,-2.511875,-2.626502,-2.627134,-2.6271,-2.627207,-2.627114,-2.627275,-2.626691,-2.626809
8,49401.0,0.000201,-0.939405,-2.626756,-2.511836,-2.626489,-2.627125,-2.627086,-2.627191,-2.627102,-2.627262,-2.626673,-2.626801
9,49402.0,0.000199,-0.938963,-2.626762,-2.511771,-2.626494,-2.627133,-2.627092,-2.627199,-2.627105,-2.627265,-2.626678,-2.626804


In [61]:
# Write the decoded table to csv_name without the row-index column
df.to_csv(path_or_buf=csv_name, index=False)


295200

input:
.bin data

output:
- voltage, rotation (rad/degree)



formula: 
calculation of each angle = linear combination of channels

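- $\theta = a_1\,\mathrm{Ch}_1 + a_2\,\mathrm{Ch}_2 + a_3\,\mathrm{Ch}_3 + a_4\,\mathrm{Ch}_4 + a_5\,\mathrm{Ch}_5 + a_8\,\mathrm{Ch}_8 + a_9\,\mathrm{Ch}_9 + \dots + a_{14}\,\mathrm{Ch}_{14}$ (one coefficient $a_i$ per channel)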


synchronization
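1. detect the 8-character marker `aaaaaaaa` in the hex string
2. locate the beginning of the frame relative to that marker
3. reverse the order of the two-digit (byte) pairs in each chunk element to decode it; the timestamp is the 8 digits immediately before `aaaaaaaa`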

calibration:
linear scaling.
- manual input of scaling factor
- automatic estimation of the factor
  
considerations:
- how to synchronize between illustration and recording.
- the scaling factor is different across two eyes.
   - scaling factor per angle (?)
   - 4 cali points (ref1, ref2, ref3, ref4), (actual1,.. actual4) -> how to combine the set of angles?
  
- Currently the timestamp starts from the hardware