# Scale


## File format

| Location | Length | Endianess | format                      | Value      |
| -------- | ------ | --------- | --------------------------- | ---------- |
| 0x0      | 0x200  |           |                             | header     |
| 0x200    | 2 byte |           |                             | padding    |
| 0x202    | 4 byte | little    | float                       | Time (min) |
| 0x206    | 4 byte | big       | int \* 20, two's complement | 190        |
| 0x22A    | 4 byte | big       | int \* 20, two's complement | 200        |
| ...      |        |           |                             |            |
| 0x298    | 4 byte | big       | int \* 20, two's complement | 360        |
| 0x29C    | 2 byte |           |                             | padding    |
| ...      |        |           |                             |            |
| EoF      |        |           |                             |            |

no footer


## Observations

line3 -> 0.0004 -> 0x39d1b717 ->  (17 b7 d1 39) -> location: 0x250

line4 -> 0.0009 -> 0x3a6bedfa ->  (fa ed 6b 3a) -> location: 0x29e

line5 -> 0.0013 -> 0x3aaa64c3 -> (c3 64 aa 3a) -> location: 0x2ec

...

line-1 -> 5.0000 -> 0x40a00000 ->  (00 00 a0 40) -> location: 0xdb9d6



`48 48`

line 2: [0.0000,-1,-2,27,23,98,1,48,69,-1,-2093,39,-1,822,1,0,16,40,0]

`00 00 00 00 [FF FF FF EC] [FF FF FF D8] 00 00 02 1C 00 00 01 CC 00 00 07 A8 00 00 00 14 00 00 03 C0 00 00 05 64 [FF FF FF EC] FF FF 5C 7C 00 00 03 0C [FF FF FF EC] 00 00 40 38 00 00 00 14 00 00 00 00 00 00 01 40 00 00 03 20 00 00 00 00 [48 48]`

line 3: [0.0004,0,0,26,23,99,1,50,70,-1,-2109,36,-1,830,-1,0,14,40,-1]

`17 B7 D1 39 00 00 00 00 00 00 00 00 00 00 02 08 00 00 01 CC 00 00 07 BC 00 00 00 14 00 00 03 E8 00 00 05 78 [FF FF FF EC] FF FF 5B 3C 00 00 02 D0 [FF FF FF EC] 00 00 40 D8 [FF FF FF EC] 00 00 00 00 00 00 01 18 00 00 03 20 [FF FF FF EC] 48 48`

line 4: [0.0009,-2,1,26,24,100,-1,47,68,-2,-2132,38,-1,835,0,-2,15,40,0]

`FA ED 6B 3A [FF FF FF D8] 00 00 00 14 00 00 02 08 00 00 01 E0 00 00 07 D0 [FF FF FF EC] 00 00 03 AC 00 00 05 50 [FF FF FF D8] FF FF 59 70 00 00 02 F8 [FF FF FF EC] 00 00 41 3C 00 00 00 00 [FF FF FF D8] 00 00 01 2C 00 00 03 20 [00 00 00 00 48 48]` 


Seems `48 48` padding is used, and data is in 4-byte chunks. let me map -1, -2, 1, 2 to it's hex


In [1]:
import struct
import binascii


def float_to_hex(f):
    print(hex(struct.unpack('<I', struct.pack('<f', f))[0]))


float_to_hex(0.0004)
float_to_hex(0.0009)
float_to_hex(0.0013)
float_to_hex(0.3809)
float_to_hex(5.0000)
float_to_hex(0.5622)


0x39d1b717
0x3a6bedfa
0x3aaa64c3
0x3ec30553
0x40a00000
0x3f0fec57


In [2]:
# negative number is 2-complement, then scale 20 to hex
def int_scale_to_hex(i):
    if i < 0:
        i = 2 ** 32 + i
    return hex(i * 20)


def hex_to_int_scale(h):
    i = int(h, 16) // 20
    if i >= 2 ** 31:
        i -= 2 ** 32
    return i


for x in [-1, -2, 27, 23, 98, 1, 48, 69, -1, -2093, 39, -1, 822, 1, 0, 16, 40,
          0]:
    hex_val = int_scale_to_hex(x)
    print(x, hex_val, hex_to_int_scale(hex_val))

-1 0x13ffffffec -1
-2 0x13ffffffd8 -2
27 0x21c 27
23 0x1cc 23
98 0x7a8 98
1 0x14 1
48 0x3c0 48
69 0x564 69
-1 0x13ffffffec -1
-2093 0x13ffff5c7c -2093
39 0x30c 39
-1 0x13ffffffec -1
822 0x4038 822
1 0x14 1
0 0x0 0
16 0x140 16
40 0x320 40
0 0x0 0


In [3]:
import struct
import pandas as pd
import os


def byte_to_int(byte_str):
    """
    Converts a 4-byte string to an integer, handling two's complement for negative values.

    :param byte_str: 4-byte string to convert.
    :return: Integer representation of the byte string.
    """

    # Convert the byte string to an unsigned integer
    unsigned_int = int.from_bytes(byte_str, byteorder='big', signed=False)

    # Check if the integer is negative (using 2's complement)
    if unsigned_int >= 2 ** 31:
        return (unsigned_int - 2 ** 32)
    else:
        return unsigned_int


def extract_scale_to_df(input_path, header_size=0x200, footer_size=0):
    """
    Extracts binary data from a file to a DataFrame.

    :param input_path: Path to the binary file.
    :param header_size: Size of the header in bytes.
    :param footer_size: Size of the footer in bytes.
    :return: DataFrame containing the extracted data.
    """
    # Initialize columns
    columns = [[] for _ in
               range(19)]  # Assuming 19 columns based on the provided data

    with open(input_path, 'rb') as f:
        # Skip the header
        f.seek(header_size)

        # Calculate the size of the body (excluding header and footer)
        file_size = os.path.getsize(input_path)
        body_size = file_size - header_size - footer_size

        # Read the body
        bytes_read = 0

        while bytes_read < body_size:

            chunk = f.read(78)

            # Ignore the first 2 padding bytes [48 48]
            chunk = chunk[2:]

            # Get first column as float round to 4 digit
            float_val = struct.unpack('<f', chunk[:4])[0]
            columns[0].append(f"{float_val:.4f}")

            # Extract the rest of the chunk as integers, downscale by 20
            for i in range(1, 19):
                int_val = byte_to_int(chunk[i * 4:(i + 1) * 4]) // 20
                columns[i].append(int_val)

            bytes_read += 78

    # Create DataFrame
    # Time (min),190,200,210,220,230,240,250,260,270,280,290,300,310,320,330,340,350,360

    # Define the column names
    column_names = ['Time (min)', 190, 200, 210, 220, 230, 240, 250, 260, 270,
                    280, 290, 300, 310, 320, 330, 340, 350, 360]

    # Create DataFrame using dictionary comprehension
    df = pd.DataFrame({name: columns[i] for i, name in enumerate(column_names)})
    return df


def main(input_path=None, header_size=0x200, footer_size=0):
    """
    Extracts binary data from a file to a DataFrame and saves it to a CSV file.

    :param input_path: Path to the binary file.
    :param header_size: Size of the header in bytes.
    :param footer_size: Size of the footer in bytes.
    """
    if input_path is None:
        input_path = input('Enter the path to the binary file: ')

    # Extract the binary data to a DataFrame
    df = extract_scale_to_df(input_path, header_size, footer_size)
    # Save the DataFrame to a CSV file
    df.to_csv(input_path + '.csv', index=False)


# Test

In [4]:
header_size = 0x200
footer_size = 0
file_path = "./scale/sample/scale"
result = extract_scale_to_df(file_path, header_size, footer_size)
print("head\n", result.head())
print("tail\n", result.tail())

# Read the CSV file into a DataFrame
csv_df = pd.read_csv("./scale/sample/scale.csv")

# Compare the DataFrames
print("DataFrames are equal:", result.equals(csv_df))

head
   Time (min)  190  200  210  220  230  240  250  260  270   280  290  300  \
0     0.0000   -1   -2   27   23   98    1   48   69   -1 -2093   39   -1   
1     0.0004    0    0   26   23   99    1   50   70   -1 -2109   36   -1   
2     0.0009   -2    1   26   24  100   -1   47   68   -2 -2132   38   -1   
3     0.0013    0   -1   25   21  101   -1   47   68   -1 -2153   39   -2   
4     0.0017   -2    0   26   21   98   -1   48   68    1 -2173   37    0   

   310  320  330  340  350  360  
0  822    1    0   16   40    0  
1  830   -1    0   14   40   -1  
2  835    0   -2   15   40    0  
3  841   -2   -1   13   41    0  
4  846    0    0   14   40   -1  
tail
       Time (min)  190  200  210  220  230  240  250  260  270  280  290  300  \
11522     4.9983    0   -1   26   22  101    1   49   70   -1   75   -1   -1   
11523     4.9987   -2   -1   27   23  100   -2   47   71    1   76    0    0   
11524     4.9991   -2    1   26   23  100   -2   50   70    0   74    0   -2   
1

## Decode the binary files in the problem folders

In [5]:
file_paths = [
    # "./scale/problem1/scale",
    # "./scale/problem2/scale",
    # "./scale/problem3/scale",
    "./scale/sample/scale",  # git diff
]

for p in file_paths:
    main(p)