In [3]:
import struct

def addData(pathFile: str, records: tuple):
    with open(pathFile, "ab") as file:
        data = struct.pack("20si20s", records[0].encode(), records[1], records[2].encode())
        file.write(data)


if __name__ == "__main__":
    list_records = [("Alice", 25, "Lost Angeles"),
                    ("Bob", 30, "New York"),
                    ("Helen", 35, "Chicago"),
                    ("Dylan", 40, "San Francisco"),
                    ("Eva", 45, "Miami"),
                    ("Frank", 50, "Las Vegas"),
                    ("Grace", 55, "Seattle"),
                    ("Henry", 60, "Boston"),
                    ("Ivy", 65, "Houston"),
                    ("Jack", 70, "Philadelphia"),
                    ("Kathy", 75, "Denver"),
                    ("Leo", 80, "Phoenix"),
                    ("Mia", 85, "Portland"),
                    ("Nick", 60, "San Diego"),
                    ("Olivia", 45, "Washington"),
                    ("Peter", 30, "Austin"),
                    ("Quinn", 15, "Dallas"),]
    for record in list_records:
        addData("data.bin", record)

In [53]:
import struct

def readAllData(pathFile: str) -> list:
    result = []
    with open(pathFile, "rb") as file:
        while True:
            data = file.read(struct.calcsize("20si20s"))
            if not data:
                break
            record = struct.unpack("20si20s", data)
            record = (record[0].decode().strip('\x00'), record[1], record[2].decode().strip('\x00'))
            result.append(record)
    return result

if __name__ == "__main__":
    print(readAllData("testData.bin"))

[('Quinn', 15, 'Dallas'), ('Alice', 25, 'Lost Angeles'), ('Bob', 30, 'New York'), ('Helen', 35, 'Chicago'), ('Dylan', 40, 'San Francisco'), ('Eva', 45, 'Miami'), ('Frank', 50, 'Las Vegas'), ('Grace', 55, 'Seattle'), ('Henry', 60, 'Boston'), ('Ivy', 65, 'Houston'), ('Jack', 70, 'Philadelphia'), ('Kathy', 75, 'Denver'), ('Leo', 80, 'Phoenix'), ('Mia', 85, 'Portland'), ('Nick', 60, 'San Diego'), ('Olivia', 45, 'Washington'), ('Peter', 30, 'Austin'), ('Quinn', 15, 'Dallas')]


In [21]:
import struct

def addData(pathFile: str, records: dict):
    with open(pathFile, "ab") as file:
        data = struct.pack("20si20s", records['name'].encode(), records['age'], records['city'].encode())
        file.write(data)


if __name__ == "__main__":
    records = {"name": "Alice", "age": 25, "city": "Lost Angeles"}
    addData("testDataDict.bin", records)

In [14]:
import struct

def readAllData(pathFile: str) -> list:
    result = []
    with open(pathFile, "rb") as file:
        while True:
            data = file.read(struct.calcsize("20si20s"))
            if not data:
                break
            record = struct.unpack("20si20s", data)
            record = (record[0].decode().strip('\x00'), record[1], record[2].decode().strip('\x00'))
            result.append({"name": record[0], "age": record[1], "city": record[2]})
    return result

if __name__ == "__main__":
    print(readAllData("testDataDict.bin"))

[{'name': 'Alice', 'age': 25, 'city': 'Lost Angeles'}, {'name': 'Alice', 'age': 25, 'city': 'Lost Angeles'}]


# Numpy


In [57]:
import struct

def addData(pathFile: str, records: tuple):
    with open(pathFile, "ab") as file:
        data = struct.pack("20s20s20sff", records[0].encode(), records[1].encode(), records[2].encode(), records[3], records[4])
        file.write(data)


if __name__ == "__main__":
    list_records = [
        # data employee
        ("0001", "Robert", "Engineering", 90, 40000),
        ("0002", "Alice", "Sales", 80, 25000),
        ("0003", "John", "Marketing", 85, 35000),
        ("0004", "Emily", "Finance", 95, 45000),
        ("0005", "David", "HR", 75, 30000),
        ("0008", "Jessica", "Marketing", 87, 38000),
        ("0009", "Daniel", "Finance", 93, 47000),
        ("0010", "Sophia", "HR", 72, 28000),
        ("0011", "Matthew", "Engineering", 88, 41000),
        ("0012", "Emma", "Sales", 82, 27000),
        ("0013", "Andrew", "Marketing", 90, 36000),
        ("0014", "Olivia", "Finance", 97, 48000),
        ("0015", "James", "HR", 80, 32000),
        ("0006", "Sarah", "Engineering", 92, 42000),
        ("0007", "Michael", "Sales", 78, 23000),
    ]
    for record in list_records:
        addData("testData.bin", record)

In [90]:
import numpy as np
import pandas as pd

def showAllData():
    with open("testData.bin", "rb") as file:
        record_size = struct.calcsize("20s20s20sff")
        list_records = []
        while True:
            data = file.read(record_size)
            if not data:
                break
            records = struct.unpack("20s20s20sff", data)
            records = (records[0].decode().strip('\x00'), records[1].decode().strip(
                '\x00'), records[2].decode().strip('\x00'), records[3], records[4])
            list_records.append(records)
    # Define the data types for each field
    dtype = [('ID', 'U4'),    # 4-character string for ID
            ('Name', 'U10'), # 10-character string for Name
            ('Department', 'U10'), # 10-character string for Department
            ('Score', 'f4'), # float for Score
            ('Salary', 'f4')] # float for Salary
    # Create a structured NumPy array
    data_arr = np.asarray(list_records, dtype=dtype)
    # Sort by the 'ID' field
    sorted_data = np.sort(data_arr, order='ID')
    # Convert to Pandas DataFrame for display
    df_data_arr = pd.DataFrame(sorted_data)
    # Display the data without the index column
    print(df_data_arr.to_string(index=False))


def showSpecificData(col: int, list_search:list):
    with open("testData.bin", "rb") as file:
        record_size = struct.calcsize("20s20s20sff")
        list_records = []
        while True:
            data = file.read(record_size)
            if not data:
                break
            records = struct.unpack("20s20s20sff", data)
            records = (records[0].decode().strip('\x00'), records[1].decode().strip(
                '\x00'), records[2].decode().strip('\x00'), records[3], records[4])
            list_records.append(records)
    # Define the data types for each field
    dtype = [('ID', 'U4'),          # 4-character string for ID
            ('Name', 'U10'),        # 10-character string for Name
            ('Department', 'U10'),  # 10-character string for Department
            ('Score', 'f4'),        # float for Score
            ('Salary', 'f4')]       # float for Salary
    # Create a structured NumPy array
    data_arr = np.asarray(list_records, dtype=dtype)
    # Convert 'col' to the appropriate field name
    field = ['ID', 'Name', 'Department', 'Score', 'Salary'][col]
    # Filter the structured array based on the field
    fltr = np.asarray(list_search)
    data_fltr = data_arr[np.in1d(data_arr[field], fltr)]
    # Convert to Pandas DataFrame for display
    df_data_fltr = pd.DataFrame(data=data_fltr)
    print(df_data_fltr.to_string(index=False))

showSpecificData(1, ['Emily', 'Matthew'])
print()
showSpecificData(3, [90])
print()
showSpecificData(4, [40000, 45000])
print()
showAllData()

  ID    Name Department  Score  Salary
0004   Emily    Finance   95.0 45000.0
0011 Matthew Engineerin   88.0 41000.0

  ID   Name Department  Score  Salary
0001 Robert Engineerin   90.0 40000.0
0013 Andrew  Marketing   90.0 36000.0

  ID   Name Department  Score  Salary
0001 Robert Engineerin   90.0 40000.0
0004  Emily    Finance   95.0 45000.0

  ID    Name Department  Score  Salary
0001  Robert Engineerin   90.0 40000.0
0002   Alice      Sales   80.0 25000.0
0003    John  Marketing   85.0 35000.0
0004   Emily    Finance   95.0 45000.0
0005   David         HR   75.0 30000.0
0006   Sarah Engineerin   92.0 42000.0
0007 Michael      Sales   78.0 23000.0
0008 Jessica  Marketing   87.0 38000.0
0009  Daniel    Finance   93.0 47000.0
0010  Sophia         HR   72.0 28000.0
0011 Matthew Engineerin   88.0 41000.0
0012    Emma      Sales   82.0 27000.0
0013  Andrew  Marketing   90.0 36000.0
0014  Olivia    Finance   97.0 48000.0
0015   James         HR   80.0 32000.0
