In [18]:
import math
import pandas as pd

In [2]:
data = pd.read_excel('./dataset/Coffee Shop Sales.xlsx')
data.to_csv('./dataset/Coffee Shop Sales.csv', index=False)

In [3]:
data2 = pd.read_csv('./dataset/Coffee Shop Sales.csv')

In [4]:
columns = data2.columns
size_per_column = dict()
for column in columns:
    elements = data2[column].unique()
    elements_size = [len(str(elem)) for elem in elements]
    size_per_column[column] = max(elements_size)
print(size_per_column)
print(list(size_per_column.values()))

{'transaction_id': 6, 'transaction_date': 10, 'transaction_time': 8, 'transaction_qty': 1, 'store_id': 1, 'store_location': 15, 'product_id': 2, 'unit_price': 5, 'product_category': 18, 'product_type': 21, 'product_detail': 28}
[6, 10, 8, 1, 1, 15, 2, 5, 18, 21, 28]


In [37]:
class Fixed_Size_Heap:
    def __init__(self, block_size, field_sizes, filename):
        self.block_size = block_size
        self.field_sizes = field_sizes
        self._set_record_size()
        self.blocks = []
        self._read_file(filename)
        self.deleted_records = []

    def _set_field_names(self, line):
        self.field_names = line.split(',')

    def _set_record_size(self):
        self.record_size = sum(self.field_sizes) + len(self.field_sizes)
    
    def _padding(self, field, field_id):
        diff = self.field_sizes[field_id] - len(field)
        padded_field = field + (' ' * diff)
        return padded_field

    def _format_record(self, record):
        formatted_record = ''
        fields = record.strip().split(',')
        for i in range(len(fields)):
            if len(fields[i]) < self.field_sizes[i]:
                padded_field = self._padding(fields[i], i)
                formatted_record += padded_field + ','
            else:
                formatted_record += fields[i] + ','
        return formatted_record

    def _write_record(self, record):
        if self.blocks == []:
            self.blocks.append(self._format_record(record))
        elif len(self.blocks[-1]) + self.record_size < self.block_size:
            self.blocks[-1] += self._format_record(record)
        else:
            self.blocks.append(self._format_record(record))

    def _read_file(self, filename):
        with open(filename, 'r') as file:
            self._set_field_names(file.readline())
            for record in file:
                self._write_record(record)
    
    def _delete_record(self, block_id, record_id):
        offset = self.record_size * record_id
        head = self.blocks[block_id][:offset]
        body = ' ' * self.record_size
        tail = self.blocks[block_id][offset + self.record_size:]
        self.blocks[block_id] = head + body + tail
        self.deleted_records.append([block_id, record_id])

    def delete_record_by_primary_key(self, key):
        field_key_size = self.field_sizes[0]
        success = False
        for i in range(len(self.blocks)):
            if success:
                break
            number_of_records = math.floor(self.block_size / self.record_size)
            for j in range(0, number_of_records):
                offset = self.record_size * j
                print('offset: ', offset)
                print('field_key_size: ', field_key_size)
                print('block section: ', self.blocks[i][offset:field_key_size])
                block_key = int(self.blocks[i][offset:field_key_size].strip())
                if block_key == key:
                    self._delete_record(i, j)
                    success = True
                    break
        if not success:
            raise Exception('DeleteError: Primary Key nonexistent.')

In [6]:
filename = './dataset/Coffee Shop Sales.csv'

In [38]:
myfile = Fixed_Size_Heap(
                    block_size=512,
                    field_sizes=list(size_per_column.values()),
                    filename=filename)

In [39]:
myfile.blocks[0]

'1     ,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.0  ,Coffee            ,Gourmet brewed coffee,Ethiopia Rg                 ,2     ,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.1  ,Tea               ,Brewed Chai tea      ,Spicy Eye Opener Chai Lg    ,3     ,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.5  ,Drinking Chocolate,Hot chocolate        ,Dark chocolate Lg           ,4     ,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.0  ,Coffee            ,Drip coffee          ,Our Old Time Diner Blend Sm ,'

In [40]:
myfile.delete_record_by_primary_key(1)

offset:  0
field_key_size:  6
block section:  1     


In [41]:
myfile.blocks[0]

'                                                                                                                              2     ,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.1  ,Tea               ,Brewed Chai tea      ,Spicy Eye Opener Chai Lg    ,3     ,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.5  ,Drinking Chocolate,Hot chocolate        ,Dark chocolate Lg           ,4     ,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.0  ,Coffee            ,Drip coffee          ,Our Old Time Diner Blend Sm ,'