In [63]:
import os
import sys
import pandas as pd

In [64]:
bin_id = 0
# csv_file = 'filtered_data/bin_' + str(bin_id) + '.csv'
csv_file = 'filtered_data/test.csv'
if not os.path.exists(csv_file):
    print(f"Error: The file {csv_file} does not exist.")
    sys.exit(1)
df = pd.read_csv(csv_file, header=None, names=['op_type', 'rw', 'lpa_start', 'lpa_len'])
# the row 0 is the header, so we need to skip it
df = df.iloc[1:]
# lpa_start is int, and lpa_len is int
df['lpa_start'] = df['lpa_start'].astype(int)
df['lpa_len'] = df['lpa_len'].astype(int)
df.head()

Unnamed: 0,op_type,rw,lpa_start,lpa_len
1,Q,R,636634680,8
2,G,R,636634680,8
3,I,RS,636634680,8
4,D,RS,636634680,8
5,C,RS,636634680,8


In [65]:
write_operations = df[df['rw'].isin(['W', 'I', 'WS', 'PUT'])][['lpa_start', 'lpa_len']]
write_operations.head()

Unnamed: 0,lpa_start,lpa_len
8,5,10
9,3,4
10,20,3
11,12,10


In [66]:
mp = {} # key is lpa and value is its sequentialized lpa
# iterate through the write operations and sequentialize them
res = []
allocated_vpa = 0
for index, row in write_operations.iterrows():
    lpa_start = row['lpa_start']
    lpa_len = row['lpa_len']
    print(f"Processing LPA range: [{lpa_start},{lpa_start + lpa_len - 1}]")
    prev_mode = 'seq_w'
    cnt = 0
    from_lpa = lpa_start
    for i in range(lpa_start, lpa_start + lpa_len):
        if i in mp:
            # hit, implies an update
            if prev_mode == 'seq_w':
                # first append (mode,from_lpa, cnt) to res
                if cnt > 0:
                    res.append((prev_mode, mp[from_lpa], cnt))
                # then reset the cnt to 1, from_lpa to i, and prev_mode to 'update'
                cnt = 1
                from_lpa = i
                prev_mode = 'up_w'
            else: # already in update mode
                # update the cnt
                cnt += 1
        else:
            mp[i] = allocated_vpa
            allocated_vpa += 1
            if prev_mode == 'seq_w':
                cnt += 1
            else: # already in update mode
                # first append (from_lpa, cnt) to res
                if cnt > 0:
                    res.append((prev_mode, mp[from_lpa], cnt))
                # then reset the cnt to 1, from_lpa to i, and prev_mode to 'seq_write'
                cnt = 1
                from_lpa = i
                prev_mode = 'seq_w'
    # at the end of the for loop, append the last (prev_mode, from_lpa, cnt) to res
    if cnt > 0:
        res.append((prev_mode, mp[from_lpa], cnt))

Processing LPA range: [5,14]
Processing LPA range: [3,6]
Processing LPA range: [20,22]
Processing LPA range: [12,21]


In [67]:
# dump res into a csv file
output_file = 'filtered_data/sequentialized_bin_' + str(bin_id) + '.csv'
res_df = pd.DataFrame(res, columns=['mode', 'vpa_start', 'cnt'])
res_df.to_csv(output_file, index=False)
print(f"Sequentialized data has been written to {output_file}")

Sequentialized data has been written to filtered_data/sequentialized_bin_0.csv
