In [23]:
import os
import sys
import pandas as pd
def read_num_bin(range_path, default=0):
    """Return the bin count stored in ``range_path``.

    The count is taken from the first line that consists of exactly one
    whitespace-separated token; blank lines and lines with several tokens
    are skipped.

    Args:
        range_path: Path of the text file to scan.
        default: Value returned when the file does not exist or contains
            no single-token line.

    Returns:
        The bin count as an ``int``, or ``default``.

    Raises:
        ValueError: If the first single-token line is not a valid integer.
    """
    if not os.path.exists(range_path):
        return default
    with open(range_path, 'r') as f:
        # Stream the file line by line; the scan stops at the first match,
        # so there is no need to load every line up front.
        for line in f:
            parts = line.split()
            if len(parts) == 1:
                return int(parts[0])
    return default


# Create the output folder. exist_ok=True removes the check-then-create race
# and keeps the cell safe to re-run.
output_dir = 'sequentialized_data'
os.makedirs(output_dir, exist_ok=True)

# Read the number of bins from range.txt (stays 0 when the file is absent).
range_file = 'range.txt'
num_bin = read_num_bin(range_file)
print(f'Number of bins: {num_bin}')

Number of bins: 200


In [24]:
# Values of the 'rw' column that are treated as write requests. Only these
# rows are converted; all other rows are ignored.
WRITE_OPS = ['W', 'I', 'WS', 'PUT']


def sequentialize_writes(write_ranges):
    """Convert write requests into runs over a densely allocated address space.

    Every logical address (LPA) is assigned the next free sequential address
    (VPA) the first time it is written. Each request is then split into
    maximal runs that share one mode and cover contiguous VPAs:

    * ``'seq_write'`` - addresses written for the first time.
    * ``'overwrite'`` - addresses that already have a VPA.

    An overwrite run is additionally broken wherever the mapped VPAs stop
    being contiguous, so every ``(mode, vpa_start, cnt)`` record describes
    exactly the range ``vpa_start .. vpa_start + cnt - 1``.

    Example:
        ``[(5, 1), (4, 1), (4, 2)]`` yields
        ``[('seq_write', 0, 1), ('seq_write', 1, 1),
        ('overwrite', 1, 1), ('overwrite', 0, 1)]``.

    Args:
        write_ranges: Iterable of ``(lpa_start, lpa_len)`` integer pairs in
            request order.

    Returns:
        List of ``(mode, vpa_start, cnt)`` tuples in emission order.
    """
    lpa_to_vpa = {}  # LPA -> VPA assigned on first write
    next_vpa = 0
    runs = []
    for lpa_start, lpa_len in write_ranges:
        run_mode = None
        run_start = 0
        run_len = 0
        for lpa in range(lpa_start, lpa_start + lpa_len):
            vpa = lpa_to_vpa.get(lpa)
            if vpa is None:
                # First write to this LPA: allocate the next sequential VPA.
                mode = 'seq_write'
                vpa = next_vpa
                lpa_to_vpa[lpa] = vpa
                next_vpa += 1
            else:
                mode = 'overwrite'

            if run_len and mode == run_mode and vpa == run_start + run_len:
                # Same mode and the VPA directly follows the run: extend it.
                run_len += 1
            else:
                # Mode switch or VPA discontinuity: close the current run
                # (if any) and start a new one at this address.
                if run_len:
                    runs.append((run_mode, run_start, run_len))
                run_mode, run_start, run_len = mode, vpa, 1
        # Runs never span two requests; flush whatever is still open.
        if run_len:
            runs.append((run_mode, run_start, run_len))
    return runs


for bin_id in range(num_bin):
    csv_file = f'filtered_data/bin_{bin_id}.csv'
    if not os.path.exists(csv_file):
        # Raise instead of sys.exit() so the notebook shows a normal traceback.
        raise FileNotFoundError(f"Error: The file {csv_file} does not exist.")

    # header=0 consumes the file's own header row and replaces it with the
    # names below, so the data columns are parsed with their real dtypes.
    df = pd.read_csv(
        csv_file,
        header=0,
        names=['op_type', 'rw', 'lpa_start', 'lpa_len'],
    ).astype({'lpa_start': int, 'lpa_len': int})

    # Keep only the address range of write requests.
    write_operations = df.loc[df['rw'].isin(WRITE_OPS), ['lpa_start', 'lpa_len']]

    # itertuples yields plain (lpa_start, lpa_len) tuples without building a
    # Series per row.
    res = sequentialize_writes(write_operations.itertuples(index=False, name=None))

    # Dump the runs of this bin into a CSV file.
    output_file = f'{output_dir}/bin_{bin_id}.csv'
    res_df = pd.DataFrame(res, columns=['mode', 'vpa_start', 'cnt'])
    res_df.to_csv(output_file, index=False)
    print(f"Sequentialized data has been written to {output_file}")

Sequentialized data has been written to sequentialized_data/bin_0.csv
Sequentialized data has been written to sequentialized_data/bin_1.csv
Sequentialized data has been written to sequentialized_data/bin_2.csv
Sequentialized data has been written to sequentialized_data/bin_3.csv
Sequentialized data has been written to sequentialized_data/bin_4.csv
Sequentialized data has been written to sequentialized_data/bin_5.csv
Sequentialized data has been written to sequentialized_data/bin_6.csv
Sequentialized data has been written to sequentialized_data/bin_7.csv
Sequentialized data has been written to sequentialized_data/bin_8.csv
Sequentialized data has been written to sequentialized_data/bin_9.csv
Sequentialized data has been written to sequentialized_data/bin_10.csv
Sequentialized data has been written to sequentialized_data/bin_11.csv
Sequentialized data has been written to sequentialized_data/bin_12.csv
Sequentialized data has been written to sequentialized_data/bin_13.csv
Sequentialized d