In [74]:
# 0. GB should be 1024*1024*1024 but it probably doesn't matter as long
# as you round NUMBER_OF_BLOCKS to a whole number - D
# 1. Change page_per_block to 64 (current standard)
# 2. print the variable GC_THRESHOLD
# 3. print the count of unique LBAs in the trace
# 4. print the smallest and largest LBA (if the smallest LBA is X then
# you want to subtract X from all LBAs so they range from 0 to
# LARGEST_LBA-X)
# 5. Change the SSD size so that the number of LBA==LARGEST_LBA
# 6. Fix a major bug in GC: the page-moving code shown below must be
# de-indented (moved out of the loop over blocks). You do not want to perform
# GC for every block in the system, but only for the block with the smallest
# number of valid pages.
# 7. make sure  print(gc_writes) prints a number smaller than 64
# (maximum number of valid pages per block)
# 8. Generate two synthetic benchmarks
# 8a) Write the entire LBA space 10 times, sequentially (make smaller
# SSD so it doesn't take too long)
# 8b) Write the entire LBA space 10 times, randomly (e.g. just randomly
# pick LBA and the total number of writes should be 10*LARGEST_LBA)
# 9. Print the total number of GC writes for 8a (expected zero) and 8b
# (expected many) and the real trace at the end of the simulation
# 10. let me know if you have questions

#         #found the block with minimal valid pages, move all valid pages
#         for pg in range(page_per_block):
#             #figure out the logical addresses for all phys pages in the gc block
#             phys_addr = block_struct[x][0] + pg
#             gc_lba = P2L[phys_addr]
#             invalidate_lba(gc_lba)
#             #check if we need to get a new block
#             cur_block = check_GC(cur_block,in_gc)
#             #move the gc'ed block to a new location
#             map_lba(gc_lba,cur_block)
#             gc_writes = gc_writes + 1
#         block_struct[gc_blk][3]=1
#         closed_blocks.remove(gc_blk)
#         free_list_block.append(gc_blk)

In [84]:
# Loading required libraries
import csv
import sys
import pandas as pd
import os
import glob
import itertools
import numpy as np
import copy
import math
import time
from pathlib import Path
from collections import Counter

# Loading trace : Needs to be expanded into 4K chunks
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this directory exists.
path = r'C:\Users\cchak\Desktop\Data_ECML\VDI Traces\selected_few'
all_files = glob.glob(os.path.join(path, "synthetic_dataprep_A_deathtime_added.csv"))
if not all_files:
    # Fail with a clear message instead of an IndexError on all_files[0].
    raise FileNotFoundError(
        "No trace file 'synthetic_dataprep_A_deathtime_added.csv' found in " + path
    )

f = all_files[0]  # Change the file name as required
print("Working with file " + str(f))

# The first row of the CSV is skipped and the three columns are named explicitly.
cols = ['IO_num','LBA','Deathtime_RWI']
df = pd.read_csv(f,engine='python',skiprows =1,header=None,na_values=['-1'], index_col=False)
df.columns = cols

# '-1' is parsed as NaN by na_values above; restore it as the sentinel -1.
# fillna is used instead of replace(np.NaN, ...) because the np.NaN alias no
# longer exists in NumPy 2.0.
df['Deathtime_RWI'] = df['Deathtime_RWI'].fillna(-1)

lba_list = df['LBA'].tolist()
print("Min LBA in the dataset :", min(lba_list))
print("Max LBA in the dataset :", max(lba_list))
print("Number of unique LBAs in the data :", df['LBA'].nunique())
print("Number of IO Accesses :",len(df))

Working with file C:\Users\cchak\Desktop\Data_ECML\VDI Traces\selected_few\synthetic_dataprep_A_deathtime_added.csv
Min LBA in the dataset : 0
Max LBA in the dataset : 1216608
Number of unique LBAs in the data : 1216609
Number of IO Accesses : 12166090


In [85]:
# # Find SSD size suitable for the Test
# lba_range_data = max(lba_list) - min(lba_list)
# TB = 1024*1024*1024*1024
# GB = 1024*1024*1024

# SSD_size_TB = math.ceil(lba_range_data/TB)
# SSD_size_GB = math.ceil(lba_range_data/GB)

# print("LBA range in TB", SSD_size_TB)
# print("LBA range in GB", SSD_size_GB)

In [86]:
# SSD specifications
page_size = 4096                              # bytes per page
page_per_block = 64                           # Hyperparameter
over_provisioning_ratio = 0.3                 # NOTE(review): not used anywhere in this cell
GB = 1024*1024*1024
SSD_size_GB = 4.66 * GB                       # despite the name, this value is in bytes
ssd_capacity = SSD_size_GB                    # Hyperparameter
LOG_PAGE_PER_BLOCK = int(math.log2(page_per_block))


# Make the block,page and physical addresses for normal and Overprovisioned capacity
#
# NOTE(review): the bound below subtracts page_size (a byte count) from a page
# count, which mixes units. It is kept as-is because it determines the sizes
# printed below; confirm whether "- page_size" is intended.
page_limit = (ssd_capacity/page_size) - page_size

# Page addresses run from 0 up to the first integer >= page_limit (inclusive).
num_pages = max(0, math.ceil(page_limit) + 1)
page_addresses = list(range(num_pages))

# A block starts every page_per_block pages; block 0 always exists.
# NOTE(review): when num_pages is not a multiple of page_per_block the last
# block start is still listed although fewer than page_per_block page
# addresses follow it.
block_addresses = [0] + list(range(page_per_block, num_pages, page_per_block))

# Independent copies: the free lists are consumed during the simulation while
# the address lists stay intact. The elements are plain ints, so a shallow
# copy is sufficient.
free_list_block = list(block_addresses)
free_list_page = list(page_addresses)


print(len(page_addresses))
print(len(block_addresses))

1217497
19024


In [87]:
# Defining block_structure
#
# block_struct maps a block's start address to a 4-element record:
#   [0] start address of the block
#   [1] page counter, initialised from invalid_pages (0)
#   [2] per-page validity bitmap (one bool per page in the block)
#   [3] write pointer: index of the next page to be written in the block
#
# Module-level defaults describing a freshly erased block.
write_ptr = 0
invalid_pages = 0
valid_bitmap = [False] * page_per_block
block_struct = {}

for start_lba in free_list_block:
    # Every block gets its OWN copy of the bitmap. Sharing a single list
    # between all blocks would make a bit set for one block visible in
    # every other block.
    block_struct[start_lba] = [start_lba, invalid_pages, list(valid_bitmap), write_ptr]

print(len(block_struct))


19024


In [88]:
# Block structure : Dictionary  # Key: Block_address_start    #Value: [start_address,valid_pages,valid_bitmap,write_ptr]

def invalidate_lba(lba):
    """Drop the current logical-to-physical mapping of ``lba``.

    Looks up the physical page that ``lba`` maps to in ``L2P``, decrements the
    page counter (slot 1) of the block holding that page, clears the page's
    bit in the block's bitmap (slot 2) and removes ``lba`` from ``L2P``.
    ``P2L`` is left untouched.

    Args:
        lba: logical block address that is currently a key of ``L2P``.

    Raises:
        KeyError: if ``lba`` is not mapped in ``L2P``.
    """
    prev = L2P[lba]
    # Physical addresses are formed as block_start + page_index, so the page
    # index is the remainder and the block start is what is left. Using plain
    # arithmetic (rather than a bit shift) keeps this correct when
    # page_per_block is not a power of two.
    prev_page = prev % page_per_block
    prev_block = prev - prev_page
    block_struct[prev_block][1] = block_struct[prev_block][1] - 1            # One fewer valid page in the block
    block_struct[prev_block][2][prev_page] = False                          # Setting bitmap to False
    L2P.pop(lba)
    
    
#map LBA to phys
def map_lba(lba,cur_block):
    """Write ``lba`` to the next free page of ``cur_block``.

    The physical address is the block's start address plus its write pointer.
    Both mapping tables (``L2P`` and ``P2L``) are updated, the page's bitmap
    bit is set, and the block's page counter and write pointer are advanced.

    Args:
        lba: logical block address being written.
        cur_block: start address (key of ``block_struct``) of the open block.

    Raises:
        KeyError: if ``cur_block`` is not a known block (e.g. ``None``).
        RuntimeError: if ``cur_block`` has no free page left. Writing anyway
            would produce a physical address inside the next block.
    """
    if cur_block is None or cur_block not in block_struct:
        raise KeyError("map_lba: unknown block %r" % (cur_block,))

    block = block_struct[cur_block]
    write_index = block[3]
    # Uses the notebook-wide page_per_block rather than a hard-coded 64 so the
    # check follows the hyperparameter.
    if write_index >= page_per_block:
        raise RuntimeError(
            "map_lba: block %r is full (write pointer %r)" % (cur_block, write_index)
        )

    phys_addr = block[0] + write_index
    L2P[lba] = phys_addr
    P2L[phys_addr] = lba
    block[2][write_index] = True             # Setting Bitmap
    block[1] = block[1] + 1                  # One more valid page in the block
    block[3] = write_index + 1               # Increasing Write pointer


    

#check if we need to close/open block. Do not perform GC if we are already
#doing gc
def check_GC (cur_block, in_gc):
    """Return the block the next write should go to.

    If ``cur_block`` still has a free page it is returned unchanged.
    Otherwise the block is closed (appended to ``closed_blocks``), garbage
    collection is run when the free list has shrunk to ``GC_THRESHOLD`` or
    fewer blocks (unless we are already inside GC), and the block at the head
    of ``free_list_block`` is popped and returned.

    Args:
        cur_block: start address of the currently open block.
        in_gc: True when called from within garbage collection, which
            suppresses a nested GC run.

    Returns:
        The start address of the block to write to; always a valid block.

    Raises:
        RuntimeError: if a new block is needed but the free list is empty.
    """
    if block_struct[cur_block][3] < page_per_block:
        return cur_block

    # Close the full block. The guard avoids listing it twice when this
    # function is re-entered with the block that was just closed (a duplicate
    # entry would survive closed_blocks.remove() and leave the block both
    # "closed" and "free").
    if not closed_blocks or closed_blocks[-1] != cur_block:
        closed_blocks.append(cur_block)

    if len(free_list_block) <= GC_THRESHOLD and not in_gc:
        do_greedy_gc(cur_block, in_gc)

    if not free_list_block:
        raise RuntimeError("FAIL WHILE DOING GC, RAN OUT OF BLOCKS")

    # Always hand out a fresh block here: cur_block is either full, or was
    # reclaimed by GC and is now back on the free list. Either way it must not
    # be written to directly.
    return free_list_block.pop(0)



def do_greedy_gc(cur_block,in_gc,
                 gc_file=r'C:\Users\cchak\Desktop\Data_ECML\VDI Traces\selected_few\GC_write_stats_FTL_Baseline_B.txt'):
    """Greedy garbage collection: reclaim the closed block with the fewest valid pages.

    Steps:
      1. Pick the block in ``closed_blocks`` whose page counter (slot 1 of its
         record, the number of valid pages) is smallest.
      2. If even that block is completely valid, nothing can be reclaimed and
         the function returns without touching any state.
      3. Copy every still-valid page of the victim to a destination block via
         ``invalidate_lba`` + ``map_lba`` (one GC write per page).
      4. Erase the victim (fresh record, own bitmap), remove it from
         ``closed_blocks`` and append it to ``free_list_block``.
      5. Append the number of GC writes for this run to ``gc_file``.

    A page is treated as valid only when both mapping tables agree
    (``L2P[P2L[phys]] == phys``); ``P2L`` entries of overwritten pages are
    stale and must not be moved.

    Destination handling: pages go to ``cur_block`` while it has room,
    otherwise to a block popped from the free list. A destination opened here
    that still has free pages is put back at the HEAD of ``free_list_block``
    so that the next ``free_list_block.pop(0)`` continues writing in it
    instead of leaving its remaining pages unused.

    Args:
        cur_block: start address of the block that was open when GC started.
        in_gc: unused on input; kept for interface compatibility.
        gc_file: path of the text file receiving one line per GC run.

    Returns:
        False, i.e. the value of the "GC in progress" flag after GC finished.

    Raises:
        RuntimeError: if a destination block is needed but the free list is empty.
    """
    if not closed_blocks:
        return False

    # Greedy victim selection; min() keeps the first block on ties.
    gc_blk = min(closed_blocks, key=lambda blk: block_struct[blk][1])
    if block_struct[gc_blk][1] >= page_per_block:
        # Every page of the best candidate is still valid: moving them would
        # consume as much space as it frees.
        return False

    victim_base = block_struct[gc_blk][0]
    gc_writes = 0

    # Write into cur_block only if it is a different, known block with room.
    dest = None
    if (cur_block != gc_blk and cur_block in block_struct
            and block_struct[cur_block][3] < page_per_block):
        dest = cur_block

    for pg in range(page_per_block):
        phys_addr = victim_base + pg
        gc_lba = P2L.get(phys_addr)
        if gc_lba is None or L2P.get(gc_lba) != phys_addr:
            continue                      # never written, or overwritten since

        if dest is None or block_struct[dest][3] >= page_per_block:
            # A destination opened by GC that filled up is closed here; a full
            # cur_block is left for the caller to close.
            if dest is not None and dest != cur_block:
                closed_blocks.append(dest)
            if not free_list_block:
                raise RuntimeError("FAIL WHILE DOING GC, RAN OUT OF BLOCKS")
            dest = free_list_block.pop(0)

        invalidate_lba(gc_lba)
        #move the valid page to its new location
        map_lba(gc_lba,dest)
        gc_writes = gc_writes + 1

    # Erase the victim: drop its reverse mappings and give it a fresh record.
    for pg in range(page_per_block):
        P2L.pop(victim_base + pg, None)
    block_struct[gc_blk] = [gc_blk, 0, [False] * page_per_block, 0]
    closed_blocks.remove(gc_blk)
    free_list_block.append(gc_blk)

    # Hand a destination opened by GC back to the allocator.
    if dest is not None and dest != cur_block:
        if block_struct[dest][3] >= page_per_block:
            closed_blocks.append(dest)
        else:
            free_list_block.insert(0, dest)

    # Append new GC writes, one number per line so runs stay distinguishable.
    with open(gc_file, 'a') as file_object:
        file_object.write(str(gc_writes) + "\n")

    if(gc_writes > 0):
        print("GC writes not as expected", gc_writes)
    return False
    

In [None]:
# Core simulation of trace
#
# Replays every LBA of the trace as a write: an already-mapped LBA is first
# invalidated, then check_GC selects the block to write to, and map_lba
# records the new mapping.
#
# NOTE(review): this cell consumes free_list_block and mutates block_struct,
# which are built by earlier cells. Re-run those cells before re-running this
# one, otherwise the simulation starts from a dirty state.

L2P = {}                                   # logical -> physical page address
P2L = {}                                   # physical page address -> logical
GC_THRESHOLD = int(0.2*len(block_addresses))
print("GC Threshold set",GC_THRESHOLD)
min_LBA = min(lba_list)                    # LBAs are shifted so they start at 0
closed_blocks = []
cur_block = free_list_block.pop(0)
in_gc = False
gc_writes = 0
total_ios = len(lba_list)


start_time = time.time()
for counter, raw_lba in enumerate(lba_list):
    lba = int(raw_lba) - min_LBA
    if(counter >100000 and counter%100000==0):
        print("Percentage completed in (%)  :", (counter/total_ios)*100)
    if lba in L2P:
        invalidate_lba(lba)
    last_cur_block = cur_block
    cur_block = check_GC(cur_block,in_gc)
    if not isinstance(cur_block, int):
        # check_GC did not hand back a block. If the previous block is no
        # longer full, take the next free block; otherwise the state is
        # inconsistent and continuing would only fail inside map_lba.
        if(block_struct[last_cur_block][3] != page_per_block):
            cur_block = free_list_block.pop(0)
        else:
            raise RuntimeError(
                "Something happened after block : " + str(last_cur_block)
            )
    map_lba(lba,cur_block)
print("End of Trace")
end_time = time.time()
run_time = end_time - start_time
print("Execution Time for the FTL :",run_time)


GC Threshold set 3804
Percentage completed in (%)  : 1.6439135334359682
Percentage completed in (%)  : 2.4658703001539526
Percentage completed in (%)  : 3.2878270668719365
Percentage completed in (%)  : 4.109783833589921
Percentage completed in (%)  : 4.931740600307905
Percentage completed in (%)  : 5.753697367025889
Percentage completed in (%)  : 6.575654133743873
Percentage completed in (%)  : 7.397610900461858
Percentage completed in (%)  : 8.219567667179842
Percentage completed in (%)  : 9.041524433897825


In [None]:
# Completion marker only: this cell performs no checks of its own.
print('Done...Sanity Check complete!!')