In [1]:
import csv
import sys
import pandas as pd
import os
import glob
import itertools
import numpy as np
import copy
import math
import time
from pathlib import Path
from collections import Counter


# Loading trace : Needs to expanded into 4K chunks
path = r'/Users/chandranilchakraborttii/Documents/GC_pred/data'
all_files = glob.glob(os.path.join(path, "2016030917-LUN2.csv_dataprep_deathtime_added.csv"))

f = all_files[0]  # Change the file name as required
print("File Name :" + str(f))

df = pd.read_csv(f,engine='python',skiprows =1,header=None,na_values=['-1'], index_col=False)
cols = ['IO','LBA','deathtime']
df.columns = cols
print("Before",len(df))
df['deathtime'] = df['deathtime'].replace(np.NaN, -1)
df = df.loc[df['deathtime'] != -1]
df = df.loc[df['deathtime'] != 1]
print("After",len(df))

lba_list = df['LBA'].tolist()
deathtime_list = df['deathtime'].tolist()
print("Min LBA in the dataset :", min(lba_list))
print("Max LBA in the dataset :", max(lba_list))
print("Number of unique LBAs in the data :",len(Counter(df['LBA'])))
print("Number of IO Accesses :",len(df))

File Name :/Users/chandranilchakraborttii/Documents/GC_pred/data/2016030917-LUN2.csv_dataprep_deathtime_added.csv
Before 1630000
After 943841
Min LBA in the dataset : 658733056
Max LBA in the dataset : 5335229436416
Number of unique LBAs in the data : 52954
Number of IO Accesses : 943841


In [2]:
#SSD specifications
num_page_addresses = len(Counter(df['LBA']))
page_size = 4096
page_per_block = 16
GB = 1024*1024*1024
SSD_size = num_page_addresses*page_size
# SSD_size_GB_normal = round_decimals_up(SSD_size/GB,1)
SSD_size_GB_normal = SSD_size/GB
over_provisioning_ratio = 0.2
LOG_PAGE_PER_BLOCK = int(math.log(page_per_block,2))
# SSD_size_full = round_decimals_up((1 + over_provisioning_ratio)*SSD_size_GB_normal,1)
SSD_size_full = (1 + over_provisioning_ratio)*SSD_size_GB_normal
print("SSD Capacity (Available in GB) :",SSD_size_GB_normal)
print("SSD Capacity (Total in GB)     :",SSD_size_full)

SSD Capacity (Available in GB) : 0.20200347900390625
SSD Capacity (Total in GB)     : 0.24240417480468748


In [3]:
GB = 1024*1024*1024
ssd_capacity = SSD_size_full *GB

# Make the block,page and physical addresses for normal and Overprovisioned capacity
page_addresses = []
block_addresses = []
block_placement = 0
start_counter = -1
block_addresses.append(0)

while(start_counter < (ssd_capacity/page_size) - page_size):
    start_counter = start_counter + 1
    page_addresses.append(int(start_counter))
    if(block_placement >= page_per_block):
        block_addresses.append(int(start_counter))
        block_placement = 0

    block_placement = block_placement + 1

free_list_block = copy.deepcopy(block_addresses)
free_list_page = copy.deepcopy(page_addresses)

block_struct = {}
for x in free_list_block:
    start_lba = x
    write_ptr=0
    invalid_pages=0
    hashset = set()
    valid_bitmap = []

    for x in range(page_per_block):
        valid_bitmap.append(False)

    segment = [start_lba,invalid_pages,valid_bitmap,write_ptr]
    block_struct[start_lba]=segment
    

print("Total number of Blocks created: ", len(block_addresses))
print("Total number of Pages created:  ", len(page_addresses))
print(str(len(block_struct)) + " 4K blocks Initialized" )

Total number of Blocks created:  3716
Total number of Pages created:   59450
3716 4K blocks Initialized


In [4]:
def invalidate_lba(lba):
    prev = L2P[lba]
    prev_block = (prev >> LOG_PAGE_PER_BLOCK)*page_per_block
    prev_page = prev % page_per_block
    block_details = block_struct[prev_block]                                 # Getting block details
    block_struct[prev_block][2][prev_page] = False                          # Setting bitmap to False
    block_struct[prev_block][1] = block_struct[prev_block][2].count(True)     # Setting invalid pages
    L2P.pop(lba)
    
        
def demote():
    queue_counter = 0
    while(queue_counter < len(queues)):
        if len(queue) == 0:
            continue
        else:
            if (chunk_id in chunk_expire_time):
                expire_time = chunk_expire_time[chunk_id]
                if(expire_time < dev_total_writes[0]):
                    chunk_expire_time[chunk_id]  = dev_total_writes + dev_lifetime
                    queues.remove(queue)
                
                if((chunk_id == hottest_chunk) and ((queue_counter + 1) < len(queues))):
                    tmp_chunk_id = queues[queue_counter + 1]
                    dev_max_writes = chunk_writes[tmp_chunk_id]
                    hottest_chunk  = tmp_chunk_id
                    
                if (queue_counter ==0):
                    queues[queue_counter] = chunk_id
                else:
                    queues[queue_counter - 1] = chunk_id
        
        
def promote(chunk_id):
    dev_total_writes[0] = dev_total_writes[0] + 1
    if chunk_id in chunk_writes:
        num_writes = chunk_writes[chunk_id] + 1
        last_access = chunk_last_access[chunk_id]
        if chunk_id in chunk_queue:
            prev_queue_id = chunk_queue[chunk_id]
        queue_id = math.log(num_writes,2)               
        if(queue_id >= len(block_IO_burst) - 1):
            queue_id = len(block_IO_burst) - 2
        else:
            num_writes = 1
            last_access = 0
            prev_queue_id = -1
            queue_id = 0
            
        if(num_writes > dev_max_writes):
            hottest_chunk = chunk_id
            dev_lifetime  = dev_total_writes  - last_access
            dev_max_writes[0] = num_writes
            
        if(prev_queue_id != -1):
            if prev_queue_id in queues:
                queues[prev_queue_id].remove(chunk_id)
        queues[prev_queue_id] = chunk_id
        
        chunk_writes[chunk_id]= num_writes
        chunk_last_access[chunk_id]= dev_total_writes[0]
        chunk_expire_time[chunk_id] =dev_total_writes[0] + dev_lifetime[0]
        chunk_queue[chunk_id] = queue_id
        
        
        
 # Finding which block to add the LBA
def map_lba(lba,block_IO_burst,in_gc,counter):
    block_select = block_IO_burst[0] 
    chunk_id = lba/chunk_size[0]
    while(chunk_id*chunk_size[0] < lba):
        chunk_id = chunk_id + 1
        promote(chunk_id)
        demote()
        
    if(in_gc == True):                                                      
        block_select = block_IO_burst[len(block_IO_burst) - 1]
    else:
        if (lba/chunk_size[0] in chunk_queue):
            block_select = chunk_queue[lba/chunk_size[0]]
        if block_select < 0 or (block_select > len(block_IO_burst) - 1):
                block_select = block_IO_burst[0]
        
    # Block Found, now updating block          
    phys_addr = block_struct[block_select][0] + (block_struct[block_select][3])
    L2P[lba] = phys_addr
    P2L[phys_addr] = lba   
    block_struct[block_select][2][block_struct[block_select][3]] = True             # Setting Bitmap
    block_struct[block_select][1] = block_struct[block_select][2].count(True)       # Setting invalid pages
    block_struct[block_select][3] = block_struct[block_select][3] + 1               # Increasing Write pointer
    total_user_writes[0] = total_user_writes[0] + 1                                 

    

#check if we need to close/open block. Do not perform GC if we are already
def check_GC (block_IO_burst, in_gc,counter):
    for x in block_IO_burst:
        # If block is full, close block
        if(block_struct[x][3] == page_per_block):
            closed_blocks.append(x)                       # Adding to closed list
            block_IO_burst.remove(x)                    
            new_block = free_list_block.pop(0)            #  Requesting a new block
            block_IO_burst.append(new_block)              # Adding to open blocks

    
    if(len(free_list_block) == 0):
        print("FAIL WHILE DOING GC, RAN OUT OF BLOCKS") 
   # Checking if GC is needed
    elif (len(free_list_block) <= GC_THRESHOLD):
        # Checking if GC is already going on
        if(in_gc != True):
            in_gc = do_greedy_gc(block_IO_burst,in_gc,counter) 
    return block_IO_burst




def do_greedy_gc(block_IO_burst,in_gc,counter):
    gc_blk =0
    in_gc = True
    gc_writes = 0 
    min_val = float('inf')
    for x in closed_blocks:              
        if (block_struct[x][1] < min_val):
            min_val = block_struct[x][1]
            gc_blk = x

    for pg in range(page_per_block):
        #figure out the logical addresses for all phys pages in the gc block
        phys_addr = block_struct[gc_blk][0] + pg
        # Updating P2L
        if (phys_addr in P2L):
            gc_lba = P2L[phys_addr]
            P2L.pop(phys_addr)            
        # Updating L2P
        # Checking for valid bitmap
        prev_block = (phys_addr >> LOG_PAGE_PER_BLOCK)*page_per_block
        prev_page = phys_addr % page_per_block
        bitmap = block_struct[prev_block][2][prev_page]
    
        # If valid bitmap is True (data is valid), copy to OP capacity, increase GC writes
        if (bitmap == True):
            invalidate_lba(gc_lba)
            gc_writes = gc_writes + 1
            #check if we need to get a new block
            block_IO_burst = check_GC(block_IO_burst,in_gc,counter)
            #move the gc'ed block t-o a new location
            map_lba(gc_lba,block_IO_burst,in_gc,counter)
    
                
    if(gc_writes > 64):
        print("GC writes not as expected", gc_writes)
    total_gc_writes[0] = total_gc_writes[0] + gc_writes
    
    invalid_pages = 0
    valid_bitmap = []
    write_ptr = 0

    for x in range(page_per_block):
        valid_bitmap.append(False)
            
    # Reseting the GC blk and add to free_block_list
    block_struct[gc_blk]= [gc_blk,invalid_pages,valid_bitmap,write_ptr]
    closed_blocks.remove(gc_blk)
    free_list_block.append(gc_blk)
    for x in block_IO_burst:
        # If block is full, close block and reset death time
        if(block_struct[x][3] >= page_per_block):
            closed_blocks.append(x)                       # Adding to closed list
            block_IO_burst.remove(x)                    
            new_block = free_list_block.pop(0)            #  Requesting a new block
            block_IO_burst.append(new_block)              # Adding to open blocks
    
    in_gc = False
    return in_gc

        


In [5]:
# Setting global parameters
# Initalizing Starting Free Blocks..
global gc_writes
global in_gc 
dev_lifetime = []
dev_lifetime.append(0)
dev_max_writes = []
dev_max_writes.append(0)
hottest_chunk_id = []
hottest_chunk_id.append(0)
dev_total_writes = []
dev_total_writes.append(0)


num_cur_blocks_open = 20       # Hyperparameter

L2P = {}
P2L = {}
closed_blocks = []
cur_blocks_open = []
lba_burst = []
deathtime_range_list = []
interval = float(100/num_cur_blocks_open)
gc_writes = 0
in_gc = False


count = 0
while(count < num_cur_blocks_open):
    deathtime_range_list.append(int(np.percentile(deathtime_list, (count+1)*interval)))
    count = count + 1
    

block_IO_burst = []

print("Initalizing Starting Free Blocks...")
for x in range(num_cur_blocks_open):
    block_num = free_list_block.pop(0)                                            # Getting a free block
    block_IO_burst.append(block_num) 
    
queues= []
for x in range(len(block_IO_burst)):
    queue = set()
    queues.append(queue)

Initalizing Starting Free Blocks...


In [None]:
total_gc_writes = []
total_gc_writes.append(0)
priority_writes = []
chunk_size = []
chunk_size.append(1)
priority_writes.append(0)
total_user_writes = []
total_user_writes.append(0)



counter = 0
GC_THRESHOLD = 50
in_gc = False
chunk_last_acess = {}
chunk_expire_time = {}
chunk_writes = {}
chunk_queue = {}
chunk_last_access = {}



print("Starting Trace..!")
start_time = time.time()
while(counter < len(lba_list)):
    if(counter >= 10000 and counter %10000 == 0):
        print("Percent Done",(counter/len(lba_list))*100)
    
    lba= int(lba_list[counter])
    if lba in L2P:
        invalidate_lba(lba)
    block_IO_burst = check_GC(block_IO_burst,in_gc,counter)
    map_lba(lba,block_IO_burst,in_gc,counter)
    counter = counter + 1

end_time = time.time()
run_time = end_time - start_time


Starting Trace..!
Percent Done 1.0595004878999745
Percent Done 2.119000975799949
Percent Done 3.178501463699924
Percent Done 4.238001951599898
Percent Done 5.297502439499873
Percent Done 6.357002927399848
Percent Done 7.416503415299823
Percent Done 8.476003903199796
Percent Done 9.535504391099773
Percent Done 10.595004878999745
Percent Done 11.654505366899722
Percent Done 12.714005854799696
Percent Done 13.77350634269967
Percent Done 14.833006830599645
Percent Done 15.892507318499622
Percent Done 16.952007806399592
Percent Done 18.01150829429957
Percent Done 19.071008782199545
Percent Done 20.13050927009952
Percent Done 21.19000975799949
Percent Done 22.24951024589947
Percent Done 23.309010733799443
Percent Done 24.368511221699418
Percent Done 25.428011709599392
Percent Done 26.48751219749937
Percent Done 27.54701268539934
Percent Done 28.606513173299312
Percent Done 29.66601366119929
Percent Done 30.725514149099265
Percent Done 31.785014636999243
Percent Done 32.84451512489922
Percent

In [None]:
print("GC Threshold :",GC_THRESHOLD)
print("OverProvisioning Ratio :",over_provisioning_ratio)
print("Number of Open Blocks :",num_cur_blocks_open)
print("Total Number of writes :",total_user_writes[0])
print("Total Number of GC writes :",total_gc_writes[0])
print("Total Number of user writes :",(total_user_writes[0] - total_gc_writes[0]))
print("Write amplification",(total_user_writes[0] + total_gc_writes[0])/total_user_writes[0])
print("Execution Time for the FTL :",run_time)
print("Done..!")