In [4]:
import time

class LSMTree:
    def __init__(self, memtable_threshold=3):
        self.memtable = {}
        self.sorted_nums = []
        self.memtable_threshold = memtable_threshold

    def insert(self, key, value):
        start_time = time.time()
        self.memtable[key] = value
        if len(self.memtable) >= self.memtable_threshold:
            self.flush_memtable_to_disk()
        end_time = time.time()
        print(f"Insertion took {end_time - start_time:.6f} seconds")

    def flush_memtable_to_disk(self):
        sorted_num = sorted(self.memtable.items())
        self.sorted_nums.append(sorted_num)
        self.memtable = {}

    def search(self, key):
        start_time = time.time()
        # Search in memtable first
        if key in self.memtable:
            end_time = time.time()
            print(f"Get operation took {end_time - start_time:.6f} seconds")
            return self.memtable[key]

        # Search in sorted nums
        for sorted_num in reversed(self.sorted_nums):
            for k, v in sorted_num:
                if k == key:
                    end_time = time.time()
                    print(f"Get operation took {end_time - start_time:.6f} seconds")
                    return v
        end_time = time.time()
        print(f"Get operation took {end_time - start_time:.6f} seconds")
        return None

    def delete(self, key):
        start_time = time.time()
        # Delete from memtable if present
        if key in self.memtable:
            del self.memtable[key]

        # Delete from sorted nums if present
        for sorted_num in self.sorted_nums:
            for i, (k, _) in enumerate(sorted_num):
                if k == key:
                    del sorted_num[i]
                    end_time = time.time()
                    print(f"Deletion took {end_time - start_time:.6f} seconds")
                    return True
        return False

    def print_sorted_nums(self):
        print("Sorted Nums:")
        for i, sorted_num in enumerate(self.sorted_nums):
            print(f"Level {i}: {sorted_num}")

    def range_query(self, start_key, end_key):
        result = []
        start_time = time.time()
        for sorted_num in self.sorted_nums:
            for k, v in sorted_num:
                if start_key <= k <= end_key:
                    result.append((k, v))
        end_time = time.time()
        print("Time taken for range query : ",end_time - start_time,"seconds")
        
        return result

    def insert_values(self, filename):
        start_time = time.time()
        key = 1
        with open(filename, "r") as file:
            for line in file:
                value = line.strip()
                self.insert(key, value)
                key += 1
        end_time = time.time()
        print("Time taken to insert values:", end_time - start_time, "seconds")

In [5]:
filename = "Random_10k_kv_pairs.txt"

In [7]:
kv_store = LSMTree()
kv_store.insert_values(filename)

Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertion took 0.000000 seconds
Insertio

In [8]:
import os
def get_file_size(filename):
    if not os.path.exists(filename):
        return "File not found"

    file_size = os.path.getsize(filename)
    return (file_size/(1024))

print("space required : ",get_file_size(filename),"KB")

space required :  282.9482421875 KB


In [9]:
kv_store.print_sorted_nums()

Sorted Nums:
Level 0: [(1, 'rmtEW: HTQmCIXozTCnWAKcH6HJ'), (2, 'WWZkY: oIE21MnaNRivr0UfgUMu'), (3, 'WdOsb: dTVYF7rwwDPfcqKkJHX9')]
Level 1: [(4, 'oj35A: jBw2PjraOvWb6mXhdGij'), (5, 'oSgQB: 45fYpZf3WvwEcylh0KRP'), (6, '66o7o: ziBcvNIKbtI2M85TirUy')]
Level 2: [(7, '2cYua: 5YargVKsHHyuuVSNlrru'), (8, 'jCBiT: osayJcVk3acjCq9FM9Bw'), (9, 'd9owL: NtXmROskmfmjG0Sv8fEm')]
Level 3: [(10, 'ihOGc: rGyqrm8Uf6hlpZvyLACK'), (11, 'UeJ3f: NmbTCfECfLp5OXNAsmC0'), (12, 'EHJPX: xF3IQEfUMSoOANLOPMNU')]
Level 4: [(13, 'A9bs6: uakCEf4z2ZU6ohWA9orO'), (14, 'VM3O9: mBJaUZtna73W11GWjbHQ'), (15, 'aCTmd: oFA5SfxTgwgol4Y6okA2')]
Level 5: [(16, 'IaQo7: PUkk2BHe6L3pVfNX1Gvb'), (17, 'q1rg1: RhRyYLzLxrwBPQIIaNzy'), (18, 'ZsUya: d9KIo05zWl74ILYtDcGp')]
Level 6: [(19, 'stP96: euNecyZAgEyZBjoirVY7'), (20, 'dJNrn: jO0LRL4SCTP7wkn78qpP'), (21, 'PKHwM: raVKrE0XHxT6BRmZMadg')]
Level 7: [(22, 'AwfEz: oS2z8BNq7y4xdh3XFMrg'), (23, 'ap7g8: T1YjvBxVpXqcsJlunnaH'), (24, 'nyrCq: vu6Ndb69aNvPIqUbc5ek')]
Level 8: [(25, 'ROYAG: SL2hL

In [10]:
import os
init_space = get_file_size(filename)
kv_store.delete(548)
temp_filename = filename + '.tmp'
with open(filename,'r')as file , open(temp_filename, 'w') as temp_file:
    for line in file:
        if "5Gu9I" not in line:
            temp_file.write(line)
        
os.remove(filename)
os.rename(temp_filename , filename)
end_space = get_file_size(filename)
print("After deletion",end_space,"KB")

Deletion took 0.000000 seconds
After deletion 282.9482421875 KB


In [11]:
kv_store.range_query(50,4090)

Time taken for range query :  0.0030143260955810547 seconds


[(50, 'IN03O: cAWyGe8ToD2YoSjMG5D2'),
 (51, 'Xr3cq: JgZ0wgWqlzwtwzag5FjK'),
 (52, 'BKRhO: hRinYYoltzo0JWZdb6PG'),
 (53, 'TYXXN: FXrb8uT6U8LoqFKk8JQ3'),
 (54, 'uemDB: g7HE5qUiTjVvBXyvfqol'),
 (55, '9hKlN: 9SBpd6nxSG7lyIwo7LkL'),
 (56, 'sJdzm: xveteyCakXaODkJ06vG5'),
 (57, 'yosnC: 2qAih7CUUNq5uEEbcU3Q'),
 (58, 'HTciC: QzjM2qgdDcE9g2su871Z'),
 (59, 'nGPSJ: k4zT3hCoPGjAHxmq4nt8'),
 (60, 'bJWxA: GpH2DZbInfDQUVIItccO'),
 (61, 'IzMqs: FvTQ8CXQIjlglP6v3ewL'),
 (62, 'om2ZX: 3CyY8P2J5O8l3HOc4RcI'),
 (63, 'MrxUV: SwRCrjdWUn74iTKpwWVI'),
 (64, 'rm21K: btnvdov9gt6eZdTiGG1q'),
 (65, 'rRjdI: H4B11aWIaA4ZQHVJIEZt'),
 (66, '4Urpl: iCnOtDTRjMgyWG5HVhhs'),
 (67, 'VaTY3: xqF99oNSi1CPzQ7G7hCd'),
 (68, 'IiStq: 1sfgVfcCXDUo1eQLEmwy'),
 (69, 'jBQPZ: aDnzayxTmRoaqSFs2IO2'),
 (70, 'LmStc: 7b18BgY9glCBzwq0qnNC'),
 (71, 'H78Dc: lWMLV26MIXKYL8Fc780S'),
 (72, '7EYYu: 9kCjRz5jVEONIL9fDfD9'),
 (73, 'bIm27: mBabMCx1N1R7MhKzNai0'),
 (74, '9Iuyr: ODYX8akwBDYn59BtvZ1K'),
 (75, 'e0DKp: YX9rhRAq55Y1rGa2IHYL'),
 (76, 'iRhSj

In [12]:
kv_store.search(4567)

Get operation took 0.000000 seconds


'oT4xa: gPkPw0mEbnLK28tN0bbW'