---
## Create big data with faker and use generator
### Tuesday: 27-02-2024

* Read the data normally

* Read the data using generator 

* Compare time and memory usage

---

In [10]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join('..')))
import time
import memory_profiler as mem_profile
import random
import pandas as pd
from faker import Faker
from fakerHandler import FakerHandler

fake = Faker()
dataSize = 10000000
fake_user_filepath = "../data/csv/fake_users_"+str(dataSize)+".csv"

def createFakeData(filepath, size):
    if not os.path.exists(filepath):
        data = {
            'Name': [fake.name() for _ in range(size)],
            'Address': [fake.address().replace('\n', ', ') for _ in range(dataSize)],
            'Email': [fake.email() for _ in range(size)]
        }
        df = pd.DataFrame(data)
        df.to_csv(filepath, index=False)

# Measure the time and memory usage of a function
def measure_function(func, *args, **kwargs):
    t1 = time.process_time()
    mem_before = mem_profile.memory_usage()

    # Function to measure
    func(*args, **kwargs)

    mem_after = mem_profile.memory_usage()
    t2 = time.process_time()
    print("Memory used: " + str(mem_after[0] - mem_before[0]) + " MB")
    print("Time taken: " + str(t2-t1) + " seconds"+"\n")

def fakerHandler_read_csv():
    fH = FakerHandler()
    lines = fH.readCSV(fake_user_filepath)
    print(lines[0])
    print(lines[1])

def fakerHandler_read_csv_generator():
    fH = FakerHandler()
    line = fH.readCSVGenerator(fake_user_filepath)
    print(next(line))
    print(next(line))

createFakeData(fake_user_filepath, dataSize)
# Measure the functions
print(">>>Measuring fakerHandler_read_csv__")
measure_function(fakerHandler_read_csv)
print(">>>Measuring fakerHandler_read_csv_generator")
measure_function(fakerHandler_read_csv_generator)


>>>Measuring fakerHandler_read_csv__
Name,Address,Email

James Strong,"7746 Roberto Key, Hallside, DC 88675",bobby66@example.org

Memory used: -0.01171875 MB
Time taken: 1.515625 seconds

>>>Measuring fakerHandler_read_csv_generator
Name,Address,Email

James Strong,"7746 Roberto Key, Hallside, DC 88675",bobby66@example.org

Memory used: 0.0 MB
Time taken: 0.0 seconds

