---
## Create big data with Faker and read it with a generator
### Tuesday: 27-02-2024

* Read the data normally

* Read the data using generator 

* Compare time and memory usage

---

In [1]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join('..')))
import pandas as pd
from faker import Faker
from fakerHandler import FakerHandler
from Utils import Utils as utils

# Shared Faker instance; createFakeData reads it as a module-level global.
fake = Faker()
# Number of fake user rows to generate; also embedded in the CSV file name below.
dataSize = 1000000
# Relative path of the generated CSV, so the file name changes whenever dataSize does.
fake_user_filepath = "../data/csv/fake_users_"+str(dataSize)+".csv"

def createFakeData(filepath, size):
    """Write a CSV of ``size`` fake users to ``filepath`` unless it already exists.

    The file gets three columns -- ``Name``, ``Address`` and ``Email`` -- whose
    values come from the module-level ``fake`` Faker instance. Newlines inside
    generated addresses are replaced with ``', '`` so every record stays on one
    CSV line.

    Args:
        filepath: Destination path of the CSV file. If a file is already
            present at this path the function returns without touching it.
        size: Number of rows to generate for every column.
    """
    if os.path.exists(filepath):
        # The file acts as a cache: never regenerate or overwrite existing data.
        return

    # Every column must be driven by ``size``. Using the global ``dataSize`` for
    # one of them produced columns of unequal length (and a pandas ValueError)
    # whenever the caller asked for a different number of rows.
    data = {
        'Name': [fake.name() for _ in range(size)],
        'Address': [fake.address().replace('\n', ', ') for _ in range(size)],
        'Email': [fake.email() for _ in range(size)]
    }
    df = pd.DataFrame(data)
    df.to_csv(filepath, index=False)

def fakerHandler_read_csv():
    """Load the fake-user CSV via ``FakerHandler.readCSV`` and print its first two entries.

    The whole result of ``readCSV`` is obtained before anything is printed;
    only the items at index 0 and 1 are shown.
    """
    handler = FakerHandler()
    rows = handler.readCSV(fake_user_filepath)
    # Index explicitly (rather than slicing) so a result with fewer than two
    # entries still raises IndexError.
    for position in range(2):
        print(rows[position])

def fakerHandler_read_csv_generator():
    """Print the first two items yielded by ``FakerHandler.readCSVGenerator``.

    Only two ``next()`` calls are made on the returned iterator; the rest of
    it is never consumed here.
    """
    handler = FakerHandler()
    row_iter = handler.readCSVGenerator(fake_user_filepath)
    # Pull exactly two items; StopIteration propagates if fewer are available.
    for _ in range(2):
        print(next(row_iter))

# Generate the CSV on first run; createFakeData does nothing when the file already exists.
createFakeData(fake_user_filepath, dataSize)
# Measure the functions
# NOTE(review): measure_function comes from Utils (not shown here); the recorded
# output shows it printing "Memory used" and "Time taken" for each call.
# The generator variant only pulls two items while the list variant receives the
# whole readCSV result, so the two measurements are not doing equal work.
print(">>>Measuring fakerHandler_read_csv__")
utils.measure_function(fakerHandler_read_csv)
print(">>>Measuring fakerHandler_read_csv_generator")
utils.measure_function(fakerHandler_read_csv_generator)


>>>Measuring fakerHandler_read_csv__
Name,Address,Email

David Boone,"Unit 2954 Box 1192, DPO AP 13965",allenandrea@example.org

Memory used: 2.57421875 MB
Time taken: 0.421875 seconds

>>>Measuring fakerHandler_read_csv_generator
Name,Address,Email

David Boone,"Unit 2954 Box 1192, DPO AP 13965",allenandrea@example.org

Memory used: 0.00390625 MB
Time taken: 0.0 seconds



---
## Use big data and "Do something with it"
### Wednesday: 28-02-2024

* Count names normally

* Count names using generator

* Count names using generators and threading

---

In [19]:
def countNames(name):
    """Print how many records ``FakerHandler.getNameData`` returns for ``name``.

    The first field of every returned record is collected into a list and the
    length of that list is reported, so all matches are held in memory at once.

    Args:
        name: Name passed to ``getNameData`` and echoed in the printed message.
    """
    handler = FakerHandler()
    records = handler.getNameData(fake_user_filepath, name)
    first_fields = []
    for record in records:
        first_fields.append(record[0])
    total = len(first_fields)
    print("Number of "+name+": ", total)

def countNamesGenerator(name):
    """Print how many items ``FakerHandler.getNameDataGenerator`` yields for ``name``.

    Items are counted one at a time as they are produced; none of them is
    stored.

    Args:
        name: Name passed to ``getNameDataGenerator`` and echoed in the printed
            message.
    """
    handler = FakerHandler()
    matches = handler.getNameDataGenerator(fake_user_filepath, name)
    # Exhaust the iterator, adding one per yielded item.
    total = sum(1 for _ in matches)
    print("Number of "+name+": ", total)

# Run both counting strategies for the same name so their reported memory and
# time can be compared side by side.
# NOTE(review): the recorded output shows "Memory used: -1.0 MB" for the
# generator run, so the memory figure appears to be a before/after difference
# that can be dominated by noise -- confirm how Utils.measure_function computes it.
print(">>>Measuring countNames")
utils.measure_function(countNames, "John")
print(">>>Measuring countNamesGenerator")
utils.measure_function(countNamesGenerator, "John")

>>>Measuring countNames
Number of John:  63698
Memory used: 1.91796875 MB
Time taken: 0.359375 seconds

>>>Measuring countNamesGenerator
Number of John:  63698
Memory used: -1.0 MB
Time taken: 0.390625 seconds

