In [None]:
import os
import concurrent.futures
import time

def generate_large_file(filename, size_in_mb=50):
    """Create *filename* filled with ``size_in_mb`` MiB of random bytes.

    The data is produced and written in 1 MiB chunks so that peak memory use
    stays constant regardless of the requested file size. Missing parent
    directories are created. An existing file is overwritten.

    Args:
        filename: Path of the file to create.
        size_in_mb: Size of the file in MiB (1 MiB = 1024 * 1024 bytes).

    Raises:
        ValueError: If ``size_in_mb`` is negative.
    """
    if size_in_mb < 0:
        raise ValueError("size_in_mb must be non-negative")

    # Make sure the target directory exists; open() would otherwise fail
    # with FileNotFoundError for paths such as "./data/big_file_0.txt".
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    chunk_size = 1024 * 1024
    remaining = size_in_mb * 1024 * 1024
    with open(filename, 'wb') as f:
        while remaining > 0:
            step = min(chunk_size, remaining)
            f.write(os.urandom(step))
            remaining -= step

def read_file(filename):
    """Return the full contents of *filename* as a bytes object."""
    with open(filename, mode='rb') as source:
        payload = source.read()
    return payload

def write_file(data, filename):
    """Write the bytes in *data* to *filename*, replacing any existing content."""
    with open(filename, mode='wb') as sink:
        sink.write(data)

if __name__ == "__main__":
    # Benchmark: generate ten 100 MB files, then compare thread-pool and
    # process-pool executors for reading them and writing copies back out.

    # All input and output files live under ./data; create it up front so
    # the first open() does not fail with FileNotFoundError.
    os.makedirs("./data", exist_ok=True)

    # 1) generate several files
    file_list = [f"./data/big_file_{i}.txt" for i in range(10)]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    start_time = time.perf_counter()
    for fname in file_list:
        generate_large_file(fname, size_in_mb=100)  # generate a 100 MB file
    end_time = time.perf_counter()
    print(f"File generation time: {end_time - start_time:.2f} seconds")

    # 2) read files using ThreadPoolExecutor
    start_time = time.perf_counter()
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        contents_threadpool = list(executor.map(read_file, file_list))
    end_time = time.perf_counter()
    print(f"File reading time (ThreadPool): {end_time - start_time:.2f} seconds")

    # 3) read files using ProcessPoolExecutor
    # NOTE(review): ProcessPoolExecutor requires that the __main__ module be
    # importable by the worker processes, so it does not work from an
    # interactive interpreter. If this raises BrokenProcessPool when run
    # interactively, that is the likely cause -- confirm by moving the helper
    # functions into an importable module or running this as a script.
    # Each file's bytes are also pickled and sent back to the parent process,
    # which adds overhead that the thread-pool variant does not pay.
    start_time = time.perf_counter()
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        contents_processpool = list(executor.map(read_file, file_list))
    end_time = time.perf_counter()
    print(f"File reading time (ProcessPool): {end_time - start_time:.2f} seconds")

    # 4) write files using ThreadPoolExecutor
    out_file_list = [f"./data/output_{i}.txt" for i in range(len(file_list))]
    start_time = time.perf_counter()
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # Consume the result iterator: exceptions raised inside write_file
        # are only re-raised when the results are retrieved, so an
        # unconsumed map() would silently hide failed writes.
        list(executor.map(write_file, contents_threadpool, out_file_list))
    end_time = time.perf_counter()
    print(f"File writing time (ThreadPool): {end_time - start_time:.2f} seconds")

    # 5) write files using ProcessPoolExecutor
    # This overwrites the same output files produced in step 4.
    start_time = time.perf_counter()
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        # Consumed for the same reason as in step 4.
        list(executor.map(write_file, contents_processpool, out_file_list))
    end_time = time.perf_counter()
    print(f"File writing time (ProcessPool): {end_time - start_time:.2f} seconds")


File generation time: 1.44 seconds
File reading time (ThreadPool): 0.38 seconds


BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.