# Initial Producer
This is the initial producer in the streaming pipeline. This should run on the computer dedicated to the hyperspectral camera and stream raw data out to the PARADIM broker when it is captured. The code is structured off of notebooks at github.com/openmsi/openmsistream_short_course.

## Config

In [1]:
# imports
import pathlib, logging, importlib
from threading import Thread
from openmsitoolbox.logging import OpenMSILogger
from openmsistream import UploadDataFile, DataFileUploadDirectory

In [2]:
# Configure a logger (only needed when running in a Jupyter notebook like this)
logger = OpenMSILogger("LocalProducer", filelevel=None)
importlib.reload(logging)

<module 'logging' from '/Users/namanparikh/opt/anaconda3/envs/openmsi/lib/python3.9/logging/__init__.py'>

In [3]:
# The name of the topic to work with
TOPIC_NAME = "tutorial_data"

# Paths to the config file and the directory holding the test files
repo_root_dir = pathlib.Path().resolve().parent
CONFIG_FILE_PATH = repo_root_dir / "streaming" / "config_files" / "confluent_cloud_broker.config"
TEST_FILE_DIR = repo_root_dir / "streaming" / "test_folder"

## UploadDataFile (to test)
can you set it up so that each image folder is uploaded as a separate message?

In [4]:
CONFIG_FILE_PATH

PosixPath('/Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/config_files/confluent_cloud_broker.config')

In [5]:
TEST_FILE_DIR

PosixPath('/Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder')

In [6]:
for iuf, upload_file_path in enumerate(TEST_FILE_DIR.rglob("*")):
    print(iuf, upload_file_path)

0 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.DS_Store
1 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/test_data.txt
2 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/LOGS
3 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.ipynb_checkpoints
4 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/LOGS/upload_to_tutorial_data_in_progress.csv
5 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.ipynb_checkpoints/test_data-checkpoint.txt


In [7]:
# For every file in the folder
for iuf, upload_file_path in enumerate(TEST_FILE_DIR.rglob("*")):
    # Skip any hidden files (like .DS_Store....)
    if upload_file_path.is_dir() or upload_file_path.name.startswith("."):
        continue
    # Create an UploadDataFile and call the function to upload it to the topic
    upload_file = UploadDataFile(upload_file_path, rootdir=TEST_FILE_DIR, logger=logger)
    upload_file.upload_whole_file(CONFIG_FILE_PATH, TOPIC_NAME)

[LocalProducer 2024-07-18 00:00:59] Uploading /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/test_data.txt to tutorial_data in 524288-byte chunks using 2 threads....
[LocalProducer 2024-07-18 00:01:00] Waiting for all enqueued messages to be delivered (this may take a moment)....
[LocalProducer 2024-07-18 00:01:00] Done uploading /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/test_data.txt
[LocalProducer 2024-07-18 00:01:00] Uploading /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.ipynb_checkpoints/test_data-checkpoint.txt to tutorial_data in 524288-byte chunks using 2 threads....
[LocalProducer 2024-07-18 00:01:00] Waiting for all enqueued messages to be delivered (this may take a moment)....
[LocalProducer 2024-07-18 00:01:01] Done uploading /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.ipynb

## DataUploadFileDirectory

In [7]:
def upload_task(upload_directory, *args, **kwargs):
    """Run "upload_files_as_added" for a given DataFileUploadDirectory, and log a message
    when it gets shut down

    Args:
        upload_directory (DataFileUploadDirectory): the DataFileUploadDirectory to run
        args (list): passed through to "upload_files_as_added"
        kwargs (dict): passed through to "upload_files_as_added"
    """
    # This call to "upload_files_as_added" waits until the program is shut down
    uploaded_filepaths = upload_directory.upload_files_as_added(*args, **kwargs)
    msg = (
        f"The following files were uploaded:\n\t"
    )
    msg += "\n\t".join([str(fp) for fp in uploaded_filepaths])
    upload_directory.logger.info(msg)

In [10]:
# Create the DataFileUploadDirectory
dfud = DataFileUploadDirectory(TEST_FILE_DIR, CONFIG_FILE_PATH, logger=logger)
# Start running its "upload_files_as_added" function in a separate thread
upload_thread = Thread(
    target=upload_task,
    args=(
        dfud,
        TOPIC_NAME,
    ),
)
upload_thread.start()

[LocalProducer 2024-07-18 00:17:41] Will upload new files added to/Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder to the tutorial_data topic as 524288-byte chunks using 2 threads
[LocalProducer 2024-07-18 00:18:09] Will quit after all currently enqueued files are done being transferred.
[LocalProducer 2024-07-18 00:18:09] Waiting for all enqueued messages to be delivered (this may take a moment)
[LocalProducer 2024-07-18 00:18:09] The following files were uploaded:
	test_watchdog.txt


In [11]:
# Manually shut down the upload directory (if running from the command line this would
# be like typing "q" in the Terminal window)
dfud.control_command_queue.put("q")
upload_thread.join()