# Initial Producer
This is the initial producer in the streaming pipeline. This should run on the computer dedicated to the hyperspectral camera and stream raw data out to the PARADIM broker when it is captured. The code is structured off of notebooks at github.com/openmsi/openmsistream_short_course.

## Config

In [1]:
# imports
import pathlib, logging, importlib
from threading import Thread
from openmsitoolbox.logging import OpenMSILogger
from openmsistream import UploadDataFile, DataFileUploadDirectory

In [2]:
# Configure a logger (only needed when running in a Jupyter notebook like this)
logger = OpenMSILogger("LocalProducer", filelevel=None)
importlib.reload(logging)

<module 'logging' from '/Users/namanparikh/opt/anaconda3/envs/openmsi/lib/python3.9/logging/__init__.py'>

In [14]:
# The name of the topic to work with
TOPIC_NAME = "tutorial_data"

# Paths to the config file and the directory holding the test files
repo_root_dir = pathlib.Path().resolve().parent
CONFIG_FILE_PATH = repo_root_dir / "streaming" / "config_files" / "confluent_cloud_broker.config"
TEST_FILE_DIR = repo_root_dir / "streaming" / "test_folder" / "test_data.txt"

## Upload Data (to test)
can you upload entire files as a msg?

In [15]:
CONFIG_FILE_PATH

PosixPath('/Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/config_files/confluent_cloud_broker.config')

In [16]:
TEST_FILE_DIR

PosixPath('/Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/test_data.txt')

In [18]:
TEST_FOLDER_DIR.rglob("*")

<generator object Path.rglob at 0x7face8432d60>

In [20]:
for iuf, upload_file_path in enumerate(TEST_FOLDER_DIR.rglob("*")):
    print(iuf, upload_file_path)

0 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.DS_Store
1 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/test_data.txt
2 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.ipynb_checkpoints
3 /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.ipynb_checkpoints/test_data-checkpoint.txt


In [17]:
# For every file in the folder
for iuf, upload_file_path in enumerate(TEST_FOLDER_DIR.rglob("*")):
    # Skip any hidden files (like .DS_Store....)
    if upload_file_path.is_dir() or upload_file_path.name.startswith("."):
        continue
    # Create an UploadDataFile and call the function to upload it to the topic
    upload_file = UploadDataFile(upload_file_path, rootdir=TEST_FILE_DIR, logger=logger)
    upload_file.upload_whole_file(CONFIG_FILE_PATH, TOPIC_NAME)

[LocalProducer 2024-07-17 22:09:26] Uploading /Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/test_data.txt to tutorial_data in 524288-byte chunks using 2 threads....
[LocalProducer 2024-07-17 22:09:26] ERROR: failed during call to Producer.produce! Will log and re-raise Exception.
Traceback (most recent call last):
  File "/Users/namanparikh/opt/anaconda3/envs/openmsi/lib/python3.9/site-packages/openmsistream/kafka_wrapper/openmsistream_producer.py", line 303, in produce_object
    key=obj.msg_key,
  File "/Users/namanparikh/opt/anaconda3/envs/openmsi/lib/python3.9/site-packages/openmsistream/data_file_io/entity/data_file_chunk.py", line 106, in msg_key
    key_pp = get_message_prepend(self.subdir_str, self.filename)
  File "/Users/namanparikh/opt/anaconda3/envs/openmsi/lib/python3.9/site-packages/openmsistream/data_file_io/entity/data_file_chunk.py", line 96, in subdir_str
    relpath = self.__filepath.parent.relative_to(self.__rootdir)


ValueError: '/Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/.ipynb_checkpoints/test_data-checkpoint.txt' is not in the subpath of '/Users/namanparikh/Documents/GitHub/paradim/reu2024-hyperspectral-camera/streaming/test_folder/test_data.txt' OR one path is relative and the other is absolute.

## DataUploadFileDirectory

In [None]:
def upload_task(upload_directory, *args, **kwargs):
    """Run "upload_files_as_added" for a given DataFileUploadDirectory, and log a message
    when it gets shut down

    Args:
        upload_directory (DataFileUploadDirectory): the DataFileUploadDirectory to run
        args (list): passed through to "upload_files_as_added"
        kwargs (dict): passed through to "upload_files_as_added"
    """
    # This call to "upload_files_as_added" waits until the program is shut down
    uploaded_filepaths = upload_directory.upload_files_as_added(*args, **kwargs)
    msg = (
        f"The following files were uploaded:\n\t"
    )
    msg += "\n\t".join([str(fp) for fp in uploaded_filepaths])
    upload_directory.logger.info(msg)

In [None]:
# Create the DataFileUploadDirectory
dfud = DataFileUploadDirectory(TEST_FILE_DIR, CONFIG_FILE_PATH, logger=logger)
# Start running its "upload_files_as_added" function in a separate thread
upload_thread = Thread(
    target=upload_task,
    args=(
        dfud,
        TOPIC_NAME,
    ),
)
upload_thread.start()

In [None]:
# Manually shut down the upload directory (if running from the command line this would
# be like typing "q" in the Terminal window)
dfud.control_command_queue.put("q")
upload_thread.join()