In [1]:
from sources import DataSource, DataFrame
from sinks import DataSink
from processors import DataProcessor


In [None]:
from dataclasses import dataclass
from typing import Iterable, Optional
import time
import threading
# --------------------------------------------------
# Example concrete implementations:
# --------------------------------------------------

# Define a simple data class as our DataFrame.
@dataclass
class MyFrame:
    value: int

# 1. A simple DataSource that generates MyFrame objects.
class SimpleSource(DataSource[MyFrame]):
    def __init__(self, total: int = 5, delay: float = 0.2):
        self.remaining = total
        self.delay = delay
        self.lock = threading.Lock()
        super().__init__()
    
    def fetch(self) -> MyFrame:
        with self.lock:
            if self.remaining <= 0:
                print("[SimpleSource] No more data to fetch.")
                return None
            self.remaining -= 1
            value = self.remaining
        # Simulate delay in fetching data.
        time.sleep(self.delay)
        frame = MyFrame(value)
        print(f"[SimpleSource] Fetched: {frame}")
        return frame
    
    def close(self) -> None:
        print("[SimpleSource] closed.")

# 2. A DataProcessor that doubles the value in MyFrame.
class DoubleProcessor(DataProcessor[MyFrame, MyFrame]):
    def __init__(self, sources: Iterable[DataSource[MyFrame]]):
        super().__init__(sources)
    
    def process(self, data: MyFrame) -> MyFrame:
        new_frame = MyFrame(data.value * 2)
        print(f"[DoubleProcessor] {data.value} doubled to {new_frame.value}")
        return new_frame

# 3. A DataProcessor that increments the value in MyFrame.
class IncrementProcessor(DataProcessor[MyFrame, MyFrame]):
    def __init__(self, sources: Iterable[DataSource[MyFrame]]):
        super().__init__(sources)
    
    def process(self, data: MyFrame) -> MyFrame:
        new_frame = MyFrame(data.value + 1)
        print(f"[IncrementProcessor] {data.value} incremented to {new_frame.value}")
        return new_frame

# 4. A DataSink that prints the final MyFrame.
class PrintSink(DataSink[MyFrame]):
    def __init__(self, sources: Iterable[DataSource[MyFrame]]):
        super().__init__(sources)
    
    def write(self, data: MyFrame) -> None:
        print(f"[PrintSink] Final output: {data}")

    def close(self) -> None:
        print("[PrintSink] closed.")
        super().close()


# --------------------------------------------------
# Example Pipeline Chaining:
# SimpleSource --> DoubleProcessor --> IncrementProcessor --> PrintSink
# --------------------------------------------------
# Instantiate a source that produces 5 MyFrame items.
source = SimpleSource(total=5, delay=0.1)

# First processor doubles the value.
double_proc = DoubleProcessor(sources=[source])

# Second processor increments the value.
# increment_proc = IncrementProcessor(sources=[double_proc])

# Sink prints the processed data.
sink = PrintSink(sources=[double_proc])

# Consume data from the sink.
print("\n--- Processing Pipeline Output ---")
try:
    for item in sink:
       # The sink's process() already calls sink() internally,
        # so here we can just iterate to ensure the pipeline runs.
        pass
except KeyboardInterrupt:
    pass
finally:
    sink.__close__()

DataSource: Detected DataFrame class: <class '__main__.MyFrame'>
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more data to fetch.
[SimpleSource] No more 

In [5]:
# source = SimpleSource(total=5, delay=0.1)
source.dataframe_type

__main__.MyFrame

In [6]:
# double_proc = DoubleProcessor(sources=[source])
double_proc.dataframe_type

__main__.MyFrame

In [7]:
sink = PrintSink(sources=[increment_proc])
sink.dataframe_type

[PrintSink] Final output: MyFrame(value=9)


In [8]:
sink.dataframe_type

In [18]:
args

(__main__.MyFrame, __main__.MyFrame)