In [None]:
from opentldr import Workflow

### Data Repo Setup

This should be modified based on where the data is located.
This could alternatively return an S3 bucket config.

In [None]:
# Root folder of the data set; build_data_repo_config() puts it at the front of every repo path.
data_repo_prefix = '../Data/Sample'
# Optional sub-folder inserted between the prefix and the folder name.
# When None, build_data_repo_config() omits this path segment.
date_string = None

In [None]:
def build_data_repo_config(folder_name:str) -> dict:
    '''
    Return a file-based data repo config for folder_name (for example "content").

    The resulting path is <data_repo_prefix>/<folder_name>, or
    <data_repo_prefix>/<date_string>/<folder_name> when the module-level
    date_string is not None.
    '''
    if date_string is None:
        location = f'{data_repo_prefix}/{folder_name}'
    else:
        # A date was configured, so it becomes the middle path segment.
        location = f'{data_repo_prefix}/{date_string}/{folder_name}'

    return {'repo_type': 'files', 'path': location}

## LLM Setup

This should be modified to output an LLM config that you want to use.

In [None]:
# Selects the LLM backend used by build_llm_config() (compared case-insensitively):
# 'ollama' yields the Ollama config; any other value yields the GPT4ALL config.
# NOTE(review): this name shadows Python's builtin `type` for the rest of the notebook.
type = 'ollama'

In [None]:
def build_llm_config() -> dict:
        '''
        builds a simple llm config that inserts model info.
        '''
        match (type.lower()):
            case "ollama":
                return {
                    'type': 'Ollama',
                    'device':'local',
                    'model':'mistral:latest'
                }
            case "gpt4all":
                return {
                    'type': 'GPT4ALL',
                    'device':'gpu',
                    'model':'../LLM_Models/mistral-7b-openorca.gguf2.Q4_0.gguf'
                }
            case _ :
                return {
                    'type': 'GPT4ALL',
                    'device':'gpu',
                    'model':'../LLM_Models/mistral-7b-openorca.gguf2.Q4_0.gguf'
                }

## Specify the Workflow in code
The workflow includes:
- **Output**: the directory where the workflow writes read-only copies of the notebooks as executed
- **Notebooks**: a list of notebooks (full path) in the order in which they should be executed
    - For each notebook, the set of parameters to be passed into it through the workflow process

In [None]:
# Workflow definition handed to opentldr's Workflow class below.
# Each entry under "Notebooks" is a two-element list: [notebook path, parameter dict].
# The build_*_config() helpers are called here, while this dict is being built,
# so the configs reflect the settings at the time this cell runs.
workflow = {
    
    # Where a read-only version of each notebook AFTER execution is stored
    "Output": "./READ_ONLY_OUTPUT",
    
    # Parameters passed into all notebooks in the workflow
    "Common": {
        # 10 is the numeric value of logging.DEBUG in Python's logging module
        # (presumably how the notebooks interpret it -- confirm)
        "logging_level":10,
        "verbose": True,
    },

    # Order and parameters of the notebooks to execute in the workflow
    "Notebooks": [

        # Set up the KG
        [ "Stage_1_Initialize/Clear_All.ipynb", {
            "message": "Successfully passed in parameters from Workflow.ipynb!",
        }],

        [ "Stage_1_Initialize/Load_Reference_Data.ipynb", {
            "data_repo_config": build_data_repo_config('reference'),
        }],

        # Load Content and Requests
        [ "Stage_2_Ingest/Load_Content.ipynb", {
            "data_repo_config": build_data_repo_config('content'),
        }],

        [ "Stage_2_Ingest/Load_Requests.ipynb", {
            "data_repo_config": build_data_repo_config('request'),
        }],

        # Generate stand-alone, untailored summaries; ONLY NEEDED for multi-doc comparisons in digger.
        # Note: this Stage_5 notebook is listed (and so ordered) before the Stage_3 and Stage_4 notebooks.
        [ "Stage_5_Summarize/Presummarize.ipynb",{
            "llm_config" : build_llm_config(),
        }],

        # Perform analytics to link entities in Requests and Content nodes
        [ "Stage_3_Connect/Entity_Cosin_Similarity.ipynb",{
            "sentence_embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
            "connect_threshold": 0.6,
            "hypothesize_threshold": 0.9
        }],

        # Compute recommendations based on relevance of content to request
        [ "Stage_4_Recommend/Shortest_Path_Scoring.ipynb", {
            "recommendation_threshold": 0.6
        }],

        # Generate a summary of the content that is tailored with respect to the request and useful reference knowledge.
        # The prompt carries the placeholders {request}, {knowledge} and {content}
        # (presumably filled in by the notebook -- confirm).
        [ "Stage_5_Summarize/Tailored_Abstractive_Summary.ipynb", {
            "llm_config" : build_llm_config(),
            "llm_prompt": "You are a helpful assistant responding to the request: {request} \n\n and were given these facts: {knowledge} \n\n Concisely summarize the following article: {content}"
        }],

        # Produce a TLDR Report for each request (no notebook-specific parameters)
        [ "Stage_6_Produce/Build_TLDR.ipynb", {}],

        # Run the Evaluation
        [ "Stage_7_Evaluate/Evaluate.ipynb", {
            "data_repo_config": build_data_repo_config('evalkey'),
            "sentence_embedding_model": "sentence-transformers/all-MiniLM-L6-v2"
        }]
    ]}

In [None]:
# Wrap the workflow definition above in opentldr's Workflow object and start it.
# Presumably run() executes the listed notebooks in order and writes the executed
# copies to the "Output" directory -- confirm against the opentldr Workflow implementation.
wf:Workflow = Workflow(workflow)
wf.run()