In [1]:
# set text wrapping
from IPython.display import HTML, display


def set_css():
    display(
        HTML(
            """
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  """
        )
    )


get_ipython().events.register("pre_run_cell", set_css)

In [40]:
from llama_index import download_loader, VectorStoreIndex, ServiceContext
from pathlib import Path

In [32]:
import llama_cpp
import json
from llama_index import SimpleDirectoryReader, Document

In [27]:
data = json.load(open("data.json", "r"))
print(data[0]["title"], data[0]["bodyText"][:50])

RecursionError: maximum recursion depth exceeded in comparison Dagster version
1.4.5
What's the issue?
It's occur


In [28]:
print(data[0].keys())

dict_keys(['id', 'number', 'title', 'bodyText', 'state', 'createdAt', 'closedAt', 'url', 'labels', 'reactions'])


In [48]:
documents = []
for record in data:
    documents.append(
        Document(
            _id=record["id"],
            text=record["title"] + record["bodyText"],
            metadata={
                "url": record["url"],
                "labels": record["labels"].get("nodes", []),
                "reactions": record["reactions"].get("totalCount", 0),
            },
        )
    )

In [49]:
from llama_index.schema import MetadataMode

document = documents[2]
print("The LLM sees this: \n", document.get_content(metadata_mode=MetadataMode.LLM))

The LLM sees this: 
 url: https://github.com/dagster-io/dagster/issues/15825
labels: []
reactions: 0

Simplify setting allow_nonexistent_upstream_partitionsWhat's the use case?
We would like to provide data owners with the ability to configure allow_nonexistent_upstream_partitions in their dbt models.
Currently the only way to pass this in when constructing an Asset in Dagster is by defining a TimeWindowPartitionsMapping.
Ideas of implementation
Ideally this setting can be defined at the asset level so that it is applied to all AssetIns, OR can be set while creating the AssetIns dictionary.
asset = AssetsDefinition(
    ...
    allow_nonexistent_upstream_partitions=True
)
Or
ins = {"parent_asset": ("parent_op": In(allow_nonexistent_upstream_partitions=True, Nothing))}
Additional information
No response
Message from the maintainers
Impacted by this issue? Give it a 👍! We factor engagement into prioritization.


In [50]:
print(
    "The Embedding model sees this: \n",
    document.get_content(metadata_mode=MetadataMode.EMBED),
)

The Embedding model sees this: 
 url: https://github.com/dagster-io/dagster/issues/15825
labels: []
reactions: 0

Simplify setting allow_nonexistent_upstream_partitionsWhat's the use case?
We would like to provide data owners with the ability to configure allow_nonexistent_upstream_partitions in their dbt models.
Currently the only way to pass this in when constructing an Asset in Dagster is by defining a TimeWindowPartitionsMapping.
Ideas of implementation
Ideally this setting can be defined at the asset level so that it is applied to all AssetIns, OR can be set while creating the AssetIns dictionary.
asset = AssetsDefinition(
    ...
    allow_nonexistent_upstream_partitions=True
)
Or
ins = {"parent_asset": ("parent_op": In(allow_nonexistent_upstream_partitions=True, Nothing))}
Additional information
No response
Message from the maintainers
Impacted by this issue? Give it a 👍! We factor engagement into prioritization.


In [51]:
from llama_index.node_parser import SimpleNodeParser

parser = SimpleNodeParser.from_defaults()

nodes = parser.get_nodes_from_documents(documents)

In [52]:
nodes[0]

TextNode(id_='895b74cc-f2c1-4377-9c39-2c9c702bba8f', embedding=None, metadata={'url': 'https://github.com/dagster-io/dagster/issues/15822', 'labels': [{'name': 'type: bug', 'description': "Something isn't working"}], 'reactions': 3}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='3c0176a8-dffe-43b7-a1c3-cba938a8f5d6', node_type=None, metadata={'url': 'https://github.com/dagster-io/dagster/issues/15822', 'labels': [{'name': 'type: bug', 'description': "Something isn't working"}], 'reactions': 3}, hash='ab54c0ff052a0ff2e0644d22fe9e8e7243ba06f74f99006d9db83973d5ec998f'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='30aaa469-6f03-4adc-858b-83e8f81ce06f', node_type=None, metadata={'url': 'https://github.com/dagster-io/dagster/issues/15822', 'labels': [{'name': 'type: bug', 'description': "Something isn't working"}], 'reactions': 3}, hash='bb09734311eb0c7f972d795675dcfe33ee7f485167b5602ea84173d

In [53]:
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    TitleExtractor,
    QuestionsAnsweredExtractor,
)
from llama_index.text_splitter import TokenTextSplitter

text_splitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128)
metadata_extractor = MetadataExtractor(
    extractors=[
        TitleExtractor(nodes=5),
        QuestionsAnsweredExtractor(questions=3),
    ],
)

node_parser = SimpleNodeParser.from_defaults(
    text_splitter=text_splitter,
    metadata_extractor=metadata_extractor,
)
# assume documents are defined -> extract nodes
nodes = node_parser.get_nodes_from_documents(documents)

Extracting questions:   0%|          | 0/176 [00:00<?, ?it/s]

In [64]:
from llama_index.llms import LlamaCPP

llama = LlamaCPP(
    model_path="/home/pedram/projects/llm-cache/models/openbuddy-llama2-13b-v11.1.Q4_0.gguf"
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /home/pedram/projects/llm-cache/models/openbuddy-llama2-13b-v11.1.Q4_0.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_0     [  5120, 37632,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q4_0     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q4_0     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q4_0     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_0     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q4_0     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q4_0     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_dow

In [65]:
from llama_index import ServiceContext

service_context = ServiceContext.from_defaults(llm=llama)

In [66]:
index = VectorStoreIndex(nodes)

In [82]:
print(nodes[5].get_content(metadata_mode="all"))

[Excerpt from document]
url: https://github.com/dagster-io/dagster/issues/15822
labels: [{'name': 'type: bug', 'description': "Something isn't working"}]
reactions: 3
document_title: Error handling and troubleshooting in Python code for fetching input asset version and event information
questions_this_excerpt_can_answer: 1. What is the purpose of the "retry_pg_connection_fn" function in the "utils.py" file?
2. What is the role of the "raw_connection" method in the SQLAlchemy engine?
3. How does the "connect" method in the SQLAlchemy pool handle connection requests?
Excerpt:
-----
line 165, in create_pg_connection
    conn = retry_pg_connection_fn(engine.connect)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/dagster_postgres/utils.py", line 117, in retry_pg_connection_fn
    return fn()
           ^^^^
  File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 3325, in connect
    return self._connection_cls(self,

In [83]:
query_engine = index.as_query_engine()

In [117]:
response = query_engine.query(
    'You are a support bot that will search the context for a relevant discussion in response to a question from the user. If you find a relevant discussion, rephrase the users question and provide them a link to the Github Discussion URL. The users quesiton is: "How do I setup a custom step launchers"'
)

In [118]:
from llama_index.response.notebook_utils import display_source_node, display_response

In [119]:
display_response(
    response, source_length=1000, show_source=True, show_source_metadata=True
)

**`Final Response:`** I'm sorry, but I couldn't find a relevant discussion in the given context regarding setting up custom step launchers.

---

**`Source Node 1/2`**

**Node ID:** dfd83b6b-5a18-4eaa-a59d-aba4de3405bc<br>**Similarity:** 0.8144518152007685<br>**Text:** step_context):
        step_context.log.info("Launching step from custom StepLauncher")
        sleep(10)


@op()
def my_custom_step(context, step_launcher: NewStepLauncher):
    pass


@job(resource_defs={"step_launcher": NewStepLauncher})
def my_custom_job():
    my_custom_step()

Deployment type
Dagster Cloud
Deployment details
No response
Additional information
No response
Message from the maintainers
Impacted by this issue? Give it a 👍! We factor engagement into prioritization.<br>**Metadata:** {'url': 'https://github.com/dagster-io/dagster/issues/15943', 'labels': [{'name': 'type: bug', 'description': "Something isn't working"}, {'name': 'area: resource', 'description': 'Related to Resources'}], 'reactions': 0, 'document_title': 'Error handling and troubleshooting in Python code for fetching input asset version and event information', 'questions_this_excerpt_can_answer': '1. What is the purpose of the "my_custom_step" function in the Python code?\n2. What is the role of the "NewStepLauncher" resource in the "my_custom_job" function?\n3. How long does the step wait before launching in the "step_context" code snippet?'}<br>

---

**`Source Node 2/2`**

**Node ID:** b2ae0d2d-344a-46b2-8261-1a29bb8ba299<br>**Similarity:** 0.8061426729508149<br>**Text:** Custom StepLauncher can't be used as a ConfigurableResourceDagster version
v1.4.7
What's the issue?
It appears that passing a custom step launcher as a ConfigurableResource does not currently work. This prevents any jobs that use a custom step launcher from leveraging ConfigurableResources.
What did you expect to happen?
I was hoping that I could create a StepLauncher which subclasses from both StepLauncher and ConfigurableResource, then pass it into an op the same way ConfigurableResources are usually passed, avoiding the context and required_resource_keys. While working through it I realized that this would still be a bit clunky, as the step launcher resource wouldn't actually be used by the op. So it feels like maybe a different means by which to pass the step launcher / associate it with an op might be necessary - maybe as part of the @op arguments? Like
@op(step_launcher=CustomStepLauncher)
def custom_step_launcher_op():
  pass

although you'd still need to be able to allow for...<br>**Metadata:** {'url': 'https://github.com/dagster-io/dagster/issues/15943', 'labels': [{'name': 'type: bug', 'description': "Something isn't working"}, {'name': 'area: resource', 'description': 'Related to Resources'}], 'reactions': 0, 'document_title': 'Error handling and troubleshooting in Python code for fetching input asset version and event information', 'questions_this_excerpt_can_answer': '1. What is the issue with using a custom step launcher as a ConfigurableResource in Dagster?\n2. What was the expected behavior when passing a custom step launcher as a ConfigurableResource?\n3. How can the issue of passing a step launcher and associating it with an op be resolved in Dagster?'}<br>