[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wandb/weave/blob/anish/add-spacerag-example/examples/cookbooks/rag/spacerag/part2.ipynb)
<!--- @wandbcode{weave-spacerag-cookbook} -->

In [1]:
import os

# Tracks whether this notebook is running inside Google Colab. Later cells read
# this flag to decide whether to clone the cookbook repository and install its
# requirements.
IN_COLAB = False
try:
    from google.colab import userdata

    # Copy the API keys from Colab's secret store into the process environment.
    for secret_name in ("WANDB_API_KEY", "TOGETHER_API_KEY", "OPENAI_API_KEY"):
        os.environ[secret_name] = userdata.get(secret_name)
    IN_COLAB = True
except Exception:
    # Any failure above (google.colab not importable, a secret that cannot be
    # read, ...) falls back to python-dotenv's load_dotenv(). Catching Exception
    # instead of using a bare `except:` keeps this best-effort fallback while
    # letting KeyboardInterrupt and SystemExit propagate.
    from dotenv import load_dotenv
    load_dotenv()

In [2]:
import os
import subprocess
import shutil

# Where the cookbook sources come from and where they are placed locally.
repo_url = "https://github.com/wandb/weave.git"
target_folder = "weave_cookbooks"
subdirectory = "examples/cookbooks"
branch = "anish/add-spacerag-example"

if os.path.exists(target_folder):
    print(f"Folder '{target_folder}' already exists.")
elif IN_COLAB:
    print(f"Cloning repository: {repo_url}")

    # Shallow-clone the branch into a temporary folder, keep only the cookbook
    # subdirectory, and always remove the temporary clone afterwards (even if
    # the clone or the move fails part-way).
    temp_folder = "temp_weave_repo"
    try:
        subprocess.run(
            ["git", "clone", "--depth", "1", "--branch", branch, repo_url, temp_folder],
            check=True,
        )
        shutil.move(os.path.join(temp_folder, subdirectory), target_folder)
    finally:
        shutil.rmtree(temp_folder, ignore_errors=True)

    print(f"Successfully cloned {subdirectory} from branch '{branch}' to {target_folder}")
else:
    # Previously this case also reported that the folder "already exists",
    # which was misleading: nothing exists and nothing is cloned outside Colab.
    print(f"Not running in Colab; skipping clone of {repo_url}.")

Folder 'weave_cookbooks' already exists.


In [3]:
# In Colab only: change into the cloned cookbook folder and install the
# packages listed in its requirements.txt.
# NOTE(review): the badge at the top of this notebook points at
# examples/cookbooks/rag/spacerag, and a later cell reads ./data/space.txt
# relative to the working directory, yet the path below targets the
# `summarization` cookbook — confirm this is the intended directory.
if os.path.exists(target_folder) and IN_COLAB:
    %cd weave_cookbooks/summarization
    !pip install -r requirements.txt

In [4]:
import weave
from weave import Evaluation
import os
import numpy as np
import faiss
from openai import OpenAI
from together import Together
import re
import json

In [5]:
# Initialise Weave for the `space_rag_example` project.
weave.init("space_rag_example")

# Together client used by the module-level `predict` op for chat completions;
# the API key is taken from the environment (None if the variable is unset).
client = Together(api_key=os.getenv("TOGETHER_API_KEY"))

Logged in as Weights & Biases user: a-sh0ts.
View Weave data at https://wandb.ai/a-sh0ts/space_rag_example/weave


In [6]:
# CHUNK DATA FROM EXTERNAL KNOWLEDGEBASE
@weave.op
def get_chunked_data(file, chunk_size=2048):
    """Read a text file and split its contents into fixed-size character chunks.

    Args:
        file: Path of the text file to read.
        chunk_size: Maximum number of characters per chunk. Defaults to 2048,
            the value that used to be hard-coded here.

    Returns:
        A list of consecutive, non-overlapping substrings of the file's text.
        Every chunk has ``chunk_size`` characters except possibly the last.
        An empty file yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    # Use a distinct name for the handle so the `file` argument is not shadowed,
    # and decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(file, 'r', encoding='utf-8') as text_file:
        text = text_file.read()

    # Split the document into consecutive slices of at most chunk_size characters.
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

# EMBED DATA
@weave.op
def get_text_embedding(input):
    """Embed ``input`` with OpenAI's ``text-embedding-3-small`` model.

    A fresh OpenAI client is created on every call using the
    ``OPENAI_API_KEY`` environment variable (a missing variable raises
    ``KeyError``). Only the first item of the response's ``data`` list is
    used, and its ``embedding`` attribute is returned.
    """
    openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

    embedding_response = openai_client.embeddings.create(
        input=input,
        model="text-embedding-3-small",
    )
    first_item = embedding_response.data[0]
    return first_item.embedding

# MAKE VECTORDB
@weave.op
def make_vector_db(file):
    """Build a FAISS L2 index over the embedded chunks of a text file.

    Args:
        file: Path of the text file; it is chunked with ``get_chunked_data``
            and every chunk is embedded with ``get_text_embedding``.

    Returns:
        A tuple ``(index, chunks)``: the ``faiss.IndexFlatL2`` holding one
        vector per chunk, and the list of chunk strings in the same order
        (row ``i`` of the index corresponds to ``chunks[i]``).

    Raises:
        ValueError: If the file produced no chunks, since an index dimension
            cannot be derived from zero embeddings.
    """
    chunks = get_chunked_data(file)
    if not chunks:
        raise ValueError(f"No text chunks could be read from {file!r}; cannot build an index.")

    # FAISS works on float32 matrices; build the array with that dtype
    # explicitly instead of NumPy's default float64.
    text_embeddings = np.array(
        [get_text_embedding(chunk) for chunk in chunks],
        dtype=np.float32,
    )

    # The index dimension is the embedding width (number of columns).
    d = text_embeddings.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(text_embeddings)
    return index, chunks

In [7]:
# Build the vector index from the knowledge-base text file. `index` and
# `chunks` are module-level names that `retrieve_context` reads directly.
file = './data/space.txt'
index, chunks = make_vector_db(file)

🍩 https://wandb.ai/a-sh0ts/space_rag_example/r/call/15af1517-98c4-4fd3-9fb5-655896bc6fb7


In [8]:
# ANSWER QUESTION
@weave.op
def predict(model, prompt):
    """Send ``prompt`` as a single user message and return the streamed reply.

    Uses the module-level Together ``client`` with streaming enabled and
    concatenates the text deltas of the first choice of every streamed chunk.

    Args:
        model: Model identifier passed to the chat-completions endpoint.
        prompt: Content of the single user message.

    Returns:
        The full response text. It is also printed to stdout before returning.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role":"user","content":prompt}],
        temperature=0.5,
        top_p=1,
        max_tokens=1024,
        stream=True
    )

    answer = []
    for chunk in completion:
        # A streamed event may carry an empty `choices` list; skip it rather
        # than failing with IndexError on `choices[0]`.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            answer.append(piece)

    result = ''.join(answer)
    print(result)

    return result

In [9]:
# RETRIEVE CHUNKS SIMILAR TO THE QUESTION
@weave.op
def retrieve_context(question: str, k: int = 2) -> list:
    """Return the stored chunks whose embeddings are nearest to the question.

    Args:
        question: Text to embed and look up in the module-level ``index``.
        k: Number of neighbours to request. Defaults to 2, the value that used
            to be hard-coded here.

    Returns:
        A list of at most ``k`` strings taken from the module-level ``chunks``,
        in the order returned by the index search.
    """
    # FAISS works on float32 matrices; one row per query.
    question_embeddings = np.array([get_text_embedding(question)], dtype=np.float32)

    # Only the neighbour ids are needed; the distances are discarded.
    _, neighbor_ids = index.search(question_embeddings, k=k)

    # FAISS reports -1 for slots it could not fill (fewer than k vectors in the
    # index). Drop those so they are not mistaken for `chunks[-1]`.
    return [chunks[i] for i in neighbor_ids.tolist()[0] if i >= 0]
    
class SpaceRAGModel(weave.Model):
    """Retrieval-augmented question answering over the indexed text chunks."""

    # Model identifier forwarded to the module-level `predict` op.
    model: str

    @weave.op()
    def predict(self, question: str):
        """Answer ``question`` using only the retrieved context.

        Retrieves context via ``retrieve_context``, embeds it together with the
        question in a prompt, and asks ``self.model`` for an answer through the
        module-level ``predict`` op. Progress is printed to stdout.

        Returns:
            A dict with the generated text under ``'answer'`` and the list of
            retrieved chunks under ``'retrieved_chunk'``.
        """
        context = retrieve_context(question)
        print(f"Question: {question}")

        # The retrieved chunks are interpolated as-is (the list's string form)
        # between the two separator lines, followed by the question.
        prompt = f"""
        Use this context to answer the question, don't use any prior knowledge.
        Be concise in your answers.
        ---------------------
        {context}
        ---------------------
        Question: {question}
        Answer:
        """
        generated = predict(self.model, prompt)
        print("___________________________")
        return {'answer': generated, 'retrieved_chunk': context}

In [10]:
def string_to_dict(input_string):
    """Extract every JSON object embedded in a string and merge them.

    The text is scanned for ``{`` characters and a JSON object is decoded
    starting at each one. Unlike a ``\\{.*?\\}`` regex, this also handles
    objects that span several lines or contain nested objects.

    Args:
        input_string: Free-form text (e.g. an LLM reply) that may contain
            zero or more JSON objects surrounded by other text.

    Returns:
        A single dict combining the key/value pairs of all decoded objects.
        When the same key occurs more than once, the last occurrence wins.
        Returns an empty dict when no object can be decoded.
    """
    decoder = json.JSONDecoder()
    combined_dict = {}

    pos = input_string.find("{")
    while pos != -1:
        try:
            # raw_decode parses one JSON value starting at `pos` and reports
            # the index just past it, so trailing text is tolerated.
            parsed_dict, end = decoder.raw_decode(input_string, pos)
        except json.JSONDecodeError as e:
            # Not a JSON object at this brace: report it and try the next one.
            print(f"Error processing part: {input_string[pos:pos + 80]}\nError: {e}")
            pos = input_string.find("{", pos + 1)
            continue

        combined_dict.update(parsed_dict)
        # Resume scanning after the object that was just consumed.
        pos = input_string.find("{", end)

    return combined_dict

In [11]:
# Resolve the Weave object stored at this ref URI (presumably the evaluation
# dataset — it exposes `.rows`) and keep only its first five rows as the small
# question set used by the evaluation loop.
dataset_ref = weave.ref("weave:///lavanyashukla/spacedata/object/space_dataset_llm_comprehensive:VBd5Ys7b3hGFmJJqdGTATVQYgKKrg70EiNV5FdpwFxs").get()
small_questions = dataset_ref.rows[:5]

In [12]:
def replace_nan_in_dict(result):
    """Replace float NaN values with 0, in place, and return the same mapping.

    Only values that are ``float`` instances (which includes ``np.float64``)
    are checked; all other values are left untouched.
    """
    nan_keys = [
        name
        for name in result
        if isinstance(result[name], float) and np.isnan(result[name])
    ]
    for name in nan_keys:
        result[name] = 0
    return result

In [13]:
# Evaluate with an LLM
@weave.op
def llm_judge_scorer(ground_truth: str, model_output: dict) -> dict:
    """Score a RAG answer with an LLM judge, one request per rubric.

    For every entry in ``eval_rubrics`` a scoring prompt is filled in with the
    ground truth, the retrieved context and the model's answer, and sent to the
    judge model through the module-level ``predict`` op. The judge is asked to
    reply with a JSON object mapping the metric name to an integer score.

    Args:
        ground_truth: The reference answer for the question.
        model_output: Dict with the generated text under ``'answer'`` and the
            retrieved context under ``'retrieved_chunk'``.

    Returns:
        The JSON objects found in the judge's replies, merged into one dict by
        ``string_to_dict`` (e.g. ``{"concise": 3}``). Empty if no JSON object
        could be parsed.
    """
    scorer_llm = "meta-llama/Meta-Llama-3-70B-Instruct-Turbo"
    answer = model_output['answer']
    retrieved_chunk = model_output['retrieved_chunk']

    # Only the "concise" rubric is active. The other rubrics are kept below,
    # commented out, so they can be re-enabled without rewriting them.
    eval_rubrics = [
        {
            "metric": "concise",
            "rubrics": """
        Score 1: The answer is rambling and difficult to understand.
        Score 2: The answer is somewhat readable, engaging, or long winded.
        Score 3: The answer is mostly easy to understand, and is somewhat concise.
        Score 4: The answer is completely concise, readable and engaging.
        """,
        },
        # {
        #     "metric": "relevant",
        #     "rubrics": """
        #     Score 1: The answer is not relevant to the original text.
        #     Score 2: The answer is somewhat relevant to the original text, but has significant flaws.
        #     Score 3: The answer is mostly relevant to the original text, and effectively conveys its main ideas and arguments.
        #     Score 4: The answer is completely relevant to the original text, and provides additional value or insight.
        #     """,
        # },
        # {
        #     "metric": "accurate",
        #     "rubrics": """
        #     Compare the factual content of the model's answer with the correct answer. Ignore any differences in style, grammar, or punctuation.
        #     Score 1: There is a disagreement between the model's answer and the correct answer.
        #     Score 2: The model's answer is a subset of the correct answer and is fully consistent with it.
        #     Score 3: The answers differ, but these differences don't matter from the perspective of factuality.
        #     Score 4: The model's answer contains all the same details as the correct answer.
        #     """,
        # },
    ]

    # The instruction now refers to "the model's answer": the previous wording
    # ("the summary") did not match any of the inputs listed in the prompt.
    scoring_prompt = """
    You have the correct answer, original text and the model's answer below.
    Based on the specified evaluation metric and rubric, assign an integer score between 1 and 4 to the model's answer.
    Then, return a JSON object with the metric name as the key and the evaluation score as the value. Don't output anything else.

    # Evaluation metric:
    {metric}

    # Evaluation rubrics:
    {rubrics}

    # Correct Answer
    {ground_truth}
    
    # Original Text
    {retrieved_chunk}

    # Model Answer
    {model_answer}

    """

    # One judge call per rubric; replies are joined with spaces so that
    # adjacent JSON objects stay separated for parsing.
    judge_replies = [
        predict(
            scorer_llm,
            scoring_prompt.format(
                ground_truth=ground_truth,
                retrieved_chunk=retrieved_chunk,
                model_answer=answer,
                metric=rubric["metric"],
                rubrics=rubric["rubrics"],
            ),
        )
        for rubric in eval_rubrics
    ]
    return string_to_dict(" ".join(judge_replies))

In [14]:
def ragas_score(question, ground_truth, model_output):
    """Score one question/answer pair with a fixed set of ragas metrics.

    A one-row ``datasets.Dataset`` is built from the question, the ground
    truth, the model's answer and its retrieved chunks (each converted with
    ``str``), then passed to ``ragas.evaluate`` with ``raise_exceptions=False``.

    Returns:
        The object returned by ``ragas.evaluate`` after NaN float values have
        been replaced with 0 by ``replace_nan_in_dict``.
    """
    # Imported lazily so these packages are only needed when this scorer runs.
    from datasets import Dataset
    from ragas import evaluate
    from ragas.metrics import faithfulness, answer_relevancy, answer_correctness, context_recall, context_precision

    # Each retrieved chunk becomes a plain string.
    context_strings = [str(piece) for piece in model_output["retrieved_chunk"]]

    # Every column holds exactly one entry; `contexts` is therefore a list
    # containing the list of context strings for that single row.
    single_row = {
        "question": [question],
        "ground_truth": [ground_truth],
        "answer": [model_output["answer"]],
        "contexts": [context_strings],
    }
    ragas_result = evaluate(
        Dataset.from_dict(single_row),
        metrics=[
            faithfulness,
            answer_relevancy,
            answer_correctness,
            context_recall,
            context_precision,
        ],
        raise_exceptions=False,
    )
    return replace_nan_in_dict(ragas_result)

In [15]:
@weave.op()
def tonic_validate_score(question: str, ground_truth: str, model_output: dict) -> dict:
    """Score one question/answer pair with tonic_validate's ``DuplicationMetric``.

    A single-item ``Benchmark`` is built from the question and ground truth and
    scored with a callback that returns the already-generated answer and
    context from ``model_output`` instead of calling a model.

    Returns:
        The ``scores`` attribute of the first entry in the run's ``run_data``.
    """
    from tonic_validate import Benchmark, ValidateScorer
    from tonic_validate.metrics import DuplicationMetric

    def get_llm_response(question):
        # The argument is ignored: the response was produced beforehand.
        llm_answer = model_output['answer']
        context = model_output['retrieved_chunk']
        # A bare string is wrapped so the context is always passed as a list.
        if isinstance(context, str):
            context = [context]
        return {"llm_answer": llm_answer, "llm_context_list": context}

    benchmark = Benchmark(questions=[question], answers=[ground_truth])
    scorer = ValidateScorer(metrics=[DuplicationMetric()])
    run = scorer.score(benchmark, get_llm_response)
    first_entry = run.run_data[0]
    return first_entry.scores

In [16]:
# Model identifiers to compare; each is wrapped in a SpaceRAGModel and run
# through the same evaluation.
models = ["meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
          "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
          "Snowflake/snowflake-arctic-instruct",
          "mistralai/Mixtral-8x22B-Instruct-v0.1"]
for model in models:
    rag_model = SpaceRAGModel(model=model)
    # A new Evaluation is created per model over the same five-row dataset,
    # using the LLM-judge, ragas and tonic_validate scorers.
    evaluation = Evaluation(dataset=small_questions, scorers=[
    llm_judge_scorer,
    ragas_score,
    tonic_validate_score
])
    print(f"RAG Model: {model}")
    # Top-level `await`: this relies on the notebook's running event loop.
    await evaluation.evaluate(rag_model)

RAG Model: meta-llama/Meta-Llama-3-70B-Instruct-Turbo
Question: What are the steps involved in the process of producing aluminum from lunar soil, specifically in regards to glass grinding and acid leaching?
Question: How is the sense of spaciousness addressed in the design of habitats for the colony?
Question: What is the importance of atmospheric pressure in large colonies in space?
Question: How does the expense of providing human workers encourage reliance on automation and the push for extreme reliability and maintainability in space commercial ventures?
Question: What are some potential benefits of long-term development in space?
According to the text, the steps involved in producing aluminum from lunar soil, specifically in regards to glass grinding and acid leaching, are:

1. The lunar soil is melted in a solar furnace at a temperature of 2000 K and then quenched in water to form a glass.
2. The glass is ground to 65 mesh.
3. The ground glass is leached with sulfuric acid.
_____

  from .autonotebook import tqdm as notebook_tqdm


{"concise": 3}


Error in WeaveTracer.on_chain_start callback: KeyError('inputs')
Error in WeaveTracer.on_chain_start callback: KeyError('inputs')
Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


The text does not explicitly answer this question. However, it can be inferred that the expense of providing human workers is high, as the productivity of humans in space is difficult to estimate and is affected by the zero-g and high-vacuum environment. The use of remote operation and automation (e.g., the extraction plant is operated remotely) may be a way to reduce the cost and risk associated with human labor in space.
___________________________


Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 3}


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]
[AError in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 3}




[A[A


Evaluating:  20%|██        | 1/5 [00:10<00:40, 10.14s/it]

[A[A
[A


[A[A[ARunner in Executor raised an exception
Evaluating:  60%|██████    | 3/5 [00:12<00:07,  3.51s/it]Runner in Executor raised an exception
Runner in Executor raised an exception
Evaluating:  80%|████████  | 4/5 [00:12<00:02,  2.39s/it]Runner in Executor raised an exception
Evaluating: 100%|██████████| 5/5 [00:13<00:00,  2.61s/it]
  value = np.nanmean(self.scores[cn])
Runner in Executor raised an exception


[A[ARunner in Executor raised an exception
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 13148.29it/s]
Scoring responses:   0%|          | 0/1 [00:00<?, ?it/s]Runner in Executor raised an exception

[A


Evaluating: 100%|██████████| 5/5 [00:15<00:00,  3.08s/it]
  value = np.nanmean(self.scores[cn])
Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.77s/it]
Runner in Executor raised an exception



[A[A[A
Evaluating: 100%|██████████| 5/5 [00:15<00:00,  3.09s/it]
Evaluating: 

Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 16912.52it/s]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 14463.12it/s]

Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 20460.02it/s]

[A

Evaluating: 100%|██████████| 5/5 [00:16<00:00,  3.35s/it]
Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 13934.56it/s]
Scoring responses:   0%|          | 0/1 [00:00<?, ?it/s]

Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


RAG Model: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
Question: How is the sense of spaciousness addressed in the design of habitats for the colony?
Question: How does the expense of providing human workers encourage reliance on automation and the push for extreme reliability and maintainability in space commercial ventures?
Question: What are some potential benefits of long-term development in space?
Question: What is the importance of atmospheric pressure in large colonies in space?
Question: What are the steps involved in the process of producing aluminum from lunar soil, specifically in regards to glass grinding and acid leaching?
Improved communication and Earth-sensing satellites, direct broadcasting of radio and TV to Earth, and reduced costs for power and stability in space.
___________________________
Fire protection is practical in an atmosphere with a total pressure of 36 kPa.
___________________________
The sense of spaciousness is addressed by providing a large-scale habi

Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

{"concise": 4}The expense of providing human workers in space is high due to the need for life support systems, habitats, and transportation to and from the space environment. This high expense encourages reliance on automation to reduce labor costs. Additionally, the difficulty and expense of accessing and repairing equipment in space emphasizes the need for extreme reliability and maintainability.
___________________________



Error in WeaveTracer.on_chain_start callback: KeyError('inputs')
Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}



[AError in WeaveTracer.on_chain_start callback: KeyError('inputs')
Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}
{"concise": 4}




[A[A


Evaluating:  20%|██        | 1/5 [00:07<00:28,  7.17s/it]
[A


[A[A[A

Evaluating:  60%|██████    | 3/5 [00:08<00:05,  2.51s/it]
[A

Evaluating:  80%|████████  | 4/5 [00:11<00:02,  2.44s/it]


[A[A[ARunner in Executor raised an exception


[A[A
[A


Evaluating: 100%|██████████| 5/5 [00:17<00:00,  3.47s/it]
Runner in Executor raised an exception

Evaluating: 100%|██████████| 5/5 [00:17<00:00,  3.54s/it]
  value = np.nanmean(self.scores[cn])



Evaluating: 100%|██████████| 5/5 [00:17<00:00,  3.57s/it]

Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 3086.32it/s]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 540.57it/s]

[A


Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 15141.89it/s]



Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.75s/it]


Evaluating: 100%|██████████| 5/5 [00:20<00:00,  4.05s/it]

Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.91s/it]



Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.5

Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 16384.00it/s]
Runner in Executor raised an exception
Evaluating: 100%|██████████| 5/5 [00:22<00:00,  4.47s/it]
Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.24s/it]


Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 13315.25it/s]
Scoring responses:   0%|          | 0/1 [00:00<?, ?it/s]

Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


RAG Model: Snowflake/snowflake-arctic-instruct
Question: How is the sense of spaciousness addressed in the design of habitats for the colony?
Question: What is the importance of atmospheric pressure in large colonies in space?
Question: What are some potential benefits of long-term development in space?
Question: What are the steps involved in the process of producing aluminum from lunar soil, specifically in regards to glass grinding and acid leaching?
Question: How does the expense of providing human workers encourage reliance on automation and the push for extreme reliability and maintainability in space commercial ventures?
 Atmospheric pressure is important in large colonies in space as it ensures life processes are adequately maintained, prevents unusual forms of decompression, and provides a greater safety margin during accidental pressure drops or oxygen dilution by inert gases. The total pressure of the atmosphere should be practical, with half of it being oxygen, as seen in e

Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

 Potential benefits of long-term development in space include advancements in automation, materials technology, and other technological innovations, which may drive the progress of space colonization more rapidly and on a larger scale than currently anticipated. Additionally, space colonization can have a favorable effect on communication and Earth-sensing satellites, leading to improvements in data-link applications, tracking, and broadcasting. It may also provide solutions for energy production through space-based power systems. However, it is essential to address the potential challenges and risks associated with space colonization, such as health risks from high-energy radiation and the psychological impact of long-term space travel.
___________________________
 The steps involved in the process of producing aluminum from lunar soil, specifically in regards to glass grinding and acid leaching, are as follows:

1. The lunar soil is melted in a solar furnace at a temperature of 2000 

Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}




{"concise": 4}


Error in WeaveTracer.on_chain_start callback: KeyError('inputs')

[A

 The expense of providing human workers in space encourages reliance on automation and the push for extreme reliability and maintainability in space commercial ventures because it is more cost-effective to use machines and systems that require minimal human intervention. This is due to the high costs associated with transporting and supporting human workers in space, as well as the risks involved in relying on humans for complex tasks. By using automated systems, space ventures can save on labor costs and minimize the need for human intervention, thereby increasing efficiency and reducing the risk of errors or malfunctions. Additionally, the harsh and unforgiving environment of space demands high levels of reliability and maintainability in the equipment and systems used, further driving the push for automation and extreme reliability in space commercial ventures.
___________________________
 The sense of spaciousness is addressed in the design of habitats for the colony by providing a

Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}




[A[AError in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 2}





[A[A[A

[A[ARunner in Executor raised an exception


[A[ARunner in Executor raised an exception
Evaluating:  20%|██        | 1/5 [00:07<00:29,  7.42s/it]

[A[A
Evaluating:  60%|██████    | 3/5 [00:09<00:05,  2.68s/it]


[A[A[ARunner in Executor raised an exception
Evaluating:  80%|████████  | 4/5 [00:10<00:02,  2.05s/it]Runner in Executor raised an exception


[A[A
[A


[A[A[ARunner in Executor raised an exception



Evaluating: 100%|██████████| 5/5 [00:13<00:00,  2.61s/it]
  value = np.nanmean(self.scores[cn])
Evaluating: 100%|██████████| 5/5 [00:15<00:00,  3.05s/it]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 10866.07it/s]



Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 12671.61it/s]



Evaluating: 100%|██████████| 5/5 [00:17<00:00,  3.55s/it]
Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.73s/it]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 14926.35it/s]
Scoring responses:   0%|          | 0/1 [00:00<?, ?it/s]


Sco

Runner in Executor raised an exception

Scoring responses: 100%|██████████| 1/1 [00:02<00:00,  2.32s/it]


Runner in Executor raised an exception

Evaluating: 100%|██████████| 5/5 [00:29<00:00,  5.80s/it]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 12865.96it/s]
Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Runner in Executor raised an exception


Evaluating: 100%|██████████| 5/5 [00:33<00:00,  6.64s/it]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 13148.29it/s]
Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


RAG Model: mistralai/Mixtral-8x22B-Instruct-v0.1
Question: What is the importance of atmospheric pressure in large colonies in space?
Question: How does the expense of providing human workers encourage reliance on automation and the push for extreme reliability and maintainability in space commercial ventures?
Question: What are the steps involved in the process of producing aluminum from lunar soil, specifically in regards to glass grinding and acid leaching?
Question: How is the sense of spaciousness addressed in the design of habitats for the colony?
Question: What are some potential benefits of long-term development in space?
 The process involves melting lunar soil in a solar furnace at 2000 K, then quenching it in water to form a glass. The glass is then separated in a centrifuge and the steam is condensed in radiators. The glass is ground to 65 mesh and leached with sulfuric acid. The pregnant solution containing aluminum sulfate is then separated from the waste material in a ce

Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


 Atmospheric pressure is important for fire protection and sustaining life processes in large space colonies. In this context, a mix of 50% oxygen and 50% nitrogen with a total pressure of 36 kPa is considered practical, with half of it being oxygen.
___________________________
{"concise": 4}
 The high cost of transporting and maintaining human workers in space encourages the use of automation. The need for extreme reliability and maintainability arises from the fact that repairs and replacements in space are expensive and difficult, making it more cost-effective to invest in highly reliable and easily maintainable systems.
___________________________


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}


Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}



[A

 The sense of spaciousness is addressed by providing large-scale vistas, making the habitat large enough to lessen the sense of artificiality. Additionally, some parts of the habitat are designed to be out of sight of others, and contact with the actual environment of space is provided through convenient access to regions of zero gravity and views of the Earth, the Moon, and stars.
___________________________


Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}




[A[A

 Some potential benefits of long-term development in space include advancements in automation, materials technology, and improved communication and Earth-sensing satellites. Space colonization could also lead to direct broadcasting of radio and TV from orbit, once low-cost power in space is available. Additionally, colonists could perform servicing and construction of satellites.
___________________________


Error in WeaveTracer.on_chain_start callback: KeyError('inputs')


{"concise": 4}





[A[A[ARunner in Executor raised an exception



[A[A[ARunner in Executor raised an exception



[A[A[ARunner in Executor raised an exception



[A[A[ARunner in Executor raised an exception
Runner in Executor raised an exception
Evaluating: 100%|██████████| 5/5 [00:02<00:00,  1.71it/s]
  value = np.nanmean(self.scores[cn])
Evaluating:  20%|██        | 1/5 [00:07<00:29,  7.47s/it]
[A

[A[A

[A[A


Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 14716.86it/s]



[A[A[A


Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.76s/it]



[A

[A[A
Evaluating:  60%|██████    | 3/5 [00:12<00:07,  3.64s/it]Runner in Executor raised an exception
Evaluating: 100%|██████████| 5/5 [00:12<00:00,  2.42s/it]
  value = np.nanmean(self.scores[cn])
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 12826.62it/s]
Scoring responses:   0%|          | 0/1 [00:00<?, ?it/s]Runner in Executor raised an exception

Evaluating: 100%|██████████| 5/5 [00:13<00:00,  2.67s/it]

Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 9868.95it/s]

Scoring responses: 100%|██████████| 1/1 [00:02<00:00,  2.00s/it]



Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.62s/it]




[A[ARunner in Executor raised an exception
Evaluating: 100%|██████████| 5/5 [00:16<00:00,  3.25s/it]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 12985.46it/s]
Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


Runner in Executor raised an exception
Evaluating: 100%|██████████| 5/5 [00:20<00:00,  4.20s/it]
Retrieving responses: 100%|██████████| 1/1 [00:00<00:00, 15363.75it/s]
Scoring responses: 100%|██████████| 1/1 [00:01<00:00,  1.73s/it]
