In [1]:
import nest_asyncio
nest_asyncio.apply()

In [2]:
import sys
sys.path.append("../")

import asyncio
import time
from langchain_core.runnables import RunnableParallel

from desci_sense.configs import default_init_parser_config
from desci_sense.shared_functions.dataloaders import scrape_post
from desci_sense.shared_functions.parsers.firebase_api_parser import FirebaseAPIParser, PromptCase

In [3]:
# The semantics model and the keyword model are both set to the same model id.
MODEL_NAME = "mistralai/mistral-7b-instruct"
config = default_init_parser_config(
    semantics_model=MODEL_NAME,
    kw_model=MODEL_NAME,
)

In [4]:
# Scrape a handful of posts (one per URL) to use as parser input.
urls = [
    "https://mastodon.social/@psmaldino@qoto.org/111405098400404613",
    "https://mastodon.social/@UlrikeHahn@fediscience.org/111732713776994953",
    "https://mastodon.social/@ronent/111687038322549430",
]
posts = list(map(scrape_post, urls))

In [5]:
# Sanity check: expect one scraped post per URL (3 URLs were given above).
len(posts)

3

In [6]:
# Instantiate the parser from the config built above; construction emits the
# model-loading log lines shown in the cell output.
parser = FirebaseAPIParser(config=config)

[32m2024-03-18 15:14:33.625[0m | [1mINFO    [0m | [36mdesci_sense.shared_functions.parsers.firebase_api_parser[0m:[36mset_md_extract_method[0m:[36m134[0m - [1mSetting metadata extraction method to none...[0m
[32m2024-03-18 15:14:33.626[0m | [1mINFO    [0m | [36mdesci_sense.shared_functions.parsers.firebase_api_parser[0m:[36m__init__[0m:[36m97[0m - [1mLoading parser model (type=mistralai/mistral-7b-instruct)...[0m
  warn_deprecated(
[32m2024-03-18 15:14:33.881[0m | [1mINFO    [0m | [36mdesci_sense.shared_functions.parsers.firebase_api_parser[0m:[36mset_kw_md_extract_method[0m:[36m138[0m - [1mSetting keywords metadata extraction method to citoid...[0m
[32m2024-03-18 15:14:33.882[0m | [1mINFO    [0m | [36mdesci_sense.shared_functions.parsers.firebase_api_parser[0m:[36minit_keyword_extraction_chain[0m:[36m406[0m - [1mLoading keyword model (type=mistralai/mistral-7b-instruct)...[0m
[32m2024-03-18 15:14:33.907[0m | [1mINFO    [0m | [36mdes

In [8]:
# Look up the entry registered for the SINGLE_REF prompt case and take its chain.
single_ref_case = parser.prompt_case_dict[PromptCase.SINGLE_REF]
chain = single_ref_case["chain"]

In [9]:
# Build one SINGLE_REF semantics prompt per scraped post.
input_prompts = []
for post in posts:
    input_prompts.append(parser.create_semantics_prompt_by_case(post, PromptCase.SINGLE_REF))

In [10]:
# Wrap every prompt in the mapping passed to the chain: a dict with a single "input" key.
inputs = [dict(input=prompt) for prompt in input_prompts]

In [11]:
# Run the chain over all inputs via its async batch API (top-level await, as used in this notebook).
results = await chain.abatch(inputs)

In [12]:
# Inspect the first result dict.
# NOTE(review): in the recorded output 'single_tag' and 'multi_tag' are empty even though
# 'final_answer' lists several tags — worth confirming whether that is expected.
results[0]

{'reasoning': "[Reasoning Steps]\n\n1. The post is about a new paper published in Phil Trans B.\n2. The paper is authored by Paul Smaldino and Karolina Safarzynska.\n3. The paper is about a simple model of a group-structured public good with externalities shifting benefits toward more inequality or more equity.\n4. The post does not explicitly endorse or disagree with the paper's content.\n5. The post does not contain any calls for papers, funding, or job listings.\n\n[Candidate Tags]\n\n<paper>, <public-good>, <equity>, <inequality>, <group-structure>, <externalities>",
 'final_answer': '<paper>, <public-good>, <equity>, <inequality>, <group-structure>, <externalities>',
 'single_tag': [],
 'multi_tag': []}

In [17]:
# print() so the "\n" escapes in the reasoning string render as real line breaks.
print(results[0]["reasoning"])

[Reasoning Steps]

1. The post is about a new paper published in Phil Trans B.
2. The paper is authored by Paul Smaldino and Karolina Safarzynska.
3. The paper is about a simple model of a group-structured public good with externalities shifting benefits toward more inequality or more equity.
4. The post does not explicitly endorse or disagree with the paper's content.
5. The post does not contain any calls for papers, funding, or job listings.

[Candidate Tags]

<paper>, <public-good>, <equity>, <inequality>, <group-structure>, <externalities>


In [18]:
# https://python.langchain.com/docs/expression_language/how_to/functions
from langchain_core.runnables import RunnableConfig

In [19]:
# Runnable config with max_concurrency=3; a later cell passes it to abatch via `config=config`.
# NOTE(review): this rebinds `config`, which previously held the parser config given to
# FirebaseAPIParser — re-running the parser cell after this one would pass a RunnableConfig instead.
config = RunnableConfig(max_concurrency=3)

In [20]:
# Make a list of 6 inputs for the timing comparisons below.
# Built from `input_prompts` rather than doubling `inputs` in place, so that
# re-running this cell always yields 6 items instead of 12, 24, ...
inputs = [{"input": prompt} for prompt in input_prompts] * 2
len(inputs)

6

In [28]:
# Baseline: invoke the chain on each input one at a time and time every call.
# time.perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is the wall clock and can jump if the system time is adjusted.
times = []
for single_input in inputs:
    start_time = time.perf_counter()
    res = chain.invoke(single_input)
    elapsed_time = time.perf_counter() - start_time
    times.append(elapsed_time)
    print(elapsed_time)
print(f"Total time: {sum(times)}")


7.781785011291504
13.621722221374512
10.330399990081787
9.885744094848633
21.70015788078308
18.53901195526123
Total time: 81.85882115364075


In [24]:
# 1 batch of 6
start_time = time.time()  
results = await chain.abatch(inputs)
end_time = time.time()
elapsed_time = end_time - start_time
print("\nAll tasks completed in {:.2f} seconds".format(elapsed_time))



All tasks completed in 21.54 seconds


In [23]:
# 2 batches of 3
start_time = time.time()  
results = await chain.abatch(inputs, config=config)
end_time = time.time()
elapsed_time = end_time - start_time
print("\nAll tasks completed in {:.2f} seconds".format(elapsed_time))



All tasks completed in 29.17 seconds


# Can we run a RunnableParallel chain in abatch mode?

In [25]:
# Build a RunnableParallel with two named branches.
# Both branches are fed the same SINGLE_REF chain lookup, so the "keywords" branch
# runs the semantics chain here as well.
single_ref_chain_entry = parser.prompt_case_dict[PromptCase.SINGLE_REF]
chain_1 = single_ref_chain_entry["chain"]
chain_2 = single_ref_chain_entry["chain"]

branches = {"semantics": chain_1, "keywords": chain_2}
map_chain = RunnableParallel(**branches)

In [26]:
# Run the parallel chain in async batch mode on the first 3 inputs only.
results = await map_chain.abatch(inputs[:3])

In [27]:
# First combined result: a dict keyed by branch name (the recorded output starts with 'semantics').
results[0]

{'semantics': {'reasoning': '[Reasoning Steps]\n\n1. The post is about a new paper published in Phil Trans B.\n2. The paper is about a group-structured public good with externalities.\n3. The paper discusses how externalities affect cooperation and how only redistributional externalities increase cooperation.\n\n[Candidate Tags]\n\n1. <endorses>: This tag could be used to indicate that the author is endorsing the paper. However, since there is no explicit statement of endorsement, this tag is not the best fit.\n2. <agrees>: This tag could be used to indicate that the author agrees with the conclusions of the paper. However, since there is no explicit statement of agreement, this tag is not the best fit.\n3. <disagrees>: This tag could be used to indicate that the author disagrees with the conclusions of the paper. However, since there is no explicit statement of disagreement, this tag is not the best fit.\n4. <indicates-interest>: This tag could be used to indicate that the author is i

Looks like yes!