In [1]:
# load env with api keys https://stackoverflow.com/a/54028874
%load_ext dotenv
%dotenv ../etc/config.env

import sys
sys.path.append("../")

import os

from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate
from langchain.schema import BaseOutputParser
from langchain.schema import (
    HumanMessage,
)



from twitter import scrape_tweet

In [2]:
TEST_TWEET_1 = "https://twitter.com/danwilliamsphil/status/1719436704602275858"
TEST_TWEET_2 = "https://twitter.com/pwang/status/1719720728184910195"
TEST_TWEET_3 = "https://twitter.com/BlancheMinerva/status/1719714881081954409"
TEST_TWEET_4 = "https://twitter.com/sucholutsky/status/1719725087681569189"

In [3]:
tweet = scrape_tweet(TEST_TWEET_4)
tweet

{'conversationID': '1719725087681569189',
 'date': 'Wed Nov 01 14:36:26 +0000 2023',
 'date_epoch': 1698849386,
 'hashtags': ['NeurIPS2023'],
 'likes': 4,
 'mediaURLs': ['https://pbs.twimg.com/media/F92w1rQXUAAvfHw.jpg'],
 'media_extended': [{'altText': None,
   'size': {'height': 760, 'width': 1247},
   'thumbnail_url': 'https://pbs.twimg.com/media/F92w1rQXUAAvfHw.jpg',
   'type': 'image',
   'url': 'https://pbs.twimg.com/media/F92w1rQXUAAvfHw.jpg'}],
 'possibly_sensitive': False,
 'qrtURL': None,
 'replies': 1,
 'retweets': 3,
 'text': "🧵 Excited to share another new paper with @cocosci_lab, accepted as a spotlight at #NeurIPS2023! 🎉 We delve into the intriguing intersection of AI and human cognition, exploring how alignment with human representations impacts few-shot learning tasks.🧠🤖🎓 Let's unpack this!👇 https://t.co/ayvpHIw76s",
 'tweetID': '1719725087681569189',
 'tweetURL': 'https://twitter.com/sucholutsky/status/1719725087681569189',
 'user_name': 'Ilia Sucholutsky',
 'user_scr

In [4]:
# Create model

OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
# openai.api_key = os.environ["OPENROUTER_API_KEY"]

model_name = "mistralai/mistral-7b-instruct" # currently free on OpenRouter (https://openrouter.ai/docs#models)
# model_name = "openai/gpt-3.5-turbo"


chat = ChatOpenAI(
        model=model_name, 
        temperature=0.7,
        openai_api_key=os.environ["OPENROUTER_API_KEY"],
        openai_api_base=OPENROUTER_API_BASE,
        headers={"HTTP-Referer": os.environ["OPENROUTER_REFERRER"]}, # To identify your app. Can be set to e.g. http://localhost:3000 for testing
    )

                    headers was transferred to model_kwargs.
                    Please confirm that headers is what you intended.


In [5]:
# based on https://python.langchain.com/docs/get_started/quickstart#prompttemplate--llm--outputparser

class CommaSeparatedListOutputParser(BaseOutputParser):
    """Parse the output of an LLM call to a comma-separated list."""


    def parse(self, text: str):
        """Parse the output of an LLM call."""
        return text.strip().split(", ")
    
class StripOutputParser(BaseOutputParser):
    """Parse the output of an LLM call to a comma-separated list."""


    def parse(self, text: str):
        """Parse the output of an LLM call."""
        return text.strip()

template = """You are an expert annotator who tags social media posts related to academic research, according to a predefined set of tags. 
The available tag types are:
<announce>: this post contains an announcement of new research, likely by the authors. The research may be a paper, dataset or other type of research output that the authors are announcing publicly.
<review>: this post contains a review of another reference, such as a book, article or movie. The review could be positive or negative.
<other>: use this if no other tag is suitable. If you tag a post with <Other>, no other tag should be assigned to the post.

A user will pass in a post, and you should think step by step, before returning a list of comma separated tags that best match the post.

Your final answer should be structured as follows:
# Reasoning steps: (your reasoning steps. For each tag you choose, explain why you chose it.)
# Final answer: (the final list of tags, based on the reasoning steps)

Remember:
The final answer should ONLY include tags from the list above, nothing more. Do not make up any new tags that are not in the list above!
If the <other> tag is included in the answer, no other tag should be included!"""
human_template = "{text}"

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", human_template),
])
# chain = chat_prompt | chat | CommaSeparatedListOutputParser()
chain = chat_prompt | chat | StripOutputParser()
answer = chain.invoke({"text": tweet["text"]})
print(answer)

# Reasoning steps:

1. The post is about a new research paper that has been accepted for presentation at NeurIPS 2023.
2. The paper explores the intersection of AI and human cognition.
3. The authors are excited to share the paper with the community.
4. No explicit tag is mentioned in the post, so it falls under the <other> tag.

# Final answer: Other
