In [1]:
from langchain.agents.agent_toolkits import create_python_agent
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.chat_models import PromptLayerChatOpenAI
from langchain.chains import SimpleSequentialChain, LLMChain, SequentialChain
from langchain import PromptTemplate
from langchain.document_loaders import CSVLoader
from langchain.memory.simple import SimpleMemory
from langchain.document_loaders import RedditPostsLoader
from langchain.llms import PromptLayerOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch

import promptlayer
import random
import os
import json

# Parameters

In [14]:
# Notebook parameters.
# Every secret/path below is read from an environment variable of the same name when
# it is set, so real credentials never have to be saved inside the notebook. The
# original placeholder strings are kept as fallbacks and can still be edited in place.

# Register for a Reddit app API key and provide the CLIENT_ID and CLIENT_SECRET here
REDDIT_CLIENT_ID = os.environ.get('REDDIT_CLIENT_ID', '<REDDIT_CLIENT_ID>')
REDDIT_CLIENT_SECRET = os.environ.get('REDDIT_CLIENT_SECRET', '<REDDIT_CLIENT_SECRET>')

# Download your Twitter archive data export and copy the CSV into the same folder as this notebook.
# See here for instructions on how to obtain it: https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive
TWEET_ARCHIVE_CSV_FILE = os.environ.get('TWEET_ARCHIVE_CSV_FILE', '<TWITTER DATA EXPORT>.csv')

promptlayer.api_key = os.environ.get('PROMPTLAYER_API_KEY', "<PROMPTLAYER API KEY>")
openai = promptlayer.openai
# NOTE(review): gpt_model is not referenced by any cell visible in this notebook
# section; confirm whether a later cell is meant to use it.
gpt_model = "gpt-4-32k"

# Load Latest Headlines from Reddit
The following code retrieves the top 20 "hot" Reddit posts from the r/worldnews subreddit, randomly samples 10 of them, and concatenates their titles together into
a newline-separated string.

In [4]:
# Load posts using the loader's 'subreddit' mode
loader = RedditPostsLoader(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent="AutoTweeter2 Notebook",
    categories=["hot"],  # List of categories to load posts from
    mode="subreddit",
    search_queries=[
        "worldnews"
    ],  # List of subreddits to load posts from
    number_posts=20,  # Default value is 10
)
reddit_posts = loader.load()

# Keep only the metadata fields that are used downstream.
reddit_posts_objs = [
    {
        "id": post.metadata["post_id"],
        "title": post.metadata["post_title"],
        "url": post.metadata["post_url"],
    }
    for post in reddit_posts
]

# Now we select N topics from the array.
# random.sample raises ValueError when asked for more items than are available,
# so the sample size is clamped to the number of posts actually returned.
number_to_sample = 10
selected_reddit_posts = random.sample(
    reddit_posts_objs,
    min(number_to_sample, len(reddit_posts_objs)),
)

# Each title is preceded by a blank line, producing one newline-separated string.
selected_reddit_posts_as_text = "".join(
    "\n\n" + value["title"] for value in selected_reddit_posts
)

print(selected_reddit_posts_as_text)



Researchers in Denmark extract ancient DNA from a 2,900-year-old clay brick, revealing a time capsule of plant life

France delivers first batch of SCALP long-range missiles to Ukraine, says ambassador

Videos of an artillery ambush show Russian tanks and armor making 'opening day mistakes' and paying the price for them

Finnish foreign minister Valtonen: "Russia has ruined its relationship with Finland"

Japanese farmer has fought for decades to stay on his ancestral land in the middle of Narita airport

Chinese activist Kwon Pyong, a critic of Xi Jinping, flees to South Korea on jet ski

African Union suspends Niger over coup

Billboards across London reveal an estimated 1 in 20 UK flights running on Russian oil

/r/WorldNews Live Thread: Russian Invasion of Ukraine Day 545, Part 1 (Thread #691)

Putin says Russia faces big economic challenges, must keep inflation in check


# Load Tweet History from CSV File
The following loads all of the Twitter user's previous Tweet history into an in-memory vector store, and then runs a similarity search against it to find past Tweets related to the new Reddit headlines extracted in the previous step.

In [6]:
# Read the tweet archive CSV; its single column is labelled "Tweet".
file = TWEET_ARCHIVE_CSV_FILE
csv_options = {'delimiter': ',', 'quotechar': '"', 'fieldnames': ['Tweet']}
loader = CSVLoader(
    file_path=file,
    encoding="utf8",
    csv_args=csv_options,
)
tweets = loader.load()

# Split the loaded documents into chunks of at most 1000 characters, without overlap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_tweets = text_splitter.split_documents(tweets)

# Embed the chunks and index them in an in-memory vector store.
embeddings = OpenAIEmbeddings()
db = DocArrayInMemorySearch.from_documents(split_tweets, embeddings)

# Search the store for past tweets related to the sampled Reddit headlines.
query_prefix = "Please return Tweets that relate to any of the following topics: "
query = query_prefix + selected_reddit_posts_as_text
docs = db.similarity_search(query)
reference_tweets = "\n".join(doc.page_content for doc in docs)

print(reference_tweets)


Tweet: now that Sochi is over, i bet we'll see Putin lay some Red Army pipe on the Ukraine.
Tweet: oh those Chechens...
Tweet: how long will Kofi Annan keep running between China, Syria and Moscow?
Tweet: um, is that an air raid siren that just went off in Brooklyn? the Russians are coming!!


# Tweet Generation via LLM

In the next step, the newly queried Reddit headlines and the reference Tweets retrieved from the vector store of past Tweet history are passed to the LLM to generate a new set of Tweets. Rather than using a single prompt to create the Tweets, AutoTweeter sequences two individual LLMChain objects together in LangChain’s SequentialChain construct: the first generates the Tweets, and the second applies a final filter that moderates against inappropriate or insensitive remarks.

## Chain 1: Generate Tweet Chain using Reddit Posts and Past Tweets as Context

In [8]:
# Prompt for the generation step. Placeholders: the number of tweets to produce,
# the example tweets to imitate, and the topics (wrapped in <>).
# NOTE(review): the closing marker reads "MIMC" while the opening one reads "MIMIC";
# it is left untouched here because it is runtime prompt text.
template = """
Please generate {number_of_tweets_to_generate} tweets that relate to the topics delimited by <> and are similar in tone to the examples provided below. Tweets should be insightful and humorous. Tweets must not use hashtags. Format your answer as a list of Tweets, with each individual Tweet on its own line. 

% START OF EXAMPLE TWEETS TO MIMIC
{example_tweets}
% END OF EXAMPLE TWEETS TO MIMC

<{topics}>

"""

number_of_tweets_to_generate = 10

retriever = db.as_retriever()
llm = PromptLayerChatOpenAI(
    model="gpt-3.5-turbo",
    pl_tags=["generate-tweets", "no-agent"],
)

prompt_template = PromptTemplate(
    template=template,
    input_variables=["number_of_tweets_to_generate", "example_tweets", "topics"],
)

# The generated text is stored under "proposed_tweets".
generator_chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    output_key="proposed_tweets",
)


## Chain 2: Moderate Tweets

In [9]:
# Prompt for the moderation step: receives the proposed tweets (wrapped in <>)
# and asks the model to drop any that may be offensive or insensitive.
template2 = """
Delimited in <> are a set of proposed Tweets, please filter out any tweets which may be offensive or insensitive.
<{proposed_tweets}>

"""

prompt_template2 = PromptTemplate(
    template=template2,
    input_variables=["proposed_tweets"],
)

# Reuses the same llm object; the filtered text is stored under "final_tweets".
moderation_chain = LLMChain(
    llm=llm,
    prompt=prompt_template2,
    output_key="final_tweets",
)

## Create SequentialChain Containing Chain 1 & 2 and Run 

In [13]:
# Values fed to the first chain, keyed by the prompt's placeholder names.
chain_inputs = {
    "number_of_tweets_to_generate": number_of_tweets_to_generate,
    "example_tweets": reference_tweets,
    "topics": selected_reddit_posts_as_text,
}

# Run generation followed by moderation; only "final_tweets" is exposed as output.
overall_chain = SequentialChain(
    chains=[generator_chain, moderation_chain],
    input_variables=["number_of_tweets_to_generate", "example_tweets", "topics"],
    output_variables=["final_tweets"],
    verbose=True,
)

result = overall_chain.run(chain_inputs)
print(result)



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m
Researchers in Denmark uncover ancient secrets in a 2,900-year-old brick - forget time travel, this is the real deal!

French sends Ukraine a gift - SCALP missiles. It's like a twisted game of missile monopoly.

Russian tanks learn the hard way that ambushes and opening day mistakes don't mix. #Oops

Finland has officially unfriended Russia. It's complicated.

Japanese farmer's airport battle is like a real-life version of "The Terminal." Tom Hanks would be proud.

Chinese activist takes "Jet Ski Escape" to a whole new level. Action movie in the making?

African Union suspends Niger for bad behavior. No dessert for you!

Billboards in London reveal Russia's oil is fueling more than just airplanes, it's fueling controversy.

/r/WorldNews brings us another thrilling chapter in the never-ending Russian invasion saga. Grab your popcorn, folks!

Putin faces economic challenges and inflation. Maybe he should try sel