In [1]:
import os

import openai
import pandas as pd

In [3]:
from langchain_openai import OpenAI, ChatOpenAI

from langchain.prompts import PromptTemplate, ChatPromptTemplate

from langchain.output_parsers import CommaSeparatedListOutputParser

from langchain_experimental.agents.agent_toolkits import create_python_agent

from langchain_experimental.tools.python.tool import PythonREPLTool

In [4]:
# Read every line of the headlines file into a list (newlines are still attached here).
with open('financial_headlines.txt', 'r') as file:
    headlines = list(file)

# Show the raw lines.
headlines

["Finnish Aktia Group 's operating profit rose to EUR 17.5 mn in the first quarter of 2010 from EUR 8.2 mn in the first quarter of 2009 .\n",
 'Finnish measuring equipment maker Vaisala Oyj HEL : VAIAS said today that its net loss widened to EUR4 .8 m in the first half of 2010 from EUR2 .3 m in the corresponding period a year earlier .\n',
 'Finnish pharmaceuticals company Orion reports profit before taxes of EUR 70.0 mn in the third quarter of 2010 , up from EUR 54.9 mn in the corresponding period in 2009 .\n',
 'Tiimari , the Finnish retailer , reported to have geenrated quarterly revenues totalling EUR 1.3 mn in the 4th quarter 2009 , up from EUR 0.3 mn loss in 2008 .\n',
 "Finnish Metso Paper has been awarded a contract for the rebuild of Sabah Forest Industries ' ( SFI ) pulp mill in Sabah , Malaysia .\n",
 'Finnish Outokumpu Technology has been awarded several new grinding technology contracts .']

In [5]:
# Remove newline characters from both ends of every headline.
headlines = [raw_headline.strip('\n') for raw_headline in headlines]

# Show the cleaned list.
headlines

["Finnish Aktia Group 's operating profit rose to EUR 17.5 mn in the first quarter of 2010 from EUR 8.2 mn in the first quarter of 2009 .",
 'Finnish measuring equipment maker Vaisala Oyj HEL : VAIAS said today that its net loss widened to EUR4 .8 m in the first half of 2010 from EUR2 .3 m in the corresponding period a year earlier .',
 'Finnish pharmaceuticals company Orion reports profit before taxes of EUR 70.0 mn in the third quarter of 2010 , up from EUR 54.9 mn in the corresponding period in 2009 .',
 'Tiimari , the Finnish retailer , reported to have geenrated quarterly revenues totalling EUR 1.3 mn in the 4th quarter 2009 , up from EUR 0.3 mn loss in 2008 .',
 "Finnish Metso Paper has been awarded a contract for the rebuild of Sabah Forest Industries ' ( SFI ) pulp mill in Sabah , Malaysia .",
 'Finnish Outokumpu Technology has been awarded several new grinding technology contracts .']

In [6]:
# Completion-style prompt with a single {headline} placeholder.
prompt_template = PromptTemplate.from_template(
    "Analyze the following financial headline for sentiment: {headline}"
)

# Fill the placeholder with the first headline and display the resulting prompt text.
formatted_template = prompt_template.format(headline=headlines[0])
formatted_template

"Analyze the following financial headline for sentiment: Finnish Aktia Group 's operating profit rose to EUR 17.5 mn in the first quarter of 2010 from EUR 8.2 mn in the first quarter of 2009 ."

In [9]:
# System instructions for the chat model. The text is spelled out line by line with
# explicit "\n" so the trailing spaces and the four-space indents are visible.
system_message = (
    "You are performing sentiment analysis on news headlines regarding financial analysis. \n"
    "    This sentiment is to be used to advice financial analysts. \n"
    "    The format of the output has to be consistent. \n"
    "    The output is strictly limited to any of the following options: [positive, negative, neutral]."
)

# Two-message chat prompt: the fixed system message followed by the human request,
# which carries the {headline} placeholder.
chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", "Analyze the following financial headline for sentiment: {headline}"),
    ]
)

# Render the messages for the first headline and display them.
formatted_chat_template = chat_template.format_messages(headline=headlines[0])
formatted_chat_template

[SystemMessage(content='You are performing sentiment analysis on news headlines regarding financial analysis. \n    This sentiment is to be used to advice financial analysts. \n    The format of the output has to be consistent. \n    The output is strictly limited to any of the following options: [positive, negative, neutral].', additional_kwargs={}, response_metadata={}),
 HumanMessage(content="Analyze the following financial headline for sentiment: Finnish Aktia Group 's operating profit rose to EUR 17.5 mn in the first quarter of 2010 from EUR 8.2 mn in the first quarter of 2009 .", additional_kwargs={}, response_metadata={})]

In [None]:
# Completion model client.
client = OpenAI()

# Chain: the prompt template's output is piped into the completion model.
completion_chain = prompt_template | client

# Run the chain on the first headline.
completion_chain.invoke({"headline": headlines[0]})

In [None]:
# Define a chat client model. Assign to chat.
chat = ChatOpenAI()

# Pipe the chat template to the client. Assign to chat_chain.
chat_chain = chat_template | chat

# Invoke chat_chain on the first headline.
# The system message is already embedded in chat_template, so {headline} is the only
# input variable. The second positional argument of invoke() is the run config, not
# prompt input, so system_message must not be passed there.
chat_chain.invoke({"headline": headlines[0]})

In [None]:
# Parser for comma-separated list output.
output_parser = CommaSeparatedListOutputParser()

# Formatting instructions supplied by the parser, to be appended to prompts.
format_instructions = output_parser.get_format_instructions()

# Prompt asking for one company name per headline. The parser's format instructions
# are pre-filled as a partial variable, so only {headlines} remains to be supplied.
company_name_template = PromptTemplate(
    input_variables=["headlines"],
    partial_variables=dict(format_instructions=format_instructions),
    template=(
        "List all the company names from the following headlines, "
        "limited to one name per headline: {headlines}.\n"
        "{format_instructions}"
    ),
)

# Render the prompt with the full list of headlines.
formatted_company_name_template = company_name_template.format(headlines=headlines)

In [None]:
# Completion model with temperature set to 0.
model = OpenAI(temperature=0)

# Send the rendered company-name prompt to the model.
_output = model.invoke(formatted_company_name_template)

# Parse the model's text output with the comma-separated-list parser.
company_names = output_parser.parse(_output)

# Report the parsed value's type, then the value itself.
print(f"Data type: {type(company_names)}\n")
print(company_names)

In [None]:
# Build a Python agent around a Python REPL tool; verbose=True is passed through to
# the agent constructor.
agent_executor = create_python_agent(
    llm=OpenAI(max_tokens=1000, temperature=0),
    tool=PythonREPLTool(),
    verbose=True,
)

# Try the agent on a simple numeric question.
agent_executor.invoke("What is the square root of 250? Round the answer down to 4 decimals.")

In [None]:
# Ask the agent to extract company name and sentiment for every headline and to save
# the result as financial_analysis.csv.
# The column description lists all three sentiment classes so that it agrees with the
# extraction instruction in the first sentence (it previously said only
# "positive or negative").
agent_executor.invoke(f"""For every of the following headlines, extract the company name and whether the financial sentiment is   positive, neutral or negative. 
   Load this data into a pandas dataframe. 
   The dataframe will have three columns: the name of the company, whether the financial sentiment is positive, neutral or negative and the headline itself. 
   The dataframe can then be saved in the current working directory under the name financial_analysis.csv.
   If a csv file already exists with the same name, it should be overwritten.

   The headlines are the following:
   {headlines}
   """)

In [None]:
# pandas (pd) is imported in the notebook's top import cell, not here, so all
# dependencies are declared in one place.

# Load the CSV file written by the agent into a dataframe.
df = pd.read_csv("financial_analysis.csv")

# Display the dataframe.
df

In [None]:
# Sentiment prompt that reuses the parser's format instructions as a partial variable;
# only {headlines} remains to be supplied.
sentiment_template = PromptTemplate(
    input_variables=["headlines"],
    partial_variables=dict(format_instructions=format_instructions),
    template=(
        "Get the financial sentiment of each of the following headlines. "
        "The output is strictly limited to any of the following options: "
        "['Positive', 'Negative', 'Neutral']: {headlines}.\n"
        "{format_instructions}"
    ),
)

# Render the prompt with all headlines.
formatted_sentiment_template = sentiment_template.format(headlines=headlines)

# Query the model with the rendered prompt.
_output = model.invoke(formatted_sentiment_template)

# Turn the raw text output into a list.
sentiments = output_parser.parse(_output)

# Display the parsed sentiments.
sentiments

In [10]:
def visulize(headline, sentiment):
    """Print each sentiment label next to its headline, one pair per line.

    Args:
        headline: Sequence of headline strings.
        sentiment: Sequence of sentiment labels, aligned index-by-index with
            ``headline``.

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    # Use the arguments that were passed in instead of the notebook-level
    # ``headlines`` variable, so the function works for any list of headlines.
    assert len(headline) == len(sentiment), (
        f"expected one sentiment per headline, got {len(headline)} headlines "
        f"and {len(sentiment)} sentiments"
    )
    for text, label in zip(headline, sentiment):
        print(f"{label}:{text}")

In [None]:
# Plain-language guidance describing when a headline counts as positive, negative
# or neutral; it is inserted into the prompt via {few_shot_examples}.
sentiment_examples = """
    If a company is doing financially better than before, the sentiment is positive. For example, when profits or revenue have increased since the last quarter or year, exceeding expectations, a contract is awarded or an acquisition is announced.
    If the company's profits are decreasing, losses are mounting up or overall performance is not meeting expectations, the sentiment is negative.
    If nothing positive or negative is mentioned from a financial perspective, the sentiment is neutral.
"""

# Sentiment prompt with an extra slot for the guidance text. The parser's format
# instructions are pre-filled as a partial variable.
sentiment_template = PromptTemplate(
    input_variables=["headlines", "few_shot_examples"],
    partial_variables=dict(format_instructions=format_instructions),
    template=(
        "Get the financial sentiment of each of the following headlines. "
        "{few_shot_examples} "
        "The output is strictly limited to any of the following options: "
        "['Positive', 'Negative', 'Neutral']: {headlines}.\n"
        "{format_instructions}"
    ),
)

# Render the prompt with the guidance text and all headlines.
formatted_sentiment_template = sentiment_template.format(
    few_shot_examples=sentiment_examples,
    headlines=headlines,
)

# Query the model with the rendered prompt.
_output = model.invoke(formatted_sentiment_template)

# Turn the raw text output into a list of sentiments.
sentiments = output_parser.parse(_output)

# Print every sentiment next to its headline.
visulize(headlines, sentiments)


In [None]:
# Ask the agent to assemble the previously parsed lists into a dataframe and save it
# as financial_analysis_with_parsing.csv.
# The prompt names three columns and supplies three lists, so the column count in the
# first sentence is "three" (it previously said "two").
# f-string interpolation already applies str() to each list, so no explicit call is needed.
agent_executor.invoke(f"""Create a dataframe with three columns: company_name, sentiment and headline.
To fill the dataframe, use the following lists respectively: {company_names}, {sentiments} and {headlines}. 
The dataframe can then be saved in the current working directory under the name financial_analysis_with_parsing.csv.
If a csv file already exists with the same name, it should be overwritten.
""")

In [None]:
# Read the CSV produced from the parsed lists back into a dataframe.
df = pd.read_csv(filepath_or_buffer="financial_analysis_with_parsing.csv")

# Display it.
df

In [None]:
# Read every line of the comments file into a list (newlines are still attached).
with open('reddit_comments.txt', 'r') as file:
    comments = list(file)

In [None]:
# Take the first comment from the loaded list.
comment = comments[0]

# Create a client from the openai package; this rebinds the name `client`.
client = openai.OpenAI()

# Submit the comment to the moderations endpoint.
moderation_output = client.moderations.create(input=comment)

# Uncomment to inspect the text that was sent.
# print(comment)

# Display the moderation response.
moderation_output

In [None]:
# Tabulate the categories and category scores of the first moderation result.
# model_dump() replaces .dict(), which is deprecated in Pydantic v2; with default
# arguments both return the same field-name-keyed dictionary.
pd.DataFrame(moderation_output.results[0].model_dump())[["categories", "category_scores"]]