In [None]:
# https://www.kaggle.com/datasets/notlucasp/financial-news-headlines/

In [1]:
import os
import re
import json
import csv
import datetime
from langchain.prompts import PromptTemplate
from genai.credentials import Credentials
import os
from dotenv import load_dotenv
# Using Generative AI Library
from genai.model import Model
from genai.schemas import GenerateParams
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Suppress all warnings
import warnings
warnings.simplefilter("ignore")

In [2]:
# read the csv file data/reuters_headlines.csv into a dataframe
df = pd.read_csv('data/reuters_headlines.csv',nrows=100)
df.head()

Unnamed: 0,Headlines,Time,Description
0,TikTok considers London and other locations fo...,Jul 18 2020,TikTok has been in discussions with the UK gov...
1,Disney cuts ad spending on Facebook amid growi...,Jul 18 2020,Walt Disney has become the latest company to ...
2,Trail of missing Wirecard executive leads to B...,Jul 18 2020,Former Wirecard chief operating officer Jan M...
3,Twitter says attackers downloaded data from up...,Jul 18 2020,Twitter Inc said on Saturday that hackers were...
4,U.S. Republicans seek liability protections as...,Jul 17 2020,A battle in the U.S. Congress over a new coron...


In [3]:
df.describe()

Unnamed: 0,Headlines,Time,Description
count,100,100,100
unique,100,3,100
top,TikTok considers London and other locations fo...,Jul 17 2020,TikTok has been in discussions with the UK gov...
freq,1,48,1


In [4]:
load_dotenv()
api_key = os.getenv("GENAI_KEY")
api_url = os.getenv("GENAI_API")

creds = Credentials(api_key, api_endpoint=api_url) # credentials object to access the LLM service

In [5]:
# a helper function to generate text
def get_completion(sample, prompt_string, model):
    prompt_template = PromptTemplate.from_template(prompt_string)
    prompt=prompt_template.format(sample=sample)
    result=model.generate([prompt])[0].generated_text
    print(sample)
    print(result)
    print(" ")
    return result

# get sentiment

In [6]:
# define model type
#MODELTYPE = "meta-llama/llama-2-70b-chat"
MODELTYPE = "ibm/granite-13b-chat-v1"
# MODELTYPE = "ibm/granite-13b-sft"


# Instantiate parameters for text generation
params = GenerateParams(
    decoding_method="sample", # use 'greedy' alternatively
    max_new_tokens=1000,
    min_new_tokens=1,
    temperature=0.5,
    repetition_penalty=1.2,
    top_k=50,
    top_p=1,
)

# Instantiate a model proxy object to send your requests
granite_13_chat_model = Model(MODELTYPE, params=params, credentials=creds)

In [7]:
prompt_string_sentiment_analysis="""
Analyze the sentiment of the following financial article.Determine whether the sentiment is positive, negative, or neutral. Answer with only one word!: 

financial article:
"{sample}" 

sentiment:
"""

In [8]:
# iterate over the rows of the dataframe and get_completion for each Description and store the result in a new column called 'sentiment'
# df['sentiment'] = df['Description'].apply(get_completion,prompt_string,granite_13_chat_model)
df['sentiment'] = df['Description'].apply(lambda x: get_completion(x, prompt_string_sentiment_analysis, granite_13_chat_model))


TikTok has been in discussions with the UK government over the past few months to locate its headquarters in London, a source familiar with the matter said, as part of a strategy to distance itself from its Chinese ownership.
neutral
 
Walt Disney  has become the latest company to slash its advertising spending on Facebook Inc  as the social media giant faces an ad boycott over its handling of hate speech and controversial content, the Wall Street Journal reported on Saturday, citing people familiar with the situation.
negative
 
Former Wirecard  chief operating officer Jan Marsalek travelled to Minsk soon after he was suspended and may still be in Belarus or Russia, a German magazine reported on Saturday.
negative
 
Twitter Inc said on Saturday that hackers were able to download account information for up to eight accounts involved in the hack of its systems this week, but said none of them were verified accounts.
negative
 
A battle in the U.S. Congress over a new coronavirus-aid bil

In [41]:
df.head()

Unnamed: 0,Headlines,Time,Description,sentiment
0,TikTok considers London and other locations fo...,Jul 18 2020,TikTok has been in discussions with the UK gov...,positive
1,Disney cuts ad spending on Facebook amid growi...,Jul 18 2020,Walt Disney has become the latest company to ...,negative
2,Trail of missing Wirecard executive leads to B...,Jul 18 2020,Former Wirecard chief operating officer Jan M...,negative
3,Twitter says attackers downloaded data from up...,Jul 18 2020,Twitter Inc said on Saturday that hackers were...,negative
4,U.S. Republicans seek liability protections as...,Jul 17 2020,A battle in the U.S. Congress over a new coron...,positive


# extract named entities

In [14]:
# define model type
MODELTYPE = "meta-llama/llama-2-70b-chat"
#MODELTYPE = "ibm/granite-13b-instruct-v1"
# MODELTYPE = "ibm/granite-13b-sft"


# Instantiate parameters for text generation
params = GenerateParams(
    decoding_method="greedy", # use 'greedy' alternatively
    # max_new_tokens=1000,
    # min_new_tokens=1,
    # temperature=0.5,
    # repetition_penalty=1.2,
    # top_k=50,
    # top_p=1,
)

# Instantiate a model proxy object to send your requests
# granite_13_instruct_model = Model(MODELTYPE, params=params, credentials=creds)
llame_2_70b_model = Model(MODELTYPE, params=params, credentials=creds)

In [15]:
prompt_string_named_entities="""
Act as a webmaster who must extract structured information from emails. Read the below email and extract and categorize each entity.


Input:
"Golden Bank is a competitor of Silver Bank in the US" said John Doe.

Output: (Named Entities)
Golden Bank: company, Silver Bank: company, US: country, John Doe: person

Input:
Alphabet Inc's Google said on Friday it would prohibit websites and apps that use its advertising technology from running ads on "dangerous content" that goes against scientific consensus during the coronavirus pandemic.

Output: (Named Entities)
Alphabet Inc: company,  Google: company division, Friday: day of the week, coronavirus pandemic: event

Input:
{sample}

Output: (Named Entities)
"""

In [17]:
df.head()   

Unnamed: 0,Headlines,Time,Description,sentiment
0,TikTok considers London and other locations fo...,Jul 18 2020,TikTok has been in discussions with the UK gov...,neutral
1,Disney cuts ad spending on Facebook amid growi...,Jul 18 2020,Walt Disney has become the latest company to ...,negative
2,Trail of missing Wirecard executive leads to B...,Jul 18 2020,Former Wirecard chief operating officer Jan M...,negative
3,Twitter says attackers downloaded data from up...,Jul 18 2020,Twitter Inc said on Saturday that hackers were...,negative
4,U.S. Republicans seek liability protections as...,Jul 17 2020,A battle in the U.S. Congress over a new coron...,positive


In [None]:
result="TikTok: company, UK government: organization, London: location, China: country"

In [21]:
import re

input_str = "TikTok: company, UK government: organization, London: location, China: country"

def parse_string_to_touple_list(input_str):
    # Regular expression pattern to match 'key: value' pairs
    # Adjust the pattern as needed to handle different formats
    pattern = r'(\w[\w\s]*?)\s*:\s*([\w\s]+)'

    # Find all matches and convert them to tuples
    tuples_list = re.findall(pattern, input_str)
    
    return tuples_list



# Display the list of tuples
print(parse_string_to_touple_list(input_str))	

[('TikTok', 'company'), ('UK government', 'organization'), ('London', 'location'), ('China', 'country')]


In [24]:
df_copy = df.copy()
# Initialize the 'named_entities' column with default values
df_copy['named_entities'] = [None] * len(df_copy)

for i in range(0,10):
    named_entities=get_completion(df_copy['Description'][i], prompt_string_named_entities, llame_2_70b_model)
    # parse the named_entities string into a list of tuples and store it in a new column called 'named_entities'
    df_copy['named_entities'][i] = parse_string_to_touple_list(named_entities)
    
df_copy.head(10)

TikTok has been in discussions with the UK government over the past few months to locate its headquarters in London, a source familiar with the matter said, as part of a strategy to distance itself from its Chinese ownership.
TikTok: company, UK government: organization, London: location, China: country

 
Walt Disney  has become the latest company to slash its advertising spending on Facebook Inc  as the social media giant faces an ad boycott over its handling of hate speech and controversial content, the Wall Street Journal reported on Saturday, citing people familiar with the situation.
Walt Disney: company, Facebook Inc: company, Saturday: day of the week, Wall Street
 
Former Wirecard  chief operating officer Jan Marsalek travelled to Minsk soon after he was suspended and may still be in Belarus or Russia, a German magazine reported on Saturday.
Wirecard: company, Jan Marsalek: person, Minsk: city, Belarus
 
Twitter Inc said on Saturday that hackers were able to download account i

Unnamed: 0,Headlines,Time,Description,sentiment,named_entities
0,TikTok considers London and other locations fo...,Jul 18 2020,TikTok has been in discussions with the UK gov...,neutral,"[(TikTok, company), (UK government, organizati..."
1,Disney cuts ad spending on Facebook amid growi...,Jul 18 2020,Walt Disney has become the latest company to ...,negative,"[(Walt Disney, company), (Facebook Inc, compan..."
2,Trail of missing Wirecard executive leads to B...,Jul 18 2020,Former Wirecard chief operating officer Jan M...,negative,"[(Wirecard, company), (Jan Marsalek, person), ..."
3,Twitter says attackers downloaded data from up...,Jul 18 2020,Twitter Inc said on Saturday that hackers were...,negative,"[(Twitter Inc, company), (Saturday, day of the..."
4,U.S. Republicans seek liability protections as...,Jul 17 2020,A battle in the U.S. Congress over a new coron...,positive,"[(Congress, organization), (Republicans, polit..."
5,Wall Street Week Ahead: Fund managers navigate...,Jul 17 2020,Investors are searching for bargains in the wo...,negative,"[(caps, financial market), (coronavirus pandem..."
6,Take Five: Hoping for that V-shape in earnings,Jul 17 2020,We knew Q2 earnings would be dire but the hope...,negative,"[(Q2, quarter), (European, region), (plus, amo..."
7,Evictions nearly back to pre-pandemic levels i...,Jul 17 2020,Landlords in some areas of the United States a...,positive,"[(Federal Reserve Bank of Cleveland, organizat..."
8,Google bans ads on coronavirus conspiracy theo...,Jul 17 2020,Alphabet Inc's Google said on Friday it would ...,positive,"[(Alphabet Inc, company), (Google, company div..."
9,"Flight to suburbs boosts U.S. homebuilding, bu...",Jul 17 2020,U.S. homebuilding increased in June by the mos...,positive,"[(June, month), (19 pandemic, event)]"


## extract key actions