In [None]:
!pip install langchain langchain-community langchain-groq langchain-ollama

In [None]:
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.chains import LLMChain
from langchain_ollama.llms import OllamaLLM

import pandas as pd
import json
import os
import numpy as np
import subprocess
from datetime import datetime

In [None]:
# Set the GROQ_API_KEY environment variable here before using the ChatGroq backend.

In [None]:
# Input table: the CSV written by the newsdatapreprocessing step.
final_df = pd.read_csv(filepath_or_buffer="news.csv")

In [None]:
def get_chain():
  """Build the prompt-to-model pipeline used to analyse one news article.

  Returns:
    The runnable obtained by piping a ``PromptTemplate`` with the
    ``article`` and ``question`` variables into an ``OllamaLLM``
    configured with model ``llama3.1``.
  """
  # Alternative backend kept for reference: ChatGroq(model="llama3-8b-8192")
  model = OllamaLLM(model="llama3.1")

  prompt_text = """
  You are a helpful assistant specialized in keywords extraction and sentiment detection from a news article. Answer the following question:

  Article: {article}

  Question: {question}

  Answer:
  """

  # The article and the question are both filled in at invoke time.
  return PromptTemplate(
      template=prompt_text,
      input_variables=["question", "article"],
  ) | model

In [None]:
def get_sentiment(chain, article):
  """Ask the chain for the sentiment, companies and ticker symbols of an article.

  Args:
    chain: Object exposing ``invoke(input=...)``; it is called with a dict
      holding the ``article`` and ``question`` prompt variables.
    article: Article text substituted into the prompt.

  Returns:
    The model's reply. When the chain returns a non-string object carrying
    a ``content`` attribute (as chat-model message objects do), that
    ``content`` is returned instead, so callers can treat the result as text.
  """

  question = """
  1. What is the sentiment about the companies mentioned this article? Mention the sentiment only.
  2. Which company/companies were affectes by this news article? Mention the company names only. If no company is mentioned, return "None".
  3. Write ticker symbols for the companies mentioned in the article. If no company is mentioned, return "None".

  Answer should strictly be in the format:
  1. Sentiment: Sentiment
  2. Companies: Company1, Company2, ...
  3. TickerSymbols: TickerSymbol1, TickerSymbol2, ...

  Do not include any other text.
  """

  input_data = {
    'article': article,
    'question': question,
  }
  r = chain.invoke(input = input_data)
  # Completion-style LLMs return a plain str; chat models return a message
  # object whose text lives in `.content`. Unwrap it so the caller's string
  # handling (strip/split) works with either backend.
  if not isinstance(r, str):
    r = getattr(r, 'content', r)
  print(r)
  return r

In [None]:
# Numeric encoding for the sentiment column; any other label is stored as 0.
_SENTIMENT_SCORES = {"positive": 1, "negative": -1}


def _parse_sentiment_response(response):
    """Extract the labelled fields from the model's reply.

    The prompt asks for three lines of the form ``1. Sentiment: ...``,
    ``2. Companies: ...`` and ``3. TickerSymbols: ...``. Fields are matched by
    label rather than by line position, so blank lines, extra text or a
    different ordering do not put values into the wrong column.

    Args:
        response: Reply text, or an object whose ``content`` attribute holds it.

    Returns:
        A dict with any of the keys ``"sentiment"``, ``"companies"`` and
        ``"tickers"`` that were found, mapped to the stripped text after the
        first colon. The first occurrence of each label wins.
    """
    text = getattr(response, "content", response)
    fields = {}
    for raw_line in str(text).splitlines():
        label, colon, value = raw_line.partition(":")
        if not colon:
            continue
        # Keep letters only, so "1. Sentiment", "**Sentiment**" and
        # "Ticker Symbols" all normalise to a comparable key.
        key = "".join(ch for ch in label.lower() if ch.isalpha())
        if key.startswith("sentiment"):
            fields.setdefault("sentiment", value.strip())
        elif key.startswith("compan"):
            fields.setdefault("companies", value.strip())
        elif key.startswith("ticker"):
            fields.setdefault("tickers", value.strip())
    return fields


# Make sure the result columns exist so the isna() checks below cannot raise
# KeyError on an input file that has never been scored.
for column in ("sentiment", "tickerSymbol", "companyName"):
    if column not in final_df.columns:
        final_df[column] = None

chain = get_chain()
for index, row in final_df.iterrows():
    # Rows that already carry a sentiment are skipped, so the cell can be re-run.
    if not pd.isna(final_df.at[index, 'sentiment']):
        continue

    response = get_sentiment(chain, row['article'])
    fields = _parse_sentiment_response(response)

    if "sentiment" not in fields:
        # Leave the row unscored instead of aborting the whole run; it will be
        # picked up again the next time this cell is executed.
        print(f"{index}: no 'Sentiment:' line in the response, row left unscored")
        continue

    sentiment_label = fields["sentiment"].strip(" .*").lower()
    final_df.at[index, 'sentiment'] = _SENTIMENT_SCORES.get(sentiment_label, 0)

    print(index)

    # Only fill ticker/company when the input data did not already provide them.
    if pd.isna(final_df.at[index, 'tickerSymbol']):
        final_df.at[index, 'tickerSymbol'] = fields.get("tickers") or "None"

    if pd.isna(final_df.at[index, 'companyName']):
        final_df.at[index, 'companyName'] = fields.get("companies") or "None"

In [None]:
# Persist the scored table; the DataFrame index is not written to the file.
final_df.to_csv(path_or_buf="news_with_sentiment.csv", index=False)