In [None]:
import os
import time
from langchain.chains import MapReduceDocumentsChain, LLMChain, ReduceDocumentsChain, StuffDocumentsChain
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
os.cpu_count()

16

In [None]:
# Load LLM
config = {'max_new_tokens': 4096, 'temperature': 0.7, 'context_length': 4096}
llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
                    model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
                    config=config,
                    threads=os.cpu_count())

Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 9554.22it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 8774.69it/s]


In [None]:
# Reduce template and chain
predict_sentiment_template = """<s>[INST] As a market stock expert assess sentiment(positive, negative, neutral) related to Apple Company for following text.
Only evaluate based on the provided text: {headline}
write short one word answer.
Answer:  [/INST] </s>"""

predict_sentiment_prompt = PromptTemplate.from_template(predict_sentiment_template)
predict_sentiment_chain = LLMChain(llm=llm, prompt=predict_sentiment_prompt)

In [None]:
from langchain.docstore.document import Document

headline = """Apple reportedly abandons its electric car project as EV makers struggle"""

docs =  [Document(page_content=headline, metadata={"source": "local"})]

# split_docs = text_splitter.split_documents(docs)

print(docs)

[Document(page_content='Apple reportedly abandons its electric car project as EV makers struggle', metadata={'source': 'local'})]


In [None]:
# Run the chain
start_time = time.time()
result = predict_sentiment_chain.__call__(docs, return_only_outputs=True)
time_taken = time.time() - start_time
print(result, time_taken)

{'text': ' Negative'} 6.687000274658203


In [None]:
import pandas as pd

df_cnbc = pd.read_csv('cnbc_scraper/cnbc_headlines.csv', header = None, names = ['headline', 'date'])

df_cnbc.head(10)

Unnamed: 0,headline,date
0,A nutritionist shares her simple diet for opti...,4/13/2024
1,How climate change is beginning to be built in...,4/13/2024
2,"Apple’s franchise is very strong, speculation ...",4/12/2024
3,Google removing links to California news websi...,4/12/2024
4,Wall Street heads into the week ahead on edge ...,4/12/2024
5,The stock market’s decline nears a level that ...,4/12/2024
6,Trade Tracker: Bill Baruch buys more Apple,4/12/2024
7,Here are Friday’s biggest analyst calls: Apple...,4/12/2024
8,5 things to know before the stock market opens...,4/12/2024
9,Jim Cramer talks keeping faith in Nvidia and A...,4/12/2024


In [None]:
headlines = df_cnbc['headline'].values
headlines

array(['A nutritionist shares her simple diet for optimal health—’I’m not trying to be the most ...',
       'How climate change is beginning to be built into employee pay and benefits',
       'Apple’s franchise is very strong, speculation about its demise is ‘overblown’: Melius’ Ben Reitzes',
       ...,
       'Dow jumps more than 200 points to record close to start 2022, Tesla boosts Nasdaq ...',
       'Amid another Covid surge, schools and businesses find plans disrupted once again',
       '2022 will be the biggest year for the metaverse so far'],
      dtype=object)

In [None]:
def predict_sentiment(headline):
  docs =  [Document(page_content=headline, metadata={"source": "local"})]
  start_time = time.time()
  result = predict_sentiment_chain.__call__(docs, return_only_outputs=True)
  time_taken = time.time() - start_time
  print(result, time_taken)
  return result

In [24]:
results = []
for headline in headlines:
    results.append(predict_sentiment(headline))

{'text': ' Neutral (The text does not mention anything about Apple Inc., it is about a nutritionist sharing her diet.)'} 10.21340274810791
{'text': " Neutral. The text does not express any clear sentiment towards Apple or its stock. It only mentions a trend related to climate change and employment, which is not directly related to the company's financial performance or stock price."} 15.021794080734253
{'text': ' Positive.'} 4.4508466720581055
{'text': ' Neutral (The text does not directly mention Apple.)'} 6.203562498092651


KeyboardInterrupt: 

In [None]:
len(results)

6115

In [None]:
sentiments = []
for result in results:
    text = result['text'].lower()
    if 'positive' in text:
        sentiments.append('positive')
    elif 'neutral' in text:
        sentiments.append('neutral')
    elif 'negative' in text:
        sentiments.append('negative')
    else:
        sentiments.append('neutral')
        
print(sentiments)
len(sentiments)

['neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'positive', 'positive', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'positive', 'negative', 'positive', 'positive', 'positive', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'negative', 'positive', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'negative', 'neutral', 'positiv

6115

In [None]:
df_cnbc['sentiment'] = sentiments
df_cnbc

Unnamed: 0,headline,date,sentiment
0,A nutritionist shares her simple diet for opti...,4/13/2024,neutral
1,How climate change is beginning to be built in...,4/13/2024,neutral
2,"Apple’s franchise is very strong, speculation ...",4/12/2024,positive
3,Google removing links to California news websi...,4/12/2024,neutral
4,Wall Street heads into the week ahead on edge ...,4/12/2024,neutral
...,...,...,...
6110,Apple’s wearable gadget business grew like gan...,1/3/2022,positive
6111,Kantrowitz: As Apple hits $3 trillion in marke...,1/3/2022,neutral
6112,Dow jumps more than 200 points to record close...,1/3/2022,neutral
6113,"Amid another Covid surge, schools and business...",1/1/2022,neutral


In [None]:
df_cnbc.to_csv('cnbc_with_sentiments.csv', index=False)