<a href="https://colab.research.google.com/github/Flychuban/Stocks-Crypto-Research/blob/main/Stocks_Crypto_Research.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers
!pip install sentencepiece

In [2]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
from bs4 import BeautifulSoup
import requests

In [3]:
model_name = "human-centered-summarization/financial-summarization-pegasus"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name) 

In [None]:
url = "https://uk.finance.yahoo.com/news/d-put-2-000-tesla-043029225.html"
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
paragraphs = soup.find_all('p')

In [None]:
paragraphs

In [None]:
text = [paragraph.text for paragraph in paragraphs]
words = ' '.join(text).split(' ')[:400]
ARTICLE = ' '.join(words)

In [None]:
ARTICLE

'RCB’s owners have focused on Virat Kohli to leverage their commercial brand Both Tesla (NASDAQ: TSLA) and NIO (NYSE: NIO) stock declined by more than 50% in value in 2022. It was a dreadful year for most growth shares, including electric vehicle (EV) companies. But what if I’d taken a contrarian stance and decided to invest £1,000 in each of these fallen stocks as a New Year gift for myself? How much would I have today? Well, Tesla shares are up a very impressive 65% so far this year. In contrast, NIO shares have declined 17% since the end of December and now sit at just under $8 per share. This means that my Tesla holding would be worth £1,650, while the value of my position in its Chinese EV rival would have fallen to £830. So, my overall investment would be worth £2,480 today. That’s a gain of 24%, which is an exceptional return after just a few months. But what about the future? Should I buy either or both stocks today? There seem to be two big reasons why Tesla stock has come bac

In [None]:
input_ids = tokenizer.encode(ARTICLE, return_tensors = 'pt')
output = model.generate(input_ids, max_length = 55, num_beams = 5, early_stopping = True)
summary = tokenizer.decode(output[0], skip_special_tokens=True)

In [None]:
summary

'Tesla stock is up 65% so far this year, while NIO shares are down 17%.'

In [4]:
monitored_tickers = ['TMUS', 'RCL', 'ETH']

In [5]:
def search_stock_news_urls(ticker):
  search_url = f'https://www.google.com/search?q=yahoo+finance+{ticker}&tbm=nws'
  r = requests.get(search_url)
  soup = BeautifulSoup(r.text, 'html.parser')
  atags = soup.find_all('a')
  hrefs = [link['href'] for link in atags]
  return hrefs

In [6]:
raw_urls = {ticker: search_stock_news_urls(ticker) for ticker in monitored_tickers}

In [None]:
raw_urls

In [8]:
import re

In [9]:
exclude_list = ['maps', 'policies', 'preferences', 'accounts', 'support']

In [10]:
def strip_unwanted_urls(urls, exclude_list):
    val = []
    for url in urls: 
        if 'https://' in url and not any(exclude_word in url for exclude_word in exclude_list):
            res = re.findall(r'(https?://\S+)', url)[0].split('&')[0]
            val.append(res)
    return list(set(val))

In [None]:
cleaned_urls = {ticker:strip_unwanted_urls(raw_urls[ticker], exclude_list) for ticker in monitored_tickers}
cleaned_urls

In [12]:
def scrape_and_process(URLs):
  ARTICLES = []
  for url in URLs:
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')
    paragraphs = soup.find_all('p')
    text = [paragraph.text for paragraph in paragraphs]
    words = ' '.join(text).split(' ')[:350]
    ARTICLE = ' '.join(words)
    ARTICLES.append(ARTICLE)
  return ARTICLES

In [None]:
articles = {ticker: scrape_and_process(cleaned_urls[ticker]) for ticker in monitored_tickers}
articles

In [14]:
def summarize(articles):
  summaries = []
  for article in articles:
    input_ids = tokenizer.encode(article, return_tensors = 'pt')
    output = model.generate(input_ids, max_length = 55, num_beams = 5, early_stopping = True)
    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summaries.append(summary)
  return summaries

In [None]:
summaries = {ticker: summarize(articles[ticker]) for ticker in monitored_tickers}
summaries

In [16]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [17]:
MODEL_NAME = 'RashidNLP/Finance_Multi_Sentiment'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

bert_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels = 3).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [18]:
def get_sentiment(sentences):
    bert_dict = {}
    vectors = tokenizer(sentences, padding = True, max_length = 65, return_tensors='pt').to(device)
    outputs = bert_model(**vectors).logits
    probs = torch.nn.functional.softmax(outputs, dim = 1)
    for prob in probs:
        bert_dict['neg'] = round(prob[0].item(), 3)
        bert_dict['neu'] = round(prob[1].item(), 3)
        bert_dict['pos'] = round(prob[2].item(), 3)
        print (bert_dict)

In [19]:
scores = {ticker: get_sentiment(summaries[ticker]) for ticker in monitored_tickers}
scores



{'neg': 0.737, 'neu': 0.207, 'pos': 0.056}
{'neg': 0.0, 'neu': 0.001, 'pos': 0.998}
{'neg': 0.082, 'neu': 0.037, 'pos': 0.881}
{'neg': 0.001, 'neu': 0.187, 'pos': 0.812}
{'neg': 0.005, 'neu': 0.919, 'pos': 0.076}
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0}
{'neg': 0.006, 'neu': 0.256, 'pos': 0.738}
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0}
{'neg': 0.005, 'neu': 0.919, 'pos': 0.076}
{'neg': 0.004, 'neu': 0.002, 'pos': 0.994}
{'neg': 0.0, 'neu': 0.002, 'pos': 0.998}
{'neg': 0.0, 'neu': 0.001, 'pos': 0.999}
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0}
{'neg': 0.005, 'neu': 0.919, 'pos': 0.076}
{'neg': 0.0, 'neu': 0.006, 'pos': 0.994}
{'neg': 0.0, 'neu': 0.003, 'pos': 0.997}
{'neg': 0.0, 'neu': 0.003, 'pos': 0.997}
{'neg': 0.0, 'neu': 0.006, 'pos': 0.994}
{'neg': 0.005, 'neu': 0.919, 'pos': 0.076}
{'neg': 0.006, 'neu': 0.256, 'pos': 0.738}
{'neg': 0.005, 'neu': 0.919, 'pos': 0.076}
{'neg': 0.0, 'neu': 0.001, 'pos': 0.999}
{'neg': 0.001, 'neu': 0.03, 'pos': 0.969}
{'neg': 0.99, 'neu': 0.01, 'pos': 0.0}
{'neg':

{'TMUS': None, 'RCL': None, 'ETH': None}