<h1>Desafio Quantamental Itaú - 2023</h1>
<h3>Lamia - Modelo 03</h3>
<h4>NLTK-SentimentIntensityAnalyzer on FinViz.com</h4>

Este modelo tem como objetivo realizar uma análise de sentimento sobre as notícias das ações mais voláteis da bolsa; assim, quando as notícias são positivas, temos mais chance de obter um retorno maior.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
# Fetch the VADER lexicon that SentimentIntensityAnalyzer loads at construction.
nltk.download('vader_lexicon')


In [None]:
def get_news(ticker, timeout=10):
    """Download the FinViz quote page for ``ticker`` and return its news table.

    Args:
        ticker: Symbol sent as the ``t`` query parameter of ``quote.ashx``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The parsed element whose ``id`` is ``news-table``, or ``None`` when
        the returned page does not contain such an element. The HTTP status
        code is not inspected, so an error page without the table also
        yields ``None``.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    url = 'https://finviz.com/quote.ashx'
    # An explicit browser-style User-Agent replaces the default requests one.
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Passing the ticker through ``params`` lets requests URL-encode it.
    response = requests.get(
        url,
        params={'t': ticker},
        headers=headers,
        timeout=timeout,
    )
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.find(id='news-table')



In [None]:
def parse_news(news_table, ticker):
    """Turn a scraped news table into a DataFrame of dated headlines.

    Each ``tr`` row is expected to expose its headline through the first
    link (``row.a``) and its timestamp through the first cell (``row.td``).
    The cell text is split on whitespace: a single token is treated as a
    time that inherits the date of the most recent dated row, while two or
    more tokens are read as ``date time``.

    Rows are skipped when they have no link, no cell, an empty cell, or a
    time-only stamp that appears before any date has been seen.

    Args:
        news_table: Object providing ``find_all('tr')`` (for example the
            value returned by ``get_news``), or ``None``.
        ticker: Symbol stored in the ``ticker`` column of every row.

    Returns:
        DataFrame with columns ``date``, ``time``, ``ticker``, ``headline``
        and ``datetime`` (the parsed combination of ``date`` and ``time``).
        The frame is empty when ``news_table`` is ``None`` or has no usable
        rows.
    """
    columns = ['date', 'time', 'ticker', 'headline']
    parsed_news = []
    current_date = None

    # A missing table (page without news) yields an empty result, not a crash.
    rows = news_table.find_all('tr') if news_table is not None else []

    for row in rows:
        link = row.a
        cell = row.td
        if link is None or cell is None:
            continue

        headline = link.get_text()
        stamp = cell.text.split()
        if not stamp:
            continue

        if len(stamp) == 1:
            time = stamp[0]
        else:
            current_date, time = stamp[0], stamp[1]

        # A time-only row before the first dated row cannot be placed in time.
        if current_date is None:
            continue

        parsed_news.append([current_date, time, ticker, headline])

    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    parsed_news_df['datetime'] = pd.to_datetime(
        parsed_news_df['date'] + ' ' + parsed_news_df['time']
    )
    return parsed_news_df



In [None]:
def score_news(parsed_news_df, analyzer=None):
    """Attach sentiment scores to every headline.

    Args:
        parsed_news_df: DataFrame with at least the columns ``headline``,
            ``datetime``, ``date`` and ``time``.
        analyzer: Optional object exposing ``polarity_scores(text)`` that
            returns a dict of scores including a ``compound`` entry. When
            omitted, a new ``SentimentIntensityAnalyzer`` is created; pass
            one in to reuse a single instance across calls.

    Returns:
        DataFrame indexed by ``datetime``, without the ``date`` and ``time``
        columns, with one column per score returned by the analyzer and the
        ``compound`` score renamed to ``sentiment_score``.
    """
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()

    # The join below is index-based; renumbering the rows keeps every
    # headline aligned with its own score even if the input index repeats
    # (for example after concatenating per-ticker frames).
    news = parsed_news_df.reset_index(drop=True)

    scores = [analyzer.polarity_scores(text) for text in news['headline']]
    if scores:
        scores_df = pd.DataFrame(scores, index=news.index)
    else:
        # No headlines: still provide the column that becomes sentiment_score.
        scores_df = pd.DataFrame(columns=['compound'], index=news.index)

    scored = news.join(scores_df, rsuffix='_right')
    scored = scored.set_index('datetime')
    # Keyword form: the positional ``axis`` argument is rejected by pandas 2.
    scored = scored.drop(columns=['date', 'time'])
    return scored.rename(columns={"compound": "sentiment_score"})



In [None]:
def analyze_sentiment_by_days(tickers):
    """Fetch, score and average news sentiment per calendar day.

    For every ticker the news table is downloaded with ``get_news`` and
    parsed with ``parse_news``; all headlines are then scored together with
    ``score_news`` and the numeric score columns are averaged per day.
    Headlines of all tickers that fall on the same day are averaged
    together.

    Args:
        tickers: Iterable of ticker symbols.

    Returns:
        DataFrame indexed by day holding the daily mean of each numeric
        score column. Tickers for which no news table was found are
        skipped.
    """
    columns = ['date', 'time', 'ticker', 'headline', 'datetime']
    frames = []

    for ticker in tickers:
        news_table = get_news(ticker)
        if news_table is None:
            # Nothing to parse for this ticker; keep going with the others.
            continue
        frames.append(parse_news(news_table, ticker))

    if frames:
        # ``DataFrame.append`` no longer exists in pandas 2; concatenate once
        # and renumber rows so the index-based join in scoring stays aligned.
        all_news_df = pd.concat(frames, ignore_index=True)
    else:
        all_news_df = pd.DataFrame(columns=columns)
    # Guarantees a datetime index for resampling even when there are no rows.
    all_news_df['datetime'] = pd.to_datetime(all_news_df['datetime'])

    scored_news_df = score_news(all_news_df)
    # Text columns (ticker, headline) cannot be averaged; restrict to numbers.
    return scored_news_df.resample('D').mean(numeric_only=True)



In [None]:
# Tickers to analyse.
tickers_list = [
    'AAPL',
    'GOOGL',
    'MSFT',
    'BTCUSD',
    'ETHUSD',
]

# Daily mean sentiment across the tickers above, printed for inspection.
sentiment_by_days = analyze_sentiment_by_days(tickers=tickers_list)
print(sentiment_by_days)