### Installing Dependencies

In [1]:
!pip install textblob
!pip install googletrans==4.0.0rc1

Collecting googletrans==4.0.0rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
Collecting httpx==0.13.3
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[K     |████████████████████████████████| 55 kB 2.4 MB/s 
Collecting rfc3986<2,>=1.3
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)
Collecting sniffio
  Downloading sniffio-1.2.0-py3-none-any.whl (10 kB)
Collecting hstspreload
  Downloading hstspreload-2021.11.1-py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 15.5 MB/s 
[?25hCollecting httpcore==0.9.*
  Downloading httpcore-0.9.1-py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 1.2 MB/s 
Collecting h2==3.*
  Downloading h2-3.2.0-py2.py3-none-any.whl (65 kB)
[K     |████████████████████████████████| 65 kB 3.4 MB/s 
[?25hCollecting h11<0.10,>=0.8
  Downloading h11-0.9.0-py2.py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 2.0 MB/s 
[?25hCollecting hpack<4,>=3.0
  Downloading hpack-3.0.0-py

### Translation Example

In [2]:
from textblob import TextBlob as tb
from googletrans import Translator

# Other attributes of a translation result that can be printed for inspection
# (kept commented out; `result` is only defined further down in this cell):
#print(result.src)
#print(result.dest)
#print(result.origin)
#print(result.text)
#print(result.pronunciation)

# A single Translator instance; later cells reuse this same `translator` object.
translator = Translator()

# Translate one Portuguese sentence to English and show only the translated text.
result = translator.translate("O que é isso e quando vamos fazer isso", src='pt', dest='en')
print(result.text)


What is this and when we're going to do this


### Sites to Scrape

In [3]:
import requests
from bs4 import BeautifulSoup

# Upper bound (in seconds) for each HTTP request, so a stalled server cannot
# hang this cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# (display name, front-page URL) for every news site to download.
sites = [('Globo','https://g1.globo.com/'), ('Uol','https://noticias.uol.com.br/'), ('O Antagonista','https://www.oantagonista.com/')]

# Despite its name, `soups` holds the raw HTML text of each page (not parsed
# BeautifulSoup objects), in the same order as `sites`. The get_materias*
# helpers search this text directly.
soups = []

for site in sites:
  paginaHTML = requests.get(site[1], timeout=REQUEST_TIMEOUT_SECONDS)
  soups.append(paginaHTML.text)

In [4]:
def get_materias(soup: str, beacon: str, stop_character: str, init_character='', add_characters=0):
  """Extract every piece of text that follows an occurrence of `beacon`.

  For each occurrence of `beacon` in `soup`, extraction starts one character
  after the end of the beacon (the character right after the beacon is always
  skipped). If `init_character` is given, the start is moved forward to the
  next occurrence of `init_character` and then advanced by `add_characters`.
  The extracted text runs from that start up to, but not including, the next
  `stop_character`.

  Args:
    soup: Raw text (e.g. a page's HTML) to scan.
    beacon: Marker string that precedes each item to extract.
    stop_character: Delimiter that ends an extracted item.
    init_character: Optional delimiter to seek before starting extraction;
      '' (the default) disables this step.
    add_characters: Offset added to the position of `init_character` to get
      the first extracted character. Only used when `init_character` is set.

  Returns:
    A list with one string per complete item, in document order. If a needed
    delimiter is missing after some beacon (e.g. truncated input), scanning
    stops there and the items collected so far are returned, instead of
    failing with an IndexError.
  """
  materias = []

  index_resposta = soup.find(beacon)

  while index_resposta != -1:
    # Skip the beacon itself plus the single character that follows it.
    inicio = index_resposta + len(beacon) + 1

    if init_character != '':
      inicio = soup.find(init_character, inicio)
      if inicio == -1:
        # No opening delimiter from here to the end of the text, so no later
        # beacon can find one either.
        break
      inicio += add_characters

    fim = soup.find(stop_character, inicio)
    if fim == -1:
      # Unterminated item: the closing delimiter never appears again.
      break

    materias.append(soup[inicio:fim])
    index_resposta = soup.find(beacon, index_resposta + 1)
  return materias


In [5]:
def get_materias_oantagonista(soup):
  """Extract items from the O Antagonista page text using get_materias.

  Each item is located after an article-link anchor. After the beacon, the
  scan moves to the next double quote and then 9 characters further before
  reading up to the following double quote.
  NOTE(review): the 9-character offset presumably skips over `" title="` so
  that the link title is what gets extracted; confirm against the live markup.
  """
  return get_materias(
    soup,
    '<div class="article_link"><a href="https',
    '"',
    init_character='"',
    add_characters=9,
  )

In [6]:
def get_materias_uol(soup):
  """Extract items from the UOL page text using get_materias.

  An item is the text that follows the thumb-title class attribute (and the
  single character after it), up to the next '<'.
  """
  return get_materias(
    soup,
    beacon='class="thumb-title title-xsmall title-lg-small"',
    stop_character='<',
  )

In [7]:
def get_materias_g1(soup):
  """Extract items from the G1 page text using get_materias.

  An item is the text that follows the `"species":"Matéria","title":` marker
  (and the single character after it), up to the next double quote.
  """
  return get_materias(
    soup,
    beacon='"species":"Matéria","title":',
    stop_character='"',
  )

In [9]:
import spacy
# Load the spaCy pipeline named "en_core_web_sm" once; the resulting `nlp`
# callable is used by the following cell to process text.
nlp = spacy.load("en_core_web_sm")

In [11]:
# Run the pipeline on a sample sentence and keep only its first sentence span.
doc = nlp("Hello guys, how are you all today?.")
sentence = list(doc.sents)[0]

# For every token of that first sentence: the token, its entity type and its
# part-of-speech tag, each on its own line, followed by a separator line.
print('SENTS:\n')
for token in sentence:
  print(token, token.ent_type_, token.pos_, '----------', sep='\n')

# Named entities found in the whole document, with their labels.
print('\nENTS:\n')
for entity in doc.ents:
  print(entity.text, entity.label_)

# Noun chunks found in the whole document.
print('\nNOUN_CHUNKS:\n')
for chunk in doc.noun_chunks:
  print(chunk.text)


SENTS:

Hello

INTJ
----------
guys

NOUN
----------
,

PUNCT
----------
how

ADV
----------
are

AUX
----------
you

PRON
----------
all

DET
----------
today
DATE
NOUN
----------
?

PUNCT
----------
.

PUNCT
----------

ENTS:

today DATE

NOUN_CHUNKS:

you


### Show Latest News

In [12]:
# Extract the items of each site from the downloaded page text. The indices
# follow the order of `sites`: 0 = Globo (G1), 1 = Uol, 2 = O Antagonista.
materias_g1 = get_materias_g1(soups[0])
# NOTE(review): materias_uol is not used by any other cell shown here.
materias_uol = get_materias_uol(soups[1])
materias_oantagonista = get_materias_oantagonista(soups[2])

# Translate each G1 item from Portuguese and print it. No `dest` is passed, so
# the target language is the library default (the recorded output is English).
for materia in materias_g1:
  result = translator.translate(materia, src='pt')
  print(result.text)



Ex-Charlie Brown Jr. Calls the son of Chorão de 'Arrogant'
'There are days that I only cry', says Mouth Rosa about entrepreneurship
'Superbebê' is born with 7 kg and does not fit in the incubator
Brazil's taller man will have to amputate leg
Anvisa approves HIV treatment of only one tablet
We test smartphones that cost up to R $ 2,000
Airfryer: how to choose your 'fryer' without oil
Lamp, lock, porter: The basic kit of the smart house
Headphones to work and play: See options for R $ 250 to R $ 2,000
Meet products that transform your TV into Smart
Skate: Learn how to choose the ideal model
Pix serve and PIX Change start valer;Understand the rules
OMICRON: What is known about the new coronavirus variant
Who is entitled to unemployment insurance?How do you ask?
Vale-gas: Understand the program and who will
Learn if you can have discount on the light account
Mobile average deaths is in 227;Total passes from 614 thousand
More than 133 million are complete vaccination, 62.44% of the populati

In [13]:
def show_sentiment(materias):
  """Print the TextBlob sentiment of each title that is not fully neutral.

  Every title in `materias` is translated from Portuguese with the
  module-level `translator`, and the translated text is wrapped in a TextBlob
  (`tb`). Titles whose polarity and subjectivity are both 0 are skipped. For
  the others, the original (untranslated) title is printed with the sentiment.
  """
  for materia in materias:
    titulo = tb(translator.translate(materia, src='pt').text)
    # Nothing to report when the analysis produced no signal at all.
    if titulo.polarity == 0 and titulo.subjectivity == 0:
      continue
    print(f'Titulo: {materia} \nSentimento: {titulo.sentiment}\n')

# Print the sentiment of the O Antagonista titles, then the total number of
# extracted titles (this count also includes titles show_sentiment skipped).
show_sentiment(materias_oantagonista)
print(len(materias_oantagonista))

Titulo: Moro fala 
Sentimento: Sentiment(polarity=0.13636363636363635, subjectivity=0.5)

Titulo: Orçamento secreto continua secreto 
Sentimento: Sentiment(polarity=-0.4, subjectivity=0.7)

Titulo: Um limite para o orçamento secreto 
Sentimento: Sentiment(polarity=-0.4, subjectivity=0.7)

Titulo: Moro dispara na terceira via 
Sentimento: Sentiment(polarity=0.06818181818181818, subjectivity=0.25)

Titulo: Ciro Gomes e o efeito Moro 
Sentimento: Sentiment(polarity=0.13636363636363635, subjectivity=0.5)

Titulo: Moro e a política econômica de Jair Bolsonaro 
Sentimento: Sentiment(polarity=0.16818181818181818, subjectivity=0.35)

Titulo: Lula diz que vai interferir na Petrobras e culpa Lava Jato por alta nos preços 
Sentimento: Sentiment(polarity=0.16, subjectivity=0.5399999999999999)

Titulo: Lula e Alckmin desmarcam reunião prevista para amanhã 
Sentimento: Sentiment(polarity=0.10000000000000002, subjectivity=0.3833333333333333)

Titulo: Saiba como cada parlamentar votou no 'golpe do orç

In [None]:
# Exploratory aid: show the built-in help for TextBlob (imported as `tb`).
help(tb)

In [28]:
# Wrap an English sentence in a TextBlob and print the resulting object's type.
phrase = tb("The sun is yellow and I am quite mellow")
print(type(phrase))

<class 'textblob.blob.TextBlob'>


In [27]:
# Example of a sentence that scores 0.0 for both subjectivity and polarity in
# the recorded output; show_sentiment skips titles with such scores.
sub = tb("Earth is my planet")
print(f'Subjectivity: {sub.subjectivity}')
print(f'Polarity: {sub.polarity}')

Subjectivity: 0.0
Polarity: 0.0
