-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebscrap.py
27 lines (20 loc) · 971 Bytes
/
webscrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import urllib.request
from bs4 import BeautifulSoup
class Scraper:
    """Print the article links found on a single web page."""

    def __init__(self, site):
        """Remember the page to scrape.

        Args:
            site: URL of the page; it is passed unchanged to
                ``urllib.request.urlopen`` when ``scrape`` is called.
        """
        self.site = site

    def scrape(self):
        """Fetch ``self.site`` and print every link that mentions "articles".

        The page is parsed with BeautifulSoup's built-in ``html.parser``.
        For each ``<a>`` tag whose ``href`` contains the substring
        ``"articles"``, the href is printed preceded by a newline. Nothing
        is returned.

        No exceptions are caught here, so any error raised while opening
        or reading the URL propagates to the caller.
        """
        # Read the body inside a ``with`` block so the HTTP response (and its
        # underlying connection) is closed even if reading fails.
        with urllib.request.urlopen(self.site) as response:
            html = response.read()

        soup = BeautifulSoup(html, "html.parser")
        for tag in soup.find_all("a"):
            url = tag.get("href")
            # Anchors without an href attribute have nothing to report.
            if url is None:
                continue
            if "articles" in url:
                print("\n" + url)
# Run the scraper against the Google News front page; matching links are
# printed to standard output as they are found.
news = "https://news.google.com/"
Scraper(site=news).scrape()
# With this Python web scraper you can collect Google News headlines, and the possibilities are endless. You could write a program to analyze the most-used words in headlines, or one that gauges stock sentiment and checks whether it correlates with the stock market.
# With this Python web scraper, all the information in the world is yours, and I hope that excites you as much as it excites me.