## Import, Define Variables, Use Requests to Capture HTML Code, Find Relevant Tags

In [1]:
# import libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
from pymongo import MongoClient
from pprint import pprint

In [2]:
# define main reusable variables
main_url = 'https://www.nasdaq.com'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36', "Upgrade-Insecure-Requests": "1","DNT": "1","Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8","Accept-Language": "en-US,en;q=0.5","Accept-Encoding": "gzip, deflate"}

In [3]:
page = requests.get('https://www.nasdaq.com/news-and-insights/topic/markets/page/1', timeout=5, headers=headers)

In [4]:
soup = BeautifulSoup(page.text, 'html.parser')

In [5]:
soup


<!DOCTYPE html>

<html dir="ltr" lang="en" prefix="content: http://purl.org/rss/1.0/modules/content/  dc: http://purl.org/dc/terms/  foaf: http://xmlns.com/foaf/0.1/  og: http://ogp.me/ns#  rdfs: http://www.w3.org/2000/01/rdf-schema#  schema: http://schema.org/  sioc: http://rdfs.org/sioc/ns#  sioct: http://rdfs.org/sioc/types#  skos: http://www.w3.org/2004/02/skos/core#  xsd: http://www.w3.org/2001/XMLSchema# ">
<head>
<meta charset="utf-8"/>
<script>window.digitalData = {"pageInstanceID":"production:home:news-and-insights:topic:markets","page":{"pageInfo":{"author":null,"breadcrumbs":["home","news-and-insights","topic","markets"],"destinationURL":"https:\/\/www.nasdaq.com\/news-and-insights\/topic\/markets?reqid=15e1b1f3-502a-4d3b-87e1-ba2b88c1b0fa","entryURL":null,"lang":"en","onsiteSearchResults":null,"onsiteSearchTerm":null,"pageID":"home:news-and-insights:topic:markets","pageName":"markets","pagePathLevel1":"news-and-insights","pagePathLevel2":"news-and-insights:topic","pagePath

In [6]:
a_tags = soup.find_all("a", class_='content-feed__card-title-link')

In [7]:
soup.find_all("a", class_='content-feed__card-title-link')

[<a class="content-feed__card-title-link" href="/articles/asian-shares-mostly-higher-as-inflation-rate-worries-ease">Asian Shares Mostly Higher As Inflation, Rate Worries Ease </a>,
 <a class="content-feed__card-title-link" href="/articles/indonesia-bourse-poised-to-halt-slide">Indonesia Bourse Poised To Halt Slide </a>,
 <a class="content-feed__card-title-link" href="/articles/higher-open-called-for-hong-kong-stock-market-5">Higher Open Called For Hong Kong Stock Market </a>,
 <a class="content-feed__card-title-link" href="/articles/tevas-partner-alvotech-receives-complete-response-letter-for-avt02-bla">Teva's Partner Alvotech Receives Complete Response Letter For AVT02 BLA </a>,
 <a class="content-feed__card-title-link" href="/articles/china-stock-market-may-reclaim-thursdays-losses">China Stock Market May Reclaim Thursday's Losses </a>,
 <a class="content-feed__card-title-link" href="/articles/renewed-support-anticipated-for-taiwan-shares">Renewed Support Anticipated For Taiwan Shar

## Loop to Scrape Data from HTML Code

In [8]:
names = []
urls = []

for i in range(len(a_tags)):
    names.append(a_tags[i].text)
    urls.append(a_tags[i]['href'])
    print('TITLE: ' + a_tags[i].text + ', URL: ' + main_url + a_tags[i]['href'])
    

TITLE: Asian Shares Mostly Higher As Inflation, Rate Worries Ease , URL: https://www.nasdaq.com/articles/asian-shares-mostly-higher-as-inflation-rate-worries-ease
TITLE: Indonesia Bourse Poised To Halt Slide , URL: https://www.nasdaq.com/articles/indonesia-bourse-poised-to-halt-slide
TITLE: Higher Open Called For Hong Kong Stock Market , URL: https://www.nasdaq.com/articles/higher-open-called-for-hong-kong-stock-market-5
TITLE: Teva's Partner Alvotech Receives Complete Response Letter For AVT02 BLA , URL: https://www.nasdaq.com/articles/tevas-partner-alvotech-receives-complete-response-letter-for-avt02-bla
TITLE: China Stock Market May Reclaim Thursday's Losses , URL: https://www.nasdaq.com/articles/china-stock-market-may-reclaim-thursdays-losses
TITLE: Renewed Support Anticipated For Taiwan Shares , URL: https://www.nasdaq.com/articles/renewed-support-anticipated-for-taiwan-shares
TITLE: Singapore Bourse May Reclaim 3,300-Point Level , URL: https://www.nasdaq.com/articles/singapore-bo

In [9]:
names

['Asian Shares Mostly Higher As Inflation, Rate Worries Ease ',
 'Indonesia Bourse Poised To Halt Slide ',
 'Higher Open Called For Hong Kong Stock Market ',
 "Teva's Partner Alvotech Receives Complete Response Letter For AVT02 BLA ",
 "China Stock Market May Reclaim Thursday's Losses ",
 'Renewed Support Anticipated For Taiwan Shares ',
 'Singapore Bourse May Reclaim 3,300-Point Level ',
 'VR App Developers Caught in Push-Pull Between Meta, ByteDance',
 'Bloomberg and Amazon Jump On the ChatGPT Train',
 'Malaysia Stock Market Poised To End Losing Streak ',
 '']

In [10]:
urls

['/articles/asian-shares-mostly-higher-as-inflation-rate-worries-ease',
 '/articles/indonesia-bourse-poised-to-halt-slide',
 '/articles/higher-open-called-for-hong-kong-stock-market-5',
 '/articles/tevas-partner-alvotech-receives-complete-response-letter-for-avt02-bla',
 '/articles/china-stock-market-may-reclaim-thursdays-losses',
 '/articles/renewed-support-anticipated-for-taiwan-shares',
 '/articles/singapore-bourse-may-reclaim-3300-point-level',
 '/articles/vr-app-developers-caught-in-push-pull-between-meta-bytedance',
 '/articles/bloomberg-and-amazon-jump-on-the-chatgpt-train',
 '/articles/malaysia-stock-market-poised-to-end-losing-streak',
 '']

## Clean Names & Urls Lists

In [11]:
names.remove('')
urls.remove('')

## Bring Data into a Dataframe for MongoDB Export

In [12]:
to_mongo = pd.DataFrame()

In [13]:
to_mongo = to_mongo.append({'article_title': names[0], 'article_url': (main_url+urls[0])}, ignore_index=True)

  to_mongo = to_mongo.append({'article_title': names[0], 'article_url': (main_url+urls[0])}, ignore_index=True)


In [14]:
to_mongo

Unnamed: 0,article_title,article_url
0,"Asian Shares Mostly Higher As Inflation, Rate ...",https://www.nasdaq.com/articles/asian-shares-m...


In [15]:
to_mongo = pd.DataFrame()

for i in range(len(names)):
    to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)

  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)
  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)
  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)
  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)
  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)
  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)
  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)
  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_index=True)
  to_mongo = to_mongo.append({'article_title': names[i], 'article_url': (main_url+urls[i])}, ignore_inde

In [16]:
to_mongo

Unnamed: 0,article_title,article_url
0,"Asian Shares Mostly Higher As Inflation, Rate ...",https://www.nasdaq.com/articles/asian-shares-m...
1,Indonesia Bourse Poised To Halt Slide,https://www.nasdaq.com/articles/indonesia-bour...
2,Higher Open Called For Hong Kong Stock Market,https://www.nasdaq.com/articles/higher-open-ca...
3,Teva's Partner Alvotech Receives Complete Resp...,https://www.nasdaq.com/articles/tevas-partner-...
4,China Stock Market May Reclaim Thursday's Losses,https://www.nasdaq.com/articles/china-stock-ma...
5,Renewed Support Anticipated For Taiwan Shares,https://www.nasdaq.com/articles/renewed-suppor...
6,"Singapore Bourse May Reclaim 3,300-Point Level",https://www.nasdaq.com/articles/singapore-bour...
7,VR App Developers Caught in Push-Pull Between ...,https://www.nasdaq.com/articles/vr-app-develop...
8,Bloomberg and Amazon Jump On the ChatGPT Train,https://www.nasdaq.com/articles/bloomberg-and-...
9,Malaysia Stock Market Poised To End Losing Str...,https://www.nasdaq.com/articles/malaysia-stock...


## Export to Mongodb

In [17]:
mongo = MongoClient(port=27017)

In [18]:
db = mongo.nasdaq_insights

In [19]:
headlines = db.headlines

In [20]:
articles_dict = to_mongo.to_dict('records') #prep to load into mongodb
articles_dict

[{'article_title': 'Asian Shares Mostly Higher As Inflation, Rate Worries Ease ',
  'article_url': 'https://www.nasdaq.com/articles/asian-shares-mostly-higher-as-inflation-rate-worries-ease'},
 {'article_title': 'Indonesia Bourse Poised To Halt Slide ',
  'article_url': 'https://www.nasdaq.com/articles/indonesia-bourse-poised-to-halt-slide'},
 {'article_title': 'Higher Open Called For Hong Kong Stock Market ',
  'article_url': 'https://www.nasdaq.com/articles/higher-open-called-for-hong-kong-stock-market-5'},
 {'article_title': "Teva's Partner Alvotech Receives Complete Response Letter For AVT02 BLA ",
  'article_url': 'https://www.nasdaq.com/articles/tevas-partner-alvotech-receives-complete-response-letter-for-avt02-bla'},
 {'article_title': "China Stock Market May Reclaim Thursday's Losses ",
  'article_url': 'https://www.nasdaq.com/articles/china-stock-market-may-reclaim-thursdays-losses'},
 {'article_title': 'Renewed Support Anticipated For Taiwan Shares ',
  'article_url': 'https:

In [21]:
headlines.insert_many(articles_dict)

<pymongo.results.InsertManyResult at 0x7f86e1af0c70>

In [22]:
results = list(headlines.find())

In [23]:
# check if data was correctly loaded
for r in results:
    pprint(r)

{'_id': ObjectId('643b0b08d5fa5619ee09442a'),
 'article_title': 'Asian Shares Mostly Higher As Inflation, Rate Worries Ease ',
 'article_url': 'https://www.nasdaq.com/articles/asian-shares-mostly-higher-as-inflation-rate-worries-ease'}
{'_id': ObjectId('643b0b08d5fa5619ee09442b'),
 'article_title': 'Indonesia Bourse Poised To Halt Slide ',
 'article_url': 'https://www.nasdaq.com/articles/indonesia-bourse-poised-to-halt-slide'}
{'_id': ObjectId('643b0b08d5fa5619ee09442c'),
 'article_title': 'Higher Open Called For Hong Kong Stock Market ',
 'article_url': 'https://www.nasdaq.com/articles/higher-open-called-for-hong-kong-stock-market-5'}
{'_id': ObjectId('643b0b08d5fa5619ee09442d'),
 'article_title': "Teva's Partner Alvotech Receives Complete Response Letter "
                  'For AVT02 BLA ',
 'article_url': 'https://www.nasdaq.com/articles/tevas-partner-alvotech-receives-complete-response-letter-for-avt02-bla'}
{'_id': ObjectId('643b0b08d5fa5619ee09442e'),
 'article_title': "China St