# Scraping articles from the Irish Times website

In [5]:
import requests
from bs4 import BeautifulSoup
import datetime
import pandas as pd


In [48]:
def parseArticle(url):
    """Download one article page and append it as a row to ``trial2.csv``.

    The row holds the article title, its date formatted as ``YYYY-MM-DD``
    and the concatenated text of every ``<p>`` inside the
    ``article_bodycopy`` div. The title is also printed as a progress
    indicator.

    Nothing is written when the page contains an ``intercept-modal`` div
    (treated as subscriber-only), when one of the expected elements is
    missing, or when the date text does not match the expected
    ``'%a, %b %d, %Y'`` layout; the last two cases print a short
    "Skipping" message instead of raising.

    Args:
        url: Address of the article page to scrape.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: If the page cannot be
            fetched, including when the request times out.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.text, 'html.parser')

    # Ensure article is not 'subscriber only'
    if soup.find("div", {"class": "intercept-modal"}) is not None:
        return

    # Locate every element up front so a page with an unexpected layout
    # is skipped instead of raising AttributeError on a None lookup.
    headerSectionElem = soup.find("hgroup")
    titleElem = None
    if headerSectionElem is not None:
        titleElem = headerSectionElem.find("h1")
    timeElem = soup.find("time")
    articleElem = soup.find("div", {"class": "article_bodycopy"})

    if titleElem is None or timeElem is None or articleElem is None:
        print("Skipping (unexpected page layout): " + url)
        return

    # Get article title
    titleText = titleElem.text
    print(titleText)

    # Get article date: drop everything after the last comma, then parse
    # the remainder.
    datePart, separator, _ = timeElem.text.strip().rpartition(',')
    try:
        if not separator:
            raise ValueError("no comma in date text")
        parsedDate = datetime.datetime.strptime(datePart.strip(), '%a, %b %d, %Y')
    except ValueError:
        print("Skipping (unrecognised date format): " + url)
        return
    dateText = parsedDate.strftime('%Y-%m-%d')

    # Get article text
    paragraphText = "".join(
        paragraphElem.text for paragraphElem in articleElem.find_all("p")
    )

    # Append a single row; no header and no index column are written.
    data = [[titleText, dateText, paragraphText]]
    df = pd.DataFrame(data, columns=['title', 'date', 'text'])
    df.to_csv('trial2.csv', mode='a', header=False, index=False)
    

In [34]:
def getArticleLinks(url):
    """Return the absolute article URLs listed on one search-results page.

    Each ``div`` with class ``search_items_title`` is expected to contain a
    ``span`` with class ``h2`` wrapping the article link. Results that lack
    the span or a link with an ``href`` are skipped rather than raising.

    Args:
        url: Address of the search-results page to scan.

    Returns:
        A list of article URLs, in page order, each resolved against
        ``https://www.irishtimes.com``.

    Raises:
        requests.exceptions.RequestException: If the page cannot be
            fetched, including when the request times out.
    """
    from urllib.parse import urljoin

    # A timeout keeps one stalled connection from hanging the whole crawl.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.text, 'html.parser')

    articleUrls = []
    for searchResultDiv in soup.find_all("div", {"class": "search_items_title"}):
        spanElem = searchResultDiv.find("span", {"class": "h2"})
        if spanElem is None:
            continue
        linkElem = spanElem.find("a", href=True)
        if linkElem is None:
            continue
        # urljoin handles site-relative hrefs as well as absolute ones.
        articleUrls.append(urljoin('https://www.irishtimes.com', linkElem['href']))

    return articleUrls

In [49]:
# Search-results URL for the query "irish economy" (toDate=09-06-2020);
# the page number is appended to the end of this string.
baseUrl = "https://www.irishtimes.com/search/search-7.2285082?q=irish+economy&toDate=09-06-2020&pageId=2.709&page="

# Walk result pages 529 through 633, printing each page number before
# scraping every article linked from that page.
for i in range(529, 634):
    print(i)
    pageUrl = baseUrl + str(i)
    articleLinks = getArticleLinks(pageUrl)
    for link in articleLinks:
        parseArticle(link)
        

529
Cantillon
Europe can be a leader of the 'new industrial revolution'
'We should explore any avenue that will help Ireland stand on its own feet'
Irish service sector grows at slowest rate in six months
Swiss pay cap puts some execs in the soup
Irish consumer sentiment plunges
Services sector slows in February
Troika to devise extension options
Businesses want postcodes introduced
Davy lifts forecast for Irish economy
530
Cantillon
Ireland and Germany see growth in manufacturing
10,000 reasons for optimism
Digicel decision follows familiar pattern
Barroso 'respects' Irish sacrifices
Hopes of economic recovery rise
Little scope for influence at EU meetings
Bank guarantee scheme to end
Small becomes more beautiful to State
France finds 'The Grizz' pulls no punches
531
Deal shows Ireland has travelled 'good distance', says Noonan
Return to health and reform vital to euro zone
Impact to argue against plan for forests
Cantillon
Citibank optimistic on outlook for Ireland
Weidmann warns on 

A reason to cheer as revision finds €2.6bn - just don't get carried away
Current account deficit of €1bn in first quarter
Ireland holding its own in tougher FDI game
Noonan says test is in return to markets
Cantillon
554
Cantillon
NI business conditions disappoint
Dodging the diaspora
Cantillon
US firms have 2,500 vacancies
Slight lift in mood on euro crisis after leaders' latest response to debacle
Weaker euro improves Irish competitiveness
Hard to see downside for Republic in EU's banking union move
Cantillon
National prices predicted to fall
555
Irish exports rise in first quarter
ECB cuts interest rates again
State raises €500m in markets
Services sector shrinks but consumers happier
State almost certain to succeed in raising €500m on markets Economics Editor
Cantillon
Manufacturing employment rises at fastest rate in 12 years
'Healthy' manufacturing numbers
Long-term finance for IBRC may ease Irish return to markets, says Honohan
Cantillon
556
Cantillon
Ireland can do better in at

Cantillon
State should ask ECB to let Anglo defer repayments to Central Bank
576
Jobless 'shoehorned' into inappropriate training
Contentious Kenny remarks perceived as tough but truthful
Learning lessons from internal budgetary models
Credit where it's due
Milking the markets
Spirit of invention
Cantillon
Department of Finance rules lack flexibility, says fiscal watchdog
Ireland 'should not be test ground' for austerity 'experiment'
Bond market mirrors a topsy-turvy financial world
577
First sale of Irish bonds since 2010
Noonan insists Anglo debt will be paid
Chief executives downbeat on prospects
State's borrowing costs fall amid EU talks on debt easing
Cantillon
Noonan for talks with EU, ECB on Irish bank debt
Dismissing bailout may be tactical and not empty bravado
Cantillon
EU to speed up plan for common tax base
Rip-off Ireland alive in some sectors
578
Germany questions pay restraint model
Consumer prices last year rose by 2.5%, says CSO
Check out Prague for common sense busine

Sutherland says State should try to cut over €3.6bn from deficit
Latest CSO data suggests trend towards narrowing deficit
Rationale behind ECB opposition to burning bondholders
Cantillon
'Renegotiation of bailout' needed
IMF calls for recapitalisation of European banks
Ex-central bank adviser cautions on European bonds
597
IMF report paints pessimistic growth picture and urges greater action
Private sector profits from public sector wastefulness
IMF predicts growth slowdown
Davy cuts economic forecasts
Cantillon
A degree of stability revealed in job figures
Ireland still retains the potential to recover quickly
Arthur's Day pulling them in
CityJet pushes key message of customer service
598
Little things
Cantillon
Moody's downgrades French banks
Effect of Greek default could put Lehmans in ha'penny place
Former chancellor warns euro zone teetering on brink
Cantillon
Croke Park review could be off table as report says reform is progressing well
Markets take fright as ECB official quits i

Economy forecast to grow in 2011
617
March heralds big rise in consumer optimism
US lacks credible stability plan - IMF
Surprise rise in consumer sentiment
Cantillon
IMF cuts projected growth for Ireland to 0.5%
IMF cuts Irish growth forecast
Cantillon
Time will tell if contagion contained as patients must wait for nicer medicine
Plan to fix Portugal's bailout deal ahead of election
Market behaviour gives impression we are getting blasé about bailouts
618
Government will only be able to blame predecessor 'for the next nine months'
Trichet insists decision will not hurt Ireland
Euro zone's one-size-fits-all policy leaves Ireland vulnerable
Mortgage holders take hit as ECB increases interest rates
Portuguese crash damages hopes for better deal on rates
Cut in bailout debt rate fair and vital for sustainability, says Barroso
Gap between tax take and spending soars to over €7bn
Investors 'should buy Irish sovereign risk'
Cantillon
Roubini warns of bailout risks
619
S&P lowers Ireland's rat

### CSV Structure
title | date | text

In [11]:
# Run parseArticle on a single article URL.
parseArticle("https://www.irishtimes.com/business/economy/christine-lagarde-calls-for-ambitious-actions-as-ecb-disappoints-1.4201089")

<div class="article_bodycopy">
<p class="no_name"><a class="search" href="/topics/topics-7.1213540?article=true&amp;tag_organisation=European+Central+Bank">European Central Bank</a> (ECB) president <a class="search" href="/topics/topics-7.1213540?article=true&amp;tag_person=Christine+Lagarde">Christine Lagarde</a> called on governments to launch an “ambitious and coordinated” financial response in light of the “major shock” to economic growth caused by Covid-19, as her institution disappointed financial markets by leaving its key interest rates unchanged.</p>
<p class="no_name">“Governments and all other policy institutions are called upon to take timely and targeted actions to address the public health challenge of containing the spread of the coronavirus and mitigate its economic risk,” Ms Lagarde said at a press conference on Thursday in Frankfurt.</p>
<p class="no_name">“In particular, an ambitious and coordinated fiscal policy response is required to support businesses and workers

In [23]:
# Fetch the article links from search-results page 529.
getArticleLinks("https://www.irishtimes.com/search/search-7.2285082?q=irish+economy&toDate=09-06-2020&pageId=2.709&page=529")

['https://www.irishtimes.com/business/economy/cantillon-1.1319332',
 'https://www.irishtimes.com/business/economy/europe/europe-can-be-a-leader-of-the-new-industrial-revolution-1.1319323',
 'https://www.irishtimes.com/business/economy/europe/we-should-explore-any-avenue-that-will-help-ireland-stand-on-its-own-feet-1.1319349',
 'https://www.irishtimes.com/business/economy/world/irish-service-sector-grows-at-slowest-rate-in-six-months-1.1318166',
 'https://www.irishtimes.com/business/economy/swiss-pay-cap-puts-some-execs-in-the-soup-1.1317400',
 'https://www.irishtimes.com/business/economy/ireland/irish-consumer-sentiment-plunges-1.1318319',
 'https://www.irishtimes.com/business/economy/ireland/services-sector-slows-in-february-1.1318352',
 'https://www.irishtimes.com/business/economy/ireland/troika-to-devise-extension-options-1.1318281',
 'https://www.irishtimes.com/business/economy/ireland/businesses-want-postcodes-introduced-1.1317055',
 'https://www.irishtimes.com/business/economy/ir