In [23]:
import re
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Barclays Mortgages

See: https://www.barclays.co.uk/mortgages/mortgage-calculator/cost-calculator/#/cost

In [2]:
# Product-service endpoint of the Barclays mortgage cost calculator;
# the requests below POST a JSON payload to it.
url = 'https://www.barclays.co.uk/dss/service/co.uk/mortgages/costcalculator/productservice'

In [5]:
# Inputs for the mortgage query; each one is copied into the request payload.
estimatedPropertyValue = 200000  # property value
repaymentAmount = 150000         # amount borrowed on a repayment basis
months = 240                     # loan term in months (sent as "totalTerm")
wantTo = 'FTBP'                  # customer-intent code; presumably "first-time buyer purchase" - TODO confirm

In [6]:
# JSON payload for the product service: a header section plus the loan details.
data = {
    "header": {"flowId": "0"},
    "body": {
        "wantTo": wantTo,
        "estimatedPropertyValue": estimatedPropertyValue,
        # The whole loan is on a repayment basis: the borrowed amount equals
        # the repayment amount and nothing is interest-only.
        "borrowAmount": repaymentAmount,
        "interestOnlyAmount": 0,
        "repaymentAmount": repaymentAmount,
        # Loan-to-value expressed as a whole-number percentage.
        "ltv": round(repaymentAmount/estimatedPropertyValue*100),
        "totalTerm": months,
        "purchaseType": "Repayment",
    },
}

In [8]:
# First attempt: POST the payload with no extra headers.
# requests applies no timeout by default, so cap the wait (in seconds) to
# keep a stalled connection from hanging the kernel indefinitely.
r = requests.post(url, json=data, timeout=30)
results = r.json()

In [9]:
# Dump the parsed JSON reply so the service's response can be inspected.
print(results)

{'header': {'result': 'error', 'systemError': {'errorCode': 'DSS_SEF001', 'type': 'E', 'severity': 'FRAMEWORK', 'errorMessage': 'State details not found in database', 'validationErrors': [], 'contentType': 'application/json', 'channel': '6'}}}


In [14]:
# Extra request headers sent with the product-service call.
headers = {}

# Non-typical, application-specific headers.
headers.update({
    'currentState': 'default_current_state',
    'action': 'default',
    'Origin': 'https://www.barclays.co.uk',
})

# Spoofed browser headers: referer, user agent and X-Requested-With.
headers.update({
    'Referer': 'https://www.barclays.co.uk/mortgages/mortgage-calculator/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
})

In [15]:
# Second attempt: same payload, now sent together with the extra headers.
# requests applies no timeout by default, so cap the wait (in seconds) to
# keep a stalled connection from hanging the kernel indefinitely.
r = requests.post(url, json=data, headers=headers, timeout=30)
results = r.json()

# Dump the parsed JSON reply for inspection.
print(results)

{'header': {'result': 'success', 'errorState': {'name': ['default_error_state'], 'page': 'default_error_page'}, 'successState': {'name': ['default_success_state'], 'page': 'default_success_page'}, 'flowId': '0'}, 'body': {'success': True, 'timeStamp': '2019-03-20T09:55+0000', 'errorMessage': None, 'mortgages': [{'mortgageName': '5 Year Fixed with ��1000 cashback (Purchase Only)', 'mortgageId': '1321127853346', 'ctaType': None, 'uniqueId': '5bd9ae4c68d62b6d6c75096a', 'mortgageType': 'FIXED', 'howMuchCanBeBorrowedNote': '95% (max) of the value of your home', 'initialRate': 3.63, 'initialRateTitle': '3.63%', 'initialRateNote': 'until 30th April 2024', 'followOnRate': 4.24, 'followOnRateTitle': '4.24% (BEBR + 3.49%)', 'followOnRateNote': 'variable for the remaining term *', 'overallCost': 4.1, 'overallCostTitle': '4.1% APRC', 'overallCostNote': 'APRC', 'minLtv': 0.0, 'maxLtv': 95.0, 'applicationFee': '0', 'earlyRepaymentCharges': '3 % of the balance repaid until 30 April 2024', 'minLoanAmo

# Reuters Board Members

In [17]:
# One requests Session shared by every Reuters request in this section
# (the index listing pages and the per-company officers pages).
session = requests.Session()

In [18]:
# Reuters index page for .SPX (the S&P 500); the scraping loop below requests
# it repeatedly with an increasing `pn` query parameter.
sp500 = 'https://www.reuters.com/finance/markets/index/.SPX'

In [20]:
# Scraper state for the paged index listing.
symbols = []  # symbols collected so far, in page order
page = 1      # next listing page to request (sent as the `pn` parameter)

# Matches hrefs that point at a stock overview page.
regex = re.compile(r'\/finance\/stocks\/overview\/.*')

In [21]:
# Walk the paged index listing and collect the symbol of every linked company.
while True:
    print('Scraping page:', page)
    params = {'sortBy': '', 'sortDir': '', 'pn': page}
    # requests applies no timeout by default; cap the wait (in seconds) so a
    # stalled connection cannot hang the loop forever.
    html = session.get(sp500, params=params, timeout=30).text
    soup = BeautifulSoup(html, "html.parser")
    pagenav = soup.find(class_='pageNavigation')
    if not pagenav:
        # No pagination element on the page: treated as the end of the listing.
        break
    companies = pagenav.find_next('table', class_='dataTable')
    if companies is None:
        # find_next returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on the next line.
        raise RuntimeError(
            'No dataTable found after the page navigation on page {}'.format(page))
    for link in companies.find_all('a', href=regex):
        # The symbol is the last path segment of the overview link.
        symbols.append(link.get('href').split('/')[-1])
    page += 1

Scraping page: 1
Scraping page: 2
Scraping page: 3
Scraping page: 4
Scraping page: 5
Scraping page: 6
Scraping page: 7
Scraping page: 8
Scraping page: 9
Scraping page: 10
Scraping page: 11
Scraping page: 12
Scraping page: 13
Scraping page: 14
Scraping page: 15
Scraping page: 16
Scraping page: 17
Scraping page: 18


In [22]:
# Peek at the first few collected symbols as a sanity check.
symbols[:3]

['MMM.N', 'AOS.N', 'ABT.N']

In [24]:
# URL template for a company's officers page; `{symbol}` is filled in with
# str.format for each scraped symbol.
officers = 'https://www.reuters.com/finance/stocks/company-officers/{symbol}'

In [25]:
# Accumulates one officers DataFrame per scraped symbol; the frames are
# concatenated into a single table after the scraping loop.
dfs = []

In [26]:
# Scrape the officers table of the first 10 symbols into one DataFrame.
# The accumulator is reset here so that re-running this cell does not append
# a second copy of every table to frames left over from an earlier run.
dfs = []

for symbol in symbols[:10]:
    print('Scraping symbol:', symbol)
    # requests applies no timeout by default; cap the wait (in seconds) so a
    # stalled connection cannot hang the loop forever.
    html = session.get(officers.format(symbol=symbol), timeout=30).text
    soup = BeautifulSoup(html, "html.parser")
    officer_table = soup.find('table', {"class": "dataTable"})
    if officer_table is None:
        # Without this guard the literal text 'None' would be handed to
        # read_html, which fails with an unhelpful parsing error.
        print('No officers table found for:', symbol)
        continue
    # Passing literal HTML to read_html is deprecated (pandas >= 2.1);
    # a file-like wrapper works on old and new versions alike.
    symbol_df = pd.read_html(StringIO(str(officer_table)), header=0)[0]
    # Tag every row with the symbol it belongs to, as the first column.
    symbol_df.insert(0, 'symbol', symbol)
    dfs.append(symbol_df)

df = pd.concat(dfs)

Scraping symbol: MMM.N
Scraping symbol: AOS.N
Scraping symbol: ABT.N
Scraping symbol: ABBV.N
Scraping symbol: ABMD.OQ
Scraping symbol: ACN.N
Scraping symbol: ATVI.OQ
Scraping symbol: ADNT.N
Scraping symbol: ADBE.OQ
Scraping symbol: AMD.OQ


In [27]:
# Preview the first rows of the combined officers table.
df.head()

Unnamed: 0,symbol,Name,Age,Since,Current Position
0,MMM.N,Inge Thulin,65.0,2018.0,"Executive Chairman of the Board, President"
1,MMM.N,Michael Roman,59.0,2018.0,"Chief Executive Officer, Director"
2,MMM.N,Hak Shin,61.0,2017.0,Vice Chairman and Executive Vice President
3,MMM.N,Nicholas Gangestad,53.0,2014.0,"Chief Financial Officer, Senior Vice President"
4,MMM.N,James Bauman,58.0,2017.0,"Executive Vice President, Industrial Business ..."
