In [2]:
from lxml import html
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import re

#This is a collection of functions that take a Yahoo Finance page and return financial information taken from the website.

def CurrentPrice(url,Curr):
    """Return the current price displayed on a Yahoo Finance page.

    Parameters
    ----------
    url : str
        Address of the Yahoo Finance page to download.
    Curr : str
        Currency flag. When it is exactly "P" the scraped figure is
        multiplied by 100; any other value leaves it unscaled.

    Returns
    -------
    float
        The scraped price with thousands separators removed and the
        scaling factor applied.

    Raises
    ------
    ValueError
        If the page contains no <span> with the expected price class
        (for example because the page layout changed), or if the text
        found there is not a number.
    """
    page = requests.get(url).content
    # Name the parser explicitly: leaving it out makes BeautifulSoup guess,
    # which emits a warning and can parse differently between machines.
    # lxml is already a dependency of this notebook.
    soup = BeautifulSoup(page, "lxml")
    price_tag = soup.find('span', {'class': 'Trsdu(0.3s) Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(b)'})
    if price_tag is None:
        # Fail with a message that names the page instead of the opaque
        # "'NoneType' object has no attribute 'text'".
        raise ValueError("Could not find the current price on page: " + str(url))

    # Remove thousands separators so that float() accepts the text.
    price_text = re.sub(r'[\,]+', '', price_tag.text)

    # Scale the figure in case it is stored in pounds rather than pence.
    Scal = 100 if Curr == "P" else 1

    return float(price_text)*Scal

#We now generalize the above function so that, given a list of urls, it outputs a list of current prices.
#Each argument is a two-element [url, currency] list, where currency is the currency flag the price is valued in.
def PsFromURL(*args):
    """Fetch the current price for every [url, currency] pair supplied.

    Each positional argument is indexed as ``[url, currency flag]`` and
    passed on to CurrentPrice. The prices are returned as a 1-D NumPy
    float array, in the same order as the arguments.
    """
    prices = np.zeros(len(args))
    for slot, pair in enumerate(args):
        prices[slot] = CurrentPrice(pair[0], pair[1])
    return prices



In [3]:
def LastClosePrice(url,Curr):
    """Return the previous close of a stock displayed on a Yahoo Finance page.

    Parameters
    ----------
    url : str
        Address of the Yahoo Finance page to download.
    Curr : str
        Currency flag. When it is exactly "P" the scraped figure is
        multiplied by 100; any other value leaves it unscaled.

    Returns
    -------
    float
        The scraped figure with thousands separators removed and the
        scaling factor applied.

    Raises
    ------
    ValueError
        If the page contains no <span> with the expected class (for
        example because the page layout changed), or if the text found
        there is not a number.
    """
    page = requests.get(url).content
    # Name the parser explicitly: leaving it out makes BeautifulSoup guess,
    # which emits a warning and can parse differently between machines.
    # lxml is already a dependency of this notebook.
    soup = BeautifulSoup(page, "lxml")
    close_tag = soup.find('span', {'class': 'Trsdu(0.3s) '})
    if close_tag is None:
        # Fail with a message that names the page instead of the opaque
        # "'NoneType' object has no attribute 'text'".
        raise ValueError("Could not find the previous close on page: " + str(url))

    # Remove thousands separators so that float() accepts the text.
    close_text = re.sub(r'[\,]+', '', close_tag.text)

    # Scale the figure in case it is stored in pounds rather than pence.
    Scal = 100 if Curr == "P" else 1

    return float(close_text)*Scal


def CsFromURL(*args):
    """Fetch the previous close for every [url, currency] pair supplied.

    Each positional argument is indexed as ``[url, currency flag]`` and
    passed on to LastClosePrice. The results are returned as a 1-D NumPy
    float array, in the same order as the arguments.
    """
    closes = np.zeros(len(args))
    position = 0
    for entry in args:
        closes[position] = LastClosePrice(entry[0], entry[1])
        position += 1
    return closes


In [4]:
def SummaryToHistory(URL):
    """Convert a Yahoo summary-page URL into the matching history-page URL.

    "/history" is inserted immediately before the query string (the first
    "?"). This lets later code specify a single page instead of two.

    Parameters
    ----------
    URL : str
        Address of the summary page, with or without a query string.

    Returns
    -------
    str
        The same address with "/history" appended to the path part.
    """
    # partition() splits at the first "?"; when there is none, both the
    # separator and the query come back empty, so "/history" is simply
    # appended. The previous find()/slice logic got -1 in that case and
    # moved the URL's last character behind "/history".
    path, separator, query = URL.partition("?")
    return path + "/history" + separator + query


In [5]:
def HistoricalData(URL,Curr):
    """Download the price history behind a Yahoo Finance summary page.

    The summary URL is converted with SummaryToHistory, the page is
    downloaded, and the second-to-last HTML table on it is read. The values
    in that table's fifth column (position 4) are returned, skipping rows
    where the column is empty; the original notes describe those rows as
    dividend entries.
    NOTE(review): position 4 is presumably the closing price - confirm
    against the live page layout.

    Parameters
    ----------
    URL : str
        Address of the Yahoo Finance summary page of a share.
    Curr : str
        Currency flag. When it is exactly "P" every value is multiplied by
        100; any other value leaves the values unscaled. (On Yahoo Finance,
        markets such as the FTSE 100 are displayed in GBP whereas individual
        components are priced in pence.)

    Returns
    -------
    numpy.ndarray
        1-D float array of the non-missing values, in table order.
    """
    # Imported locally so the function does not depend on another cell.
    from io import BytesIO

    page = requests.get(SummaryToHistory(URL)).content
    # Hand pandas a file-like object: passing the raw HTML bytes directly is
    # deprecated in recent pandas releases.
    tables = pd.read_html(BytesIO(page))
    history = tables[-2]

    # Scaling factor for the currency flag.
    Scal = 100 if Curr == "P" else 1

    # Drop the rows with no value in the price column in one vectorised
    # step rather than counting and copying them cell by cell.
    prices = history.iloc[:, 4].dropna()
    return np.asarray(prices, dtype=float) * Scal

#We now extend the above function so that it can take a list of [stock, currency] pairs and output historic data in pence.

def HistoriesFromURL(*args):
    """Collect the price history for every [url, currency] pair supplied.

    Each positional argument is indexed as ``[url, currency flag]`` and
    passed on to HistoricalData. Returns a list with one history per
    argument, in the same order as the arguments.
    """
    return [HistoricalData(pair[0], pair[1]) for pair in args]


In [25]:
def PricesToReturns(*args):
    """Turn each price history into a list of returns.

    For every sequence passed in, element k of the result is
    ``prices[k] / prices[k + 1] - 1``, so a history of m prices yields
    m - 1 returns. Returns a list holding one such list per argument.
    """
    def _returns(prices):
        # Ratio of each entry to the entry that follows it, minus one.
        return [prices[k] / prices[k + 1] - 1 for k in range(len(prices) - 1)]

    return [_returns(prices) for prices in args]

In [26]:
# Each entry pairs a Yahoo Finance summary-page URL with a currency flag.
# NOTE(review): the price functions only rescale when the flag is exactly "P";
# the lowercase 'p' used here leaves every value unscaled - confirm that this
# is the intended meaning of the flag.
Barc=['https://uk.finance.yahoo.com/quote/BCS?p=BCS','p']
HSBC=['https://uk.finance.yahoo.com/quote/HSBA.L?p=HSBA.L','p']
Llo=['https://uk.finance.yahoo.com/quote/LLOY.L?p=LLOY.L','p']
RBS=['https://uk.finance.yahoo.com/quote/RBS?p=RBS','p']
Big4=[Barc,HSBC,Llo,RBS]
# Download one price history per entry (each call fetches a web page).
H=HistoriesFromURL(*Big4)

# Convert every history into returns; as the last expression of the cell
# the full result is displayed as the cell output.
PricesToReturns(*H)

[[-0.005714285714285783,
  -0.01241534988713311,
  0.012571428571428456,
  0.0011441647597254523,
  0.008073817762399127,
  0.01760563380281699,
  0.009478672985782088,
  0.007159904534606021,
  -0.007109004739336289,
  -0.0011834319526626835,
  -0.012850467289719725,
  -0.0034924330616995514,
  0.023837902264600697,
  0.00962695547533099,
  0.03875000000000006,
  -0.0024937655860348684,
  -0.025516403402187193,
  0.013546798029556717,
  -0.0012300123001232066,
  -0.004895960832313251,
  -0.02389486260453999,
  -0.02674418604651163,
  -0.02714932126696834,
  0.007981755986317118,
  0.0,
  0.008045977011494232,
  0.004618937644341736,
  0.01762632197414815,
  0.009489916963226541,
  0.016887816646562248,
  -0.015439429928741144,
  -0.023201856148491795,
  -0.008055235903337232,
  0.010465116279069653,
  0.004672897196261516,
  -0.011547344110854452,
  -0.029147982062780242,
  0.03121387283236987,
  0.045949214026602236,
  0.013480392156862697,
  -0.02508960573476693,
  -0.00238379022646