In [1]:
import pandas as pd
import os
import bs4
import requests
import boto3
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# These are pre-determined for now but can be overwritten
BANKING_CODES = {
    "Barclays": 0,
    "Natwest": 23,
    "Nationwide": 4
}


# Test URL for FOS
BASE_URL = "https://www.financial-ombudsman.org.uk/decisions-case-studies/ombudsman-decisions/search?Keyword=scam&Business={banking_code}&DateFrom=2022-01-01&DateTo=2022-06-01&{upheld_code}&Sort=relevance"



In [2]:
# You can see that you can use the search within the parameters of the URL
def determine_banking_codes(use_specified: bool = True, additional_key_values: dict = {}):
    """
    Function that can scrap the FOS website for the banking codes and keys
    
    :param use_specified: Bool, use default, pre-specified banking code
    :param additional_key_values: dict, if we use pre-specified code, we can add extra key and values
    :return: dict, key and value of bank and banking code
    """


    if use_specified:
        banking_codes = BANKING_CODES
        if len(additional_key_values) > 0:
            for key, value in additional_key_values:
                banking_codes[key] = value
    else:
        # Need to add a webscraping function tograb from FOS website
        print("Need to implement")


    return banking_codes



In [19]:
def obtain_number_results(url: str, banking_code: int, is_upheld: bool = True) -> int:
    """
    Function to obtain the number of search results for Upheld/Not Upheld.

    :param url: str, url of FOS webpage
    :param banking_code: int, business code according to FOS
    :param is_upheld: Boolean, determines how to amend the url
    :return: int, number of search results

    """


    upheld_code = "IsUpheld%5B1%5D=1" if is_upheld else "IsUpheld%5B0%5D=0"

    amended_url = url.format(banking_code=str(banking_code), upheld_code=upheld_code)
    
    res = requests.get(amended_url)

    soup = bs4.BeautifulSoup(res.text, "html.parser")

    out = soup.find_all("div", {"class": "search-results-holder"})[0]
    print(out)

    search_results = out.text.split("\n")[1].split()[-2]

    
    return int(search_results)


In [20]:
def upheld_vs_not_upheld(banking_codes: dict) -> pd.DataFrame:
    """
    Function to grab the upheld vs not upheld cases

    :param banking_codes: dict, codes of the banks
    :return: pd.DataFrame
    """
    
    if len(banking_codes) == 0:
        logger.error("No banking codes")
        raise


    search_results = {}


    for key, value in banking_codes.items():
        logger.info("Bank: %s - Start", key)
        search_results[key] = {}
        search_results[key]["upheld"] = obtain_number_results(url=BASE_URL, banking_code=str(value), is_upheld=True)
        search_results[key]["not_upheld"] = obtain_number_results(url=BASE_URL, banking_code=str(value), is_upheld=False)
        search_results[key]["proportion"] = search_results[key]["upheld"]/(search_results[key]["not_upheld"] + search_results[key]["upheld"])
        logger.info("Bank: %s - End", key)


    df = pd.DataFrame(search_results).T


    return df

In [21]:
def run():
    """
    Main orchestration function

    :return: number of search results
    """


    print("Determine Banks - NEEDS TO BE UPDATED")    
    banking_codes = determine_banking_codes(
        use_specified=True, 
        additional_key_values={}
        )


    print("Begin FOS scrape")


    df = upheld_vs_not_upheld(banking_codes=banking_codes)

    print("Push to Twitter")
    # Add a step to login into an account and push to twitter
    

    return df




In [22]:
if __name__ == "__main__":
    output = run()
    print(output)

INFO:__main__:Bank: Barclays - Start


Determine Banks - NEEDS TO BE UPDATED
Begin FOS scrape
<div class="search-results-holder">
<p>Your search returned 368 results</p>
<ul class="search-results" role="list">
<li>
<a class="search-result" href="decision/DRN-3148865.pdf" target="_blank">
<h4>Decision Reference DRN-3148865</h4>
<div class="search-result__info">
<div class="search-result__info-main">
<em>14 Jan 2022</em>
<span class="search-result__separator"></span>
                                                            Barclays Bank UK PLC
                                                        

                                                        <span class="search-result__separator"></span>
                                                        Upheld
                                                    </div>
<span class="search-result__tag tag tag--light">Banking and Payments</span>
</div>
<div class="search-result__desc">
                                                    DRN-3148865 The complaint Ms K, on b

INFO:__main__:Bank: Barclays - End
INFO:__main__:Bank: Natwest - Start


<div class="search-results-holder">
<p>Your search returned 612 results</p>
<ul class="search-results" role="list">
<li>
<a class="search-result" href="decision/DRN-3327845.pdf" target="_blank">
<h4>Decision Reference DRN-3327845</h4>
<div class="search-result__info">
<div class="search-result__info-main">
<em>28 Apr 2022</em>
<span class="search-result__separator"></span>
                                                            Lloyds Bank PLC
                                                        

                                                        <span class="search-result__separator"></span>
                                                        Not upheld
                                                    </div>
<span class="search-result__tag tag tag--light">Banking and Payments</span>
</div>
<div class="search-result__desc">
                                                    DRN-3327845 The complaint Mrs C is unhappy Lloyds Bank PLC haven’t refunded money she lost a

<div class="search-results-holder">
<p>Your search returned 5 results</p>
<ul class="search-results" role="list">
<li>
<a class="search-result" href="decision/DRN-3347192.pdf" target="_blank">
<h4>Decision Reference DRN-3347192</h4>
<div class="search-result__info">
<div class="search-result__info-main">
<em>23 Mar 2022</em>
<span class="search-result__separator"></span>
                                                            National Westminster Bank Plc
                                                        

                                                        <span class="search-result__separator"></span>
                                                        Upheld
                                                    </div>
<span class="search-result__tag tag tag--light">Banking and Payments</span>
</div>
<div class="search-result__desc">
                                                    DRN-3347192 The complaint and background Mr A complains that National Westminster Ba

INFO:__main__:Bank: Natwest - End
INFO:__main__:Bank: Nationwide - Start


<div class="search-results-holder">
<p>Your search returned 38 results</p>
<ul class="search-results" role="list">
<li>
<a class="search-result" href="decision/DRN-3417596.pdf" target="_blank">
<h4>Decision Reference DRN-3417596</h4>
<div class="search-result__info">
<div class="search-result__info-main">
<em>9 May 2022</em>
<span class="search-result__separator"></span>
                                                            National Westminster Bank Plc
                                                        

                                                        <span class="search-result__separator"></span>
                                                        Not upheld
                                                    </div>
<span class="search-result__tag tag tag--light">Banking and Payments</span>
</div>
<div class="search-result__desc">
                                                    DRN-3417596 Complaint Ms J is unhappy that National Westminster Bank Plc (“NatWe

<div class="search-results-holder">
<p>Your search returned 8 results</p>
<ul class="search-results" role="list">
<li>
<a class="search-result" href="decision/DRN-3396191.pdf" target="_blank">
<h4>Decision Reference DRN-3396191</h4>
<div class="search-result__info">
<div class="search-result__info-main">
<em>25 Mar 2022</em>
<span class="search-result__separator"></span>
                                                            Nationwide Building Society
                                                        

                                                        <span class="search-result__separator"></span>
                                                        Upheld
                                                    </div>
<span class="search-result__tag tag tag--light">Banking and Payments</span>
</div>
<div class="search-result__desc">
                                                    DRN-3396191 The complaint Mrs S complains that Nationwide Building Society won’t reimb

INFO:__main__:Bank: Nationwide - End


<div class="search-results-holder">
<p>Your search returned 12 results</p>
<ul class="search-results" role="list">
<li>
<a class="search-result" href="decision/DRN-3370872.pdf" target="_blank">
<h4>Decision Reference DRN-3370872</h4>
<div class="search-result__info">
<div class="search-result__info-main">
<em>31 Mar 2022</em>
<span class="search-result__separator"></span>
                                                            Nationwide Building Society
                                                        

                                                        <span class="search-result__separator"></span>
                                                        Not upheld
                                                    </div>
<span class="search-result__tag tag tag--light">Banking and Payments</span>
</div>
<div class="search-result__desc">
                                                    DRN-3370872 The complaint Mr and Mrs F are unhappy Nationwide Building Society di