In [1]:
# default_exp retrieval

# Outage Data Retrieval

<br>

### Imports

In [2]:
#exports
import html
import json
import os
from warnings import warn

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from ipypb import track

In [3]:
from IPython.display import JSON, HTML

<br>

### UK Power Networks

https://www.ukpowernetworks.co.uk/power-cut/map

In [5]:
incidents_url = 'https://www.ukpowernetworks.co.uk/Incidents/GetIncidents'

r_json = requests.get(incidents_url).json()

JSON(r_json)

<IPython.core.display.JSON object>

<br>

We'll now extract the incident ids, as we can use them to generate the url for the JSON endpoint with the incident data. Items where the `PowerCutType` is set to `Multiple` have been purposefully removed because they don't include an incident id (we'll handle them next).

In [6]:
#exports
def extract_ukpn_single_incident_ids(r_json):
    """Return the incident ids of all non-`Multiple` entries in the UKPN incidents feed.

    Each id is the text that follows `incidentId=` in the entry's
    `PanelContentUrl`.
    """
    single_incident_ids = []

    for incident in r_json['Incidents']:
        # `Multiple` entries are skipped here
        if incident['PowerCutType'] == 'Multiple':
            continue

        panel_url_parts = incident['PanelContentUrl'].split('incidentId=')
        single_incident_ids.append(panel_url_parts[1])

    return single_incident_ids

In [7]:
single_incident_ids = extract_ukpn_single_incident_ids(r_json)

JSON([single_incident_ids])

<IPython.core.display.JSON object>

<br>

For the `Multiple` items we'll instead extract the url for their incidents metadata, which includes the ids of the incidents grouped within them.

In [8]:
#exports
def extract_ukpn_multiple_incident_urls(r_json):
    """Return the absolute metadata url of every `Multiple` entry in the UKPN incidents feed.

    Each url is the UKPN site root followed by the entry's `PanelContentUrl`.
    """
    multiple_incident_urls = []

    for incident in r_json['Incidents']:
        if incident['PowerCutType'] == 'Multiple':
            panel_path = incident['PanelContentUrl']
            multiple_incident_urls.append(f"https://www.ukpowernetworks.co.uk{panel_path}")

    return multiple_incident_urls

In [9]:
multiple_incident_urls = extract_ukpn_multiple_incident_urls(r_json)

JSON([multiple_incident_urls])

<IPython.core.display.JSON object>

In [10]:
#exports
def extract_ukpn_multiple_incident_ids(multiple_incident_urls):
    """Collect the incident ids linked from each `Multiple` incidents page.

    Each url is requested and parsed as HTML. The ids are read from the
    `data-url` attribute of the links inside the
    `multiple-incidents--wrapper mb-4` div (the text after `incidentId=`).

    Parameters
    ----------
    multiple_incident_urls : iterable of str
        Urls such as those returned by `extract_ukpn_multiple_incident_urls`.

    Returns
    -------
    list of str
        Incident ids in the order they were found. The same id can appear
        more than once.
    """
    incident_ids = []

    for multiple_incident_url in track(multiple_incident_urls, label='Multiple Ids'):
        r = requests.get(multiple_incident_url)
        soup = bs(r.text, features='lxml')

        wrapper = soup.find('div', {'class': 'multiple-incidents--wrapper mb-4'})

        if wrapper is None:
            # `find` returns None when the div is absent; skip this page
            # rather than failing with an opaque AttributeError
            warn(f'No incident links could be found at: {multiple_incident_url}')
            continue

        incident_ids += [
            link['data-url'].split('incidentId=')[1]
            for link
            in wrapper.find_all('a', attrs={'data-url': True})
            # links without an incident id cannot be turned into a details url
            if 'incidentId=' in link['data-url']
        ]

    return incident_ids

In [11]:
multiple_incident_ids = extract_ukpn_multiple_incident_ids(multiple_incident_urls)

multiple_incident_ids[:5]

['INCD-247974-G',
 'INCD-247994-G',
 'INCD-247974-G',
 'INCD-247976-G',
 'INCD-247987-G']

In [12]:
#exports
def get_ukpn_incident_detail_url(incident_id):
    """Build the url of the UKPN incident-details JSON endpoint for `incident_id`."""
    details_endpoint = 'https://www.ukpowernetworks.co.uk/Incidents/getincidentdetails'

    return f'{details_endpoint}?incidentid={incident_id}'

In [13]:
incident_id = multiple_incident_ids[1]

incident_detail_url = get_ukpn_incident_detail_url(incident_id)
r_json = requests.get(incident_detail_url).json()

JSON(r_json)

<IPython.core.display.JSON object>

In [14]:
#exports
def is_ukpn_incident_active(r_json):
    """Return False when the incident's `PowerCutType` is 'Restored', otherwise True."""
    return r_json['PowerCutType'] != 'Restored'
    
def extract_ukpn_relevant_info(r_json):
    """Reduce a UKPN incident-details response to the fields used downstream.

    Returns a dict with the keys `incident_active`, `restored_time`,
    `estimated_restored_time`, `planned_time`, `received_time`,
    `postcodes_impacted`, `description` and `incident_url`.
    """
    # Output name -> field name inside the nested `UKPNIncident` record
    ukpn_time_fields = {
        'restored_time': 'RestoredDateTime',
        'estimated_restored_time': 'EstimatedRestorationDate',
        'planned_time': 'PlannedDate',
        'received_time': 'ReceivedDate'
    }

    incident_info = {'incident_active': is_ukpn_incident_active(r_json)}

    for new_name, ukpn_name in ukpn_time_fields.items():
        incident_info[new_name] = r_json['UKPNIncident'][ukpn_name]

    incident_info['postcodes_impacted'] = r_json['FullPostcodeData']
    incident_info['description'] = r_json['IncidentCategoryCustomerFriendlyDescription']
    incident_info['incident_url'] = f"https://www.ukpowernetworks.co.uk/power-cut/map?incidentid={r_json['IncidentReference']}"

    return incident_info

In [15]:
incident_info = extract_ukpn_relevant_info(r_json)

JSON(incident_info)

<IPython.core.display.JSON object>

In [16]:
#exports
def get_ukpn_incidents_info(incidents_url='https://www.ukpowernetworks.co.uk/Incidents/GetIncidents'):
    """Retrieve the raw and cleaned details of every incident in the UKPN feed.

    The incidents feed is requested first. The ids of the single incidents and
    of the incidents grouped under `Multiple` entries are then used to request
    each incident's details.

    Parameters
    ----------
    incidents_url : str
        Url of the UKPN incidents JSON feed.

    Returns
    -------
    raw_incidents_info : dict
        Incident id -> details response as returned by the endpoint.
    cleaned_incidents_info : dict
        Incident id -> output of `extract_ukpn_relevant_info`.

    Incidents whose details cannot be retrieved or cleaned are reported with a
    warning and skipped.
    """
    incidents_feed = requests.get(incidents_url).json()

    incident_ids = (
        extract_ukpn_single_incident_ids(incidents_feed) +
        extract_ukpn_multiple_incident_ids(extract_ukpn_multiple_incident_urls(incidents_feed))
    )

    # The extracted ids can contain repeats. The results are keyed by id, so
    # drop the repeats (keeping first-seen order) to avoid redundant requests.
    incident_ids = list(dict.fromkeys(incident_ids))

    raw_incidents_info = dict()
    cleaned_incidents_info = dict()

    for incident_id in track(incident_ids, label='Details'):
        try:
            incident_detail_url = get_ukpn_incident_detail_url(incident_id)
            incident_detail = requests.get(incident_detail_url).json()

            raw_incidents_info[incident_id] = incident_detail
            cleaned_incidents_info[incident_id] = extract_ukpn_relevant_info(incident_detail)
        except Exception as exc:
            # `Exception` rather than a bare `except` so that KeyboardInterrupt
            # and SystemExit still stop a long-running retrieval
            warn(f'Failed to retrieve incident details for: {incident_id} ({exc!r})')

    return raw_incidents_info, cleaned_incidents_info

In [17]:
raw_incidents_info, cleaned_incidents_info = get_ukpn_incidents_info()

JSON([cleaned_incidents_info])

<IPython.core.display.JSON object>

In [18]:
#exports
def save_json_data(data, filename, data_dir='data/raw'):
    """Write `data` as JSON to `{data_dir}/{filename}.json`.

    Parameters
    ----------
    data : JSON-serialisable object
        The content to save.
    filename : str
        File name without the `.json` extension.
    data_dir : str
        Directory to write into. It is created if it does not exist yet.
    """
    if data_dir:
        # A fresh checkout may not contain the data directory yet
        os.makedirs(data_dir, exist_ok=True)

    with open(f'{data_dir}/{filename}.json', 'w') as fp:
        json.dump(data, fp)

In [19]:
save_json_data(raw_incidents_info, 'ukpn_incidents_info', data_dir='../data/raw')

<br>

### Scottish & Southern Electricity Networks

https://www.ssen.co.uk/Powertrack/

N.b. we occasionally get date values of `Date(-62135596800000)`. This occurs when no date can be found, so we will set these to `None`.

In [21]:
url = 'https://www.ssen.co.uk/Sse_Components/Views/Controls/FormControls/PowerTrackHandler.ashx'

r_json = requests.get(url, verify=False).json()

JSON(r_json)



<IPython.core.display.JSON object>

In [None]:
#exports
def clean_ssen_incident_info(incident):
    """Convert one SSEN fault record into the common incident format.

    The date fields hold a number wrapped in brackets, e.g. `Date(1609459200000)`.
    The number is read as milliseconds since the epoch and formatted as
    `%Y-%m-%d %H:%M:%S`.

    Parameters
    ----------
    incident : dict
        A single entry from the `Faults` list of the SSEN response.

    Returns
    -------
    dict
        With the keys `received_time`, `estimated_arrival_time`,
        `estimated_restored_time`, `postcodes_impacted`, `description`,
        `incident_active` and `incident_url`. Dates that are missing, or that
        cannot be parsed (a warning is raised for these), are set to `None`.
    """
    ssen_relevant_dates_name_mapping = {
        'LoggedAtUtc': 'received_time',
        'EstimatedArrivalOnSiteUtc': 'estimated_arrival_time',
        'EstimatedRestorationTimeUtc': 'estimated_restored_time'
    }

    # Value used when no date could be found
    missing_dt_str = '-62135596800000'

    def clean_dt(raw_dt):
        if raw_dt is None:
            return None

        try:
            dt_str = raw_dt.split('(')[1].split(')')[0]
        except (AttributeError, IndexError):
            # not a string, or no bracketed number to extract
            warn(f'{raw_dt} could not be parsed to a date')
            return None

        if dt_str == missing_dt_str:
            return None

        try:
            # `unit='ms'` keeps the conversion in integers instead of scaling
            # to nanoseconds through a float multiplication
            return pd.to_datetime(int(dt_str), unit='ms').strftime('%Y-%m-%d %H:%M:%S')
        except Exception:
            warn(f'{dt_str} could not be parsed to a date')
            return None

    cleaned_incident_info = dict()

    for old_dt_name, new_dt_name in ssen_relevant_dates_name_mapping.items():
        cleaned_incident_info[new_dt_name] = clean_dt(incident[old_dt_name])

    cleaned_incident_info['postcodes_impacted'] = incident['AffectedAreas']
    cleaned_incident_info['description'] = incident['Message']
    cleaned_incident_info['incident_active'] = True
    cleaned_incident_info['incident_url'] = 'https://www.ssen.co.uk/Powertrack/'

    return cleaned_incident_info

In [None]:
incident = r_json['Faults'][0]

cleaned_incident_info = clean_ssen_incident_info(incident)
    
JSON(cleaned_incident_info)

In [None]:
#exports
def get_ssen_incidents_info(
    incidents_url='https://www.ssen.co.uk/Sse_Components/Views/Controls/FormControls/PowerTrackHandler.ashx',
    verify=False
):
    """Retrieve the raw SSEN response and the cleaned details of each fault.

    Parameters
    ----------
    incidents_url : str
        Url of the SSEN PowerTrack JSON handler.
    verify : bool or str
        Passed to `requests.get` as its `verify` argument.

    Returns
    -------
    raw_incidents_info : dict
        The JSON response as returned by the handler.
    cleaned_incidents_info : dict
        Fault `Reference` -> output of `clean_ssen_incident_info`.
    """
    # NOTE(review): TLS certificate verification is disabled by default to
    # preserve the existing behaviour. This leaves the request open to
    # tampering - pass `verify=True` if the server's certificate validates.
    raw_incidents_info = requests.get(incidents_url, verify=verify).json()
    cleaned_incidents_info = dict()

    for incident in track(raw_incidents_info['Faults']):
        incident_ref = incident['Reference']
        cleaned_incidents_info[incident_ref] = clean_ssen_incident_info(incident)

    return raw_incidents_info, cleaned_incidents_info

In [None]:
raw_incidents_info, cleaned_incidents_info = get_ssen_incidents_info()

JSON([cleaned_incidents_info])

In [None]:
save_json_data(raw_incidents_info, 'ssen_incidents_info', data_dir='../data/raw')

<br>

### Western Power Distribution

https://powercuts.westernpower.co.uk/

In [None]:
#exports
def get_wpd_incident_feed(url='https://powercuts.westernpower.co.uk'):
    """Extract the incidents feed embedded in the WPD power cuts page.

    The page passes HTML-escaped JSON to `init(...)` inside a `data-ng-init`
    attribute. That JSON is unescaped and parsed, and its `feed` entry (itself
    a JSON string) is parsed and returned.
    """
    page_html = requests.get(url).text

    # Markers that surround the escaped JSON argument in the page source
    init_prefix = 'data-ng-init="init('
    init_suffix = ')"></div><div id="powercuts"'

    escaped_init_json = page_html.split(init_prefix)[1].split(init_suffix)[0]
    raw_incidents_info = json.loads(html.unescape(escaped_init_json))

    return json.loads(raw_incidents_info['feed'])

In [None]:
feed = get_wpd_incident_feed()

JSON(feed)

In [None]:
incident_ids = [incident['id'] for incident in feed['incidents']]

incident_ids[:5]

In [None]:
#exports
def wpd_incident_id_to_url(incident_id):
    """Build the url of the WPD incident JSON endpoint for `incident_id`."""
    incident_endpoint = 'https://powercuts.westernpower.co.uk/__powercuts/getIncidentById'

    return f'{incident_endpoint}?incidentId={incident_id}'

In [None]:
incident_id = incident_ids[0]

incident_url = wpd_incident_id_to_url(incident_id)
r_json = requests.get(incident_url).json()

JSON(r_json)

In [None]:
#exports
def clean_wpd_incident_info(incident):
    """Convert one WPD incident record into the common incident format.

    `startTime` and `etr` are read as milliseconds since the epoch and
    formatted as `%Y-%m-%d %H:%M:%S`; a `None` value is kept as `None`.

    Parameters
    ----------
    incident : dict
        The JSON response of the WPD incident endpoint.

    Returns
    -------
    dict
        With the keys `received_time`, `estimated_restored_time`,
        `postcodes_impacted`, `description`, `incident_active` and
        `incident_url`. The incident is active when `restored` is falsy, which
        includes a missing (`None`) flag.
    """
    wpd_relevant_dates_name_mapping = {
        'startTime': 'received_time',
        'etr': 'estimated_restored_time'
    }

    cleaned_incident_info = dict()

    for old_dt_name, new_dt_name in wpd_relevant_dates_name_mapping.items():
        raw_dt = incident[old_dt_name]

        if raw_dt is None:
            cleaned_incident_info[new_dt_name] = None
        else:
            # `unit='ms'` instead of scaling to nanoseconds through a float
            # multiplication
            cleaned_incident_info[new_dt_name] = pd.to_datetime(raw_dt, unit='ms').strftime('%Y-%m-%d %H:%M:%S')

    cleaned_incident_info['postcodes_impacted'] = incident['postcodes']
    cleaned_incident_info['description'] = incident['status']
    cleaned_incident_info['incident_active'] = not incident['restored']
    cleaned_incident_info['incident_url'] = f"https://powercuts.westernpower.co.uk/incident/{incident['id']}"

    return cleaned_incident_info

In [None]:
JSON(clean_wpd_incident_info(r_json))

In [None]:
#exports
def get_wpd_incidents_info(incidents_url='https://powercuts.westernpower.co.uk'):
    """Retrieve the WPD incidents feed and the cleaned details of each incident.

    Returns the feed as extracted by `get_wpd_incident_feed`, together with a
    dict mapping each incident id to the output of `clean_wpd_incident_info`
    for that incident's details response.
    """
    raw_incidents_info = get_wpd_incident_feed(incidents_url)

    def retrieve_cleaned_incident(incident_id):
        # One request per incident: the details come from a separate endpoint
        incident_detail = requests.get(wpd_incident_id_to_url(incident_id)).json()
        return clean_wpd_incident_info(incident_detail)

    incident_ids = [incident['id'] for incident in raw_incidents_info['incidents']]
    cleaned_incidents_info = {
        incident_id: retrieve_cleaned_incident(incident_id)
        for incident_id in incident_ids
    }

    return raw_incidents_info, cleaned_incidents_info

In [None]:
raw_incidents_info, cleaned_incidents_info = get_wpd_incidents_info()

JSON(cleaned_incidents_info)

In [None]:
save_json_data(raw_incidents_info, 'wpd_incidents_info', data_dir='../data/raw')

<br>

### Scottish Power Energy Networks

https://www.spenergynetworks.co.uk/pages/power_cuts_list.aspx

In [None]:
#exports
def get_raw_sp_incidents_info(sp_map_url='https://www.spenergynetworks.co.uk/pages/power_cuts_map.aspx'):
    """Extract the incidents list embedded in the SP Energy Networks power cuts map page.

    The JSON is taken from the page source between `arrPowercutsPostcodes:`
    and `strPagePathListView`, with `,\\r\\n` sequences removed and the
    surrounding whitespace stripped before parsing.
    """
    page_source = requests.get(sp_map_url).text

    after_start_marker = page_source.split('arrPowercutsPostcodes:')[1]
    before_end_marker = after_start_marker.split('strPagePathListView')[0]
    incidents_json = before_end_marker.replace(',\r\n', '').strip()

    return json.loads(incidents_json)

In [None]:
incidents_info = get_raw_sp_incidents_info()

JSON([incidents_info])

In [None]:
#exports
def clean_sp_incident_info(incident_info):
    """Convert one SP Energy Networks incident record into the common incident format.

    Dates are parsed as UTC and formatted as `%Y-%m-%d %H:%M`; a `None` date
    becomes an empty string. The incident counts as active when its
    `HISTORIC_FLAG` is `None`.
    """
    def format_dt(raw_dt):
        if raw_dt is not None:
            return pd.to_datetime(raw_dt, utc=True).strftime('%Y-%m-%d %H:%M')

        return ''

    return {
        'received_time': format_dt(incident_info['CREATION_DATE']),
        'estimated_restored_time': format_dt(incident_info['EST_REST_DATE']),
        'postcodes_impacted': incident_info['POSTCODES'],
        'description': incident_info['MAIN_MESSAGE'],
        'incident_active': incident_info['HISTORIC_FLAG'] is None,
        'incident_url': f"https://www.spenergynetworks.co.uk/pages/power_cuts_map.aspx?incRef={incident_info['INCIDENT_REF']}"
    }

In [None]:
clean_sp_incident_info(incidents_info[0])

In [None]:
#exports
def get_sp_incidents_info(sp_map_url='https://www.spenergynetworks.co.uk/pages/power_cuts_map.aspx'):
    """Retrieve the raw SP Energy Networks incidents and their cleaned details.

    Returns the list extracted by `get_raw_sp_incidents_info`, together with a
    dict mapping each `INCIDENT_REF` to the output of `clean_sp_incident_info`.
    """
    raw_incidents_info = get_raw_sp_incidents_info(sp_map_url)

    cleaned_incidents_info = {
        incident_info['INCIDENT_REF']: clean_sp_incident_info(incident_info)
        for incident_info in raw_incidents_info
    }

    return raw_incidents_info, cleaned_incidents_info

In [None]:
raw_incidents_info, cleaned_incidents_info = get_sp_incidents_info()

JSON([cleaned_incidents_info])

In [None]:
save_json_data(raw_incidents_info, 'sp_incidents_info', data_dir='../data/raw')

<br>

### Northern Power Grid

https://www.northernpowergrid.com/power-cuts

In [None]:
#exports
def get_np_auth(np_main_url='https://www.northernpowergrid.com/power-cuts'):
    """Open a session on the Northern Powergrid power cuts page and read its authenticity token.

    Returns the `requests.Session` used for the request and the `value` of the
    page's `authenticityToken` input.
    """
    session = requests.Session()
    landing_page = session.get(np_main_url)

    page_soup = bs(landing_page.text, features='lxml')
    token_input = page_soup.find('input', attrs={'name': 'authenticityToken'})

    return session, token_input['value']

In [None]:
s, authenticity_token = get_np_auth()

authenticity_token

In [None]:
#exports
def get_np_r(
    np_main_url='https://www.northernpowergrid.com/power-cuts', 
    np_incidents_url='https://www.northernpowergrid.com/powercutsgetallbyincno'
):
    """Post the incidents query to Northern Powergrid and return the response.

    The session and authenticity token come from `get_np_auth(np_main_url)`.
    The same session is used to post the form to `np_incidents_url`.
    """
    session, authenticity_token = get_np_auth(np_main_url)

    category_filters = [
        'Service Cutout Change',
        'Asset repairs by Troublecall',
        'Metering',
        'Emergency Disconnection',
        'Emergency Disconnection (Charge)',
        'Cat A',
        'Cat B',
        'Cat C'
    ]

    form_data = dict(
        method='incno',
        categoryFilters=','.join(category_filters),
        authenticityToken=authenticity_token
    )

    return session.post(np_incidents_url, data=form_data)

In [None]:
r = get_np_r()

r

In [None]:
#exports
def get_np_raw_incidents_info(r):
    """Parse the JSON string held under `data` in the response's JSON body."""
    response_body = r.json()

    return json.loads(response_body['data'])

In [None]:
raw_incidents_info = get_np_raw_incidents_info(r)

JSON(raw_incidents_info)

In [None]:
#exports
def clean_np_incident_info(incident_info):
    """Convert one Northern Powergrid incident record into the common incident format.

    Dates are read as milliseconds since the epoch and formatted as
    `%Y-%m-%d %H:%M`; a `None` date becomes an empty string. The incident
    counts as active when the confirmed plus predicted off totals exceed zero.
    """
    def format_dt(raw_dt):
        if raw_dt is not None:
            return pd.to_datetime(raw_dt, unit='ms').strftime('%Y-%m-%d %H:%M')

        return ''

    return {
        'received_time': format_dt(incident_info['logged']),
        'estimated_restored_time': format_dt(incident_info['estimatedTimeTillResolution']),
        'postcodes_impacted': incident_info['postcodes'],
        'description': f"{incident_info['category']} {incident_info['status']}",
        'incident_active': (incident_info['totalConfirmedOff'] + incident_info['totalPredictedOff']) > 0,
        'incident_url': 'https://www.northernpowergrid.com/power-cuts'
    }

In [None]:
# Use whichever incident is listed first. Incident references come from the
# live feed, so a hard-coded one raises a KeyError once it is no longer listed.
incident_info = next(iter(raw_incidents_info['powercuts'].values()))

cleaned_incident_info = clean_np_incident_info(incident_info)

JSON(cleaned_incident_info)

In [None]:
#exports
def get_np_incidents_info(
    np_main_url='https://www.northernpowergrid.com/power-cuts', 
    np_incidents_url='https://www.northernpowergrid.com/powercutsgetallbyincno'
):
    """Retrieve the raw Northern Powergrid incidents and their cleaned details.

    Returns the parsed response from `get_np_raw_incidents_info`, together
    with a dict mapping each key of its `powercuts` entry to the output of
    `clean_np_incident_info`.
    """
    response = get_np_r(np_main_url, np_incidents_url)
    raw_incidents_info = get_np_raw_incidents_info(response)

    cleaned_incidents_info = {
        incident_id: clean_np_incident_info(incident_info)
        for incident_id, incident_info in raw_incidents_info['powercuts'].items()
    }

    return raw_incidents_info, cleaned_incidents_info

In [None]:
raw_incidents_info, cleaned_incidents_info = get_np_incidents_info()

JSON([cleaned_incidents_info])

<br>

### Electricity North West

https://www.enwl.co.uk/power-cuts/

In [None]:
#exports
def get_enw_incidents_page(page=1, page_size=10000):
    """Build the Electricity North West power-outages search url for one page of results.

    All of the `include...` filters are set to `true`.
    """
    query_params = [
        f'pageSize={page_size}',
        'postcodeOrReferenceNumber=',
        f'pageNumber={page}',
        'includeCurrent=true',
        'includeResolved=true',
        'includeTodaysPlanned=true',
        'includeFuturePlanned=true',
        'includeCancelledPlanned=true'
    ]

    return 'https://www.enwl.co.uk/power-outages/search?' + '&'.join(query_params)

In [None]:
url = get_enw_incidents_page()

r_json = requests.get(url).json()

JSON([r_json])

In [None]:
#exports
def check_num_results(r_json):
    """Warn when the response holds a different number of items than `TotalResults` reports."""
    num_total_results, num_results_returned = r_json['TotalResults'], len(r_json['Items'])

    if num_total_results == num_results_returned:
        return

    warn(f'Only {num_results_returned} items were returned for Electricity North West when there are {num_total_results} in total')

In [None]:
check_num_results(r_json)

<br>

We'll check that we get the expected warning when the conditions are met

In [None]:
url = get_enw_incidents_page(page_size=10)

r_json = requests.get(url).json()

check_num_results(r_json)

In [None]:
#exports
def get_enw_raw_incidents(page=1, page_size=10000):
    """Request one page of Electricity North West incidents.

    `check_num_results` is run on the response, which warns if the page does
    not hold every result.
    """
    search_url = get_enw_incidents_page(page=page, page_size=page_size)
    raw_incidents = requests.get(search_url).json()

    check_num_results(raw_incidents)

    return raw_incidents

In [None]:
raw_incidents = get_enw_raw_incidents()

JSON([raw_incidents])

In [None]:
#exports
def clean_enw_incident_info(incident_info):
    """Convert one Electricity North West incident record into the common incident format.

    Dates are formatted as `%Y-%m-%d %H:%M`; a `None` date becomes an empty
    string. `AffectedPostcodes` is stripped and split on `', '`. The incident
    counts as active when its `FaultLabel` is 'CurrentFault' or 'Live power cut'.
    """
    active_fault_labels = ['CurrentFault', 'Live power cut']

    def format_dt(raw_dt):
        if raw_dt is not None:
            return pd.to_datetime(raw_dt).strftime('%Y-%m-%d %H:%M')

        return ''

    return {
        'received_time': format_dt(incident_info['date']),
        'estimated_restored_time': format_dt(incident_info['estimatedTimeOfRestorationMajority']),
        'postcodes_impacted': incident_info['AffectedPostcodes'].strip().split(', '),
        'description': incident_info['AdditionalFaultInfo'],
        'incident_active': incident_info['FaultLabel'] in active_fault_labels,
        'incident_url': 'https://www.enwl.co.uk/power-cuts/power-cuts-power-cuts-live-power-cut-information-fault-list/fault-list'
    }

In [None]:
incident_info = raw_incidents['Items'][0]

cleaned_incident_info = clean_enw_incident_info(incident_info)

JSON(cleaned_incident_info)

In [None]:
#exports
def get_enw_incidents_info(page=1, page_size=10000):
    """Retrieve the raw Electricity North West incidents and their cleaned details.

    Returns the response from `get_enw_raw_incidents`, together with a dict
    mapping each item's `faultNumber` to the output of `clean_enw_incident_info`.
    """
    raw_incidents_info = get_enw_raw_incidents(page=page, page_size=page_size)

    cleaned_incidents_info = {
        incident_info['faultNumber']: clean_enw_incident_info(incident_info)
        for incident_info in raw_incidents_info['Items']
    }

    return raw_incidents_info, cleaned_incidents_info

In [None]:
raw_incidents_info, cleaned_incidents_info = get_enw_incidents_info()

JSON([cleaned_incidents_info])

In [23]:
#hide
from nbdev.export import *
notebook2script()

Converted 01-retrieval.ipynb.
Converted 02-collation.ipynb.
Converted 03-deployment.ipynb.
