In [1]:
# default_exp retrieval

# Outage Data Retrieval

<br>

### Imports

In [95]:
#exports
import json
import pandas as pd

import html
import requests
from bs4 import BeautifulSoup as bs
from warnings import warn

from ipypb import track

In [90]:
from IPython.display import JSON

<br>

### UK Power Networks

https://www.ukpowernetworks.co.uk/power-cut/map

In [3]:
# Fetch the current UKPN incidents feed and preview its structure
incidents_url = 'https://www.ukpowernetworks.co.uk/Incidents/GetIncidents'

r_json = requests.get(incidents_url).json()

JSON(r_json)

<IPython.core.display.JSON object>

<br>

We'll now extract the incident ids, as we can use them to generate the URL for the JSON endpoint with the incident data. Items where the `PowerCutType` is set to `Multiple` have been purposefully removed because they don't include an incident id (we'll handle them next).

In [4]:
#exports
def extract_single_incident_ids(r_json):
    """Returns the incident ids of every feed item whose `PowerCutType` is not `Multiple`.

    The id is taken from the text following `incidentId=` in each item's `PanelContentUrl`.
    """
    single_incident_ids = []

    for feed_item in r_json['Incidents']:
        if feed_item['PowerCutType'] == 'Multiple':
            # `Multiple` items are handled separately
            continue

        url_parts = feed_item['PanelContentUrl'].split('incidentId=')
        single_incident_ids.append(url_parts[1])

    return single_incident_ids

In [5]:
# Ids of the incidents in the feed that are not `Multiple` items
single_incident_ids = extract_single_incident_ids(r_json)

JSON([single_incident_ids])

<IPython.core.display.JSON object>

<br>

For the `Multiple` items we'll instead extract the URL of each item's metadata page, which contains the ids of the incidents within it.

In [6]:
#exports
def extract_multiple_incident_urls(r_json):
    """Returns the absolute metadata URL of every feed item whose `PowerCutType` is `Multiple`.

    Each item's `PanelContentUrl` is appended to the UKPN site root.
    """
    multiple_incident_urls = []

    for feed_item in r_json['Incidents']:
        if feed_item['PowerCutType'] != 'Multiple':
            continue

        multiple_incident_urls.append(
            f"https://www.ukpowernetworks.co.uk{feed_item['PanelContentUrl']}"
        )

    return multiple_incident_urls

In [7]:
# Metadata URLs for the `Multiple` items in the feed
multiple_incident_urls = extract_multiple_incident_urls(r_json)

JSON([multiple_incident_urls])

<IPython.core.display.JSON object>

In [8]:
#exports
def extract_multiple_incident_ids(multiple_incident_urls):
    """Scrapes the incident ids listed on each `Multiple` incident page.

    Every URL is requested and parsed as HTML; the id is the text following
    `incidentId=` in the `data-url` attribute of each link inside the
    `multiple-incidents--wrapper mb-4` div.

    Pages that do not contain that wrapper div are skipped with a warning
    rather than aborting the whole retrieval.

    Returns a flat list of the incident ids found across all pages.
    """
    incident_ids = []

    for multiple_incident_url in track(multiple_incident_urls, label='Multiple Ids'):
        r = requests.get(multiple_incident_url)
        soup = bs(r.text, features='lxml')

        wrapper = soup.find('div', {'class': 'multiple-incidents--wrapper mb-4'})

        if wrapper is None:
            # `find` returns None when nothing matches, e.g. an error page or changed markup
            warn(f'No incident links found at: {multiple_incident_url}')
            continue

        incident_ids += [
            link['data-url'].split('incidentId=')[1]
            for link
            in wrapper.find_all('a')
        ]

    return incident_ids

In [9]:
# Scrape the ids nested within the `Multiple` items and preview the first five
multiple_incident_ids = extract_multiple_incident_ids(multiple_incident_urls)

multiple_incident_ids[:5]

['INCD-53064-V',
 'INCD-247193-G',
 'INCD-247216-G',
 'INCD-51378-V',
 'INCD-227015-Z']

In [10]:
#exports
def get_incident_detail_url(incident_id):
    """Returns the UKPN incident details endpoint URL for `incident_id`."""
    return f'https://www.ukpowernetworks.co.uk/Incidents/getincidentdetails?incidentid={incident_id}'

In [36]:
# Request the detail JSON for a single example incident
incident_id = multiple_incident_ids[1]

incident_detail_url = get_incident_detail_url(incident_id)
# N.b. this overwrites `r_json`, which previously held the incidents feed
r_json = requests.get(incident_detail_url).json()

JSON(r_json)

<IPython.core.display.JSON object>

In [37]:
#exports
def extract_relevant_info(r_json):
    """Picks the fields of interest out of a UKPN incident details response.

    Returns a dictionary with the activity flag, the restored/estimated/planned/received
    times, the impacted postcodes and the customer-friendly description.
    """
    service_down = r_json['ServiceDown']
    ukpn_incident = r_json['UKPNIncident']

    # output name -> key within the nested `UKPNIncident` object
    ukpn_time_fields = {
        'restored_time': 'RestoredDateTime',
        'estimated_restored_time': 'EstimatedRestorationDate',
        'planned_time': 'PlannedDate',
        'received_time': 'ReceivedDate',
    }

    incident_info = {'incident_active': service_down}

    for new_name, ukpn_name in ukpn_time_fields.items():
        incident_info[new_name] = ukpn_incident[ukpn_name]

    incident_info['postcodes_impacted'] = r_json['FullPostcodeData']
    incident_info['description'] = r_json['IncidentCategoryCustomerFriendlyDescription']

    return incident_info

In [38]:
# Check the cleaned output for the example incident
incident_info = extract_relevant_info(r_json)

JSON(incident_info)

<IPython.core.display.JSON object>

In [39]:
#exports
def get_ukpn_incidents_info(incidents_url='https://www.ukpowernetworks.co.uk/Incidents/GetIncidents'):
    """Retrieves the details of every incident in the UKPN incidents feed.

    The ids of the single incidents are combined with those scraped from the
    `Multiple` items, then the details endpoint is requested for each id.

    Retrieval is best-effort: an incident whose details cannot be requested or
    cleaned triggers a warning and the loop carries on. The raw response is
    stored before cleaning, so an incident that fails at the cleaning step
    still appears in the raw dictionary.

    Returns a tuple of `(raw_incidents_info, cleaned_incidents_info)`, both
    dictionaries keyed by incident id.
    """
    r_json = requests.get(incidents_url).json()

    incident_ids = (
        extract_single_incident_ids(r_json) +
        extract_multiple_incident_ids(extract_multiple_incident_urls(r_json))
    )

    raw_incidents_info = dict()
    cleaned_incidents_info = dict()

    for incident_id in track(incident_ids, label='Details'):
        try:
            incident_detail_url = get_incident_detail_url(incident_id)
            incident_json = requests.get(incident_detail_url).json()

            raw_incidents_info[incident_id] = incident_json
            cleaned_incidents_info[incident_id] = extract_relevant_info(incident_json)
        except Exception as err:
            # `Exception` rather than a bare `except` so that KeyboardInterrupt/SystemExit still propagate
            warn(f'Failed to retrieve incident details for: {incident_id} ({type(err).__name__}: {err})')

    return raw_incidents_info, cleaned_incidents_info

In [40]:
# Retrieve the raw and cleaned details for all incidents in the UKPN feed
raw_incidents_info, cleaned_incidents_info = get_ukpn_incidents_info()

JSON([cleaned_incidents_info])

<IPython.core.display.JSON object>

In [41]:
#exports
def save_json_data(data, filename, data_dir='../data/raw'):
    """Writes `data` as JSON to `{data_dir}/{filename}.json`, overwriting any existing file."""
    json_filepath = f'{data_dir}/{filename}.json'

    with open(json_filepath, 'w') as json_file:
        json.dump(data, json_file)

In [42]:
# Save the raw (uncleaned) responses under the default `../data/raw` directory
save_json_data(raw_incidents_info, 'ukpn_incidents_info')

<br>

### Scottish & Southern Electricity Networks

https://www.ssen.co.uk/Powertrack/

N.b. we occasionally get date values of `Date(-62135596800000)`. This occurs when no date can be found, so we will set these to `None`.

In [45]:
# Fetch the SSEN Powertrack JSON and preview its structure
url = 'https://www.ssen.co.uk/Sse_Components/Views/Controls/FormControls/PowerTrackHandler.ashx'

# NOTE(review): `verify=False` turns off TLS certificate verification for this request - confirm it is still required
r_json = requests.get(url, verify=False).json()

JSON(r_json)



<IPython.core.display.JSON object>

In [47]:
#exports
def clean_ssen_incident_info(incident):
    """Extracts and standardises the fields of interest from a single SSEN fault.

    The date fields are strings with a millisecond epoch value inside brackets,
    e.g. `Date(1616587800000)`. Each is converted to a `%Y-%m-%d %H:%M:%S`
    string, or to `None` when it holds the `-62135596800000` value that is
    returned when no date can be found.

    Returns a dictionary with the received, estimated arrival and estimated
    restoration times, the impacted postcodes and the description.
    """
    ssen_relevant_dates_name_mapping = {
        'LoggedAtUtc': 'received_time',
        'EstimatedArrivalOnSiteUtc': 'estimated_arrival_time',
        'EstimatedRestorationTimeUtc': 'estimated_restored_time'
    }
    missing_date_str = '-62135596800000'

    cleaned_incident_info = dict()

    for old_dt_name, new_dt_name in ssen_relevant_dates_name_mapping.items():
        # keep only the digits (and sign) between the brackets
        dt_str = incident[old_dt_name].split('(')[1].split(')')[0]

        if dt_str != missing_date_str:
            # `unit='ms'` keeps the conversion in exact integer arithmetic
            dt = pd.to_datetime(int(dt_str), unit='ms').strftime('%Y-%m-%d %H:%M:%S')
        else:
            dt = None

        cleaned_incident_info[new_dt_name] = dt

    cleaned_incident_info['postcodes_impacted'] = incident['AffectedAreas']
    cleaned_incident_info['description'] = incident['Message']

    return cleaned_incident_info

In [48]:
# Clean the first fault in the SSEN response as a worked example
incident = r_json['Faults'][0]

cleaned_incident_info = clean_ssen_incident_info(incident)
    
JSON(cleaned_incident_info)

<IPython.core.display.JSON object>

In [49]:
#exports
def get_ssen_incidents_info(incidents_url='https://www.ssen.co.uk/Sse_Components/Views/Controls/FormControls/PowerTrackHandler.ashx'):
    """Retrieves the SSEN faults and cleans each of them.

    Returns a tuple of `(raw_incidents_info, cleaned_incidents_info)`: the full
    JSON response, and a dictionary of cleaned faults keyed by their `Reference`.
    """
    # NOTE(review): `verify=False` turns off TLS certificate verification - confirm it is still required
    response = requests.get(incidents_url, verify=False)
    raw_incidents_info = response.json()

    cleaned_incidents_info = {
        fault['Reference']: clean_ssen_incident_info(fault)
        for fault in track(raw_incidents_info['Faults'])
    }

    return raw_incidents_info, cleaned_incidents_info

In [50]:
# Retrieve the raw response and the cleaned faults for SSEN
raw_incidents_info, cleaned_incidents_info = get_ssen_incidents_info()

JSON([cleaned_incidents_info])



<IPython.core.display.JSON object>

In [51]:
# Save the raw (uncleaned) response under the default `../data/raw` directory
save_json_data(raw_incidents_info, 'ssen_incidents_info')

<br>

### Western Power Distribution

https://powercuts.westernpower.co.uk/

In [73]:
#exports
def get_wpd_incident_feed(url='https://powercuts.westernpower.co.uk'):
    """Extracts the incident feed embedded in the WPD power cuts page.

    The page passes HTML-escaped JSON to `init(...)` within a `data-ng-init`
    attribute; its `feed` entry is itself a JSON string, which is parsed and returned.
    """
    page_html = requests.get(url).text

    # isolate the argument passed to `init(...)`
    after_init_open = page_html.split('data-ng-init="init(')[1]
    escaped_init_arg = after_init_open.split(')"></div><div id="powercuts"')[0]

    raw_incidents_info = json.loads(html.unescape(escaped_init_arg))

    return json.loads(raw_incidents_info['feed'])

In [74]:
# Pull the WPD incident feed and preview its structure
feed = get_wpd_incident_feed()

JSON(feed)

<IPython.core.display.JSON object>

In [53]:
# Collect the id of every incident in the feed and preview the first five
incident_ids = [incident['id'] for incident in feed['incidents']]

incident_ids[:5]

['INCD-36877-C',
 'INCD-74461-T',
 'INCD-124506-B',
 'INCD-124495-B',
 'INCD-124505-B']

In [54]:
#exports
def wpd_incident_id_to_url(incident_id):
    """Returns the WPD `getIncidentById` endpoint URL for `incident_id`."""
    return f'https://powercuts.westernpower.co.uk/__powercuts/getIncidentById?incidentId={incident_id}'

In [58]:
# Request the detail JSON for the first WPD incident as a worked example
incident_id = incident_ids[0]

incident_url = wpd_incident_id_to_url(incident_id)
r_json = requests.get(incident_url).json()

JSON(r_json)

<IPython.core.display.JSON object>

In [81]:
#exports
def clean_wpd_incident_info(incident):
    """Extracts and standardises the fields of interest from a single WPD incident.

    `startTime` and `etr` are treated as millisecond epoch values and converted
    to `%Y-%m-%d %H:%M:%S` strings, or kept as `None` when missing.

    Returns a dictionary with the received and estimated restoration times,
    the impacted postcodes, the description (taken from `status`) and whether
    the incident is still active (i.e. has not been restored).
    """
    wpd_relevant_dates_name_mapping = {
        'startTime': 'received_time',
        'etr': 'estimated_restored_time'
    }

    cleaned_incident_info = dict()

    for old_dt_name, new_dt_name in wpd_relevant_dates_name_mapping.items():
        if incident[old_dt_name] is not None:
            # `unit='ms'` keeps the conversion exact instead of scaling through a float
            cleaned_incident_info[new_dt_name] = pd.to_datetime(incident[old_dt_name], unit='ms').strftime('%Y-%m-%d %H:%M:%S')
        else:
            cleaned_incident_info[new_dt_name] = None

    cleaned_incident_info['postcodes_impacted'] = incident['postcodes']
    cleaned_incident_info['description'] = incident['status']
    cleaned_incident_info['incident_active'] = not incident['restored']

    return cleaned_incident_info

In [72]:
# Clean the example incident held in `r_json`
clean_wpd_incident_info(r_json)

{'received_time': '2021-03-24 12:10:00',
 'estimated_restored_time': '2021-03-24 15:30:00',
 'postcodes_impacted': ['BS31 1QG',
  'BS31 1QE',
  'BS31 1QL',
  'BS31 1RW',
  'BS31 1QQ',
  'BS31 1RS',
  'BS31 1RN',
  'BS4 2NN',
  'BS31 1RR'],
 'description': 'Awaiting',
 'incident_active': True}

In [79]:
#exports
def get_wpd_incidents_info(incidents_url='https://powercuts.westernpower.co.uk'):
    """Retrieves the WPD incident feed and the cleaned details of each incident in it.

    Returns a tuple of `(raw_incidents_info, cleaned_incidents_info)`: the feed
    itself, and a dictionary of cleaned incident details keyed by incident id.
    """
    raw_incidents_info = get_wpd_incident_feed(incidents_url)
    incident_ids = [feed_item['id'] for feed_item in raw_incidents_info['incidents']]

    # one details request per incident id
    cleaned_incidents_info = {
        incident_id: clean_wpd_incident_info(
            requests.get(wpd_incident_id_to_url(incident_id)).json()
        )
        for incident_id in incident_ids
    }

    return raw_incidents_info, cleaned_incidents_info

In [82]:
# Retrieve the feed and the cleaned incident details for WPD
raw_incidents_info, cleaned_incidents_info = get_wpd_incidents_info()

JSON(cleaned_incidents_info)

<IPython.core.display.JSON object>

In [85]:
# Save the raw feed under the default `../data/raw` directory
save_json_data(raw_incidents_info, 'wpd_incidents_info')

<br>

### Scottish Power Energy Networks

https://www.spenergynetworks.co.uk/pages/power_cuts_list.aspx

<br>

### Northern Power Grid

https://www.northernpowergrid.com/power-cuts

In [68]:
# Look for the incidents table in the static HTML of the NPG power cuts page
url = 'https://www.northernpowergrid.com/power-cuts'

r = requests.get(url)
# name the parser explicitly (as done for the UKPN pages) rather than letting bs4 guess one
soup = bs(r.text, features='lxml')

# NOTE(review): no output is recorded for this cell, so this lookup presumably returned None - confirm
soup.find('div', attrs={'role': 'table'})

In [98]:
def _(main_url='https://www.northernpowergrid.com/power-cuts', data_url='https://www.northernpowergrid.com/powercutsgetallbyincno'):
    """Retrieves the Northern Powergrid incidents via the site's form endpoint.

    `main_url` is requested first so the session receives a `PLAY_SESSION`
    cookie, from which the authenticity token is taken; the form is then
    posted to `data_url`. The `data` entry of the JSON response is itself a
    JSON string, which is parsed and returned.

    NOTE(review): `_` is a placeholder name that also collides with IPython's
    last-output variable - consider renaming once this retrieval is finalised.
    """
    session = requests.Session()

    # only needed for its side effect of setting the `PLAY_SESSION` cookie on the session
    session.get(main_url)

    form_data = {
        'method': 'incno',
        'typeFilters': '',
        'timeFilters': '',
        'postcodeFilter': '',
        'categoryFilters': 'Service Cutout Change,Asset repairs by Troublecall,Metering,Emergency Disconnection,Emergency Disconnection (Charge),Cat A,Cat B,Cat C',
        'authenticityToken': session.cookies['PLAY_SESSION'].split('AT=')[-1].replace('"', '')
    }

    # post to the `data_url` parameter, not whatever `url` happens to be defined in the notebook
    r = session.post(data_url, data=form_data)

    data = json.loads(r.json()['data'])

    return data

# Retrieve the NPG incident data using the default URLs and preview it
data = _()

JSON(data)

<IPython.core.display.JSON object>

<br>

### Electricity North West

https://www.enwl.co.uk/power-cuts/

In [None]:
# TODO: need to handle page size and number (the `pageSize` and `pageNumber` query parameters below)
'https://www.enwl.co.uk/power-outages/search?pageSize=10&postcodeOrReferenceNumber=&pageNumber=2&includeCurrent=true&includeResolved=true&includeTodaysPlanned=true&includeFuturePlanned=true&includeCancelledPlanned=true'

In [None]:
# incident_active (if known)
# logged/start time
# estimated time of fix
# postcodes impacted
# description

In [96]:
#hide
# Convert the project's notebooks into their library modules
from nbdev.export import *
notebook2script()

Converted 01-retrieval.ipynb.
Converted 02-collation.ipynb.
