In [1]:
# default_exp retrieval

# Outage Data Retrieval

<br>

### Imports

In [2]:
#exports
import json
import pandas as pd

import html
import requests
from bs4 import BeautifulSoup as bs
from warnings import warn

from ipypb import track

In [3]:
from IPython.display import JSON

<br>

### UK Power Networks

https://www.ukpowernetworks.co.uk/power-cut/map

In [4]:
# UKPN `GetIncidents` endpoint
incidents_url = 'https://www.ukpowernetworks.co.uk/Incidents/GetIncidents'

# Fetch the feed and parse the response body as JSON
r_json = requests.get(incidents_url).json()

# Render the parsed payload with IPython's JSON display object
JSON(r_json)

<IPython.core.display.JSON object>

<br>

We'll now extract the incident ids, as we can use them to generate the url for the JSON endpoint with the incident data. Items where the `PowerCutType` is set to `Multiple` have been purposefully removed because they don't include an incident id (we'll handle them next).

In [5]:
#exports
def extract_single_incident_ids(r_json):
    """Return the incident id of every non-`Multiple` entry in the incidents feed.

    The id is the text following `incidentId=` in each entry's `PanelContentUrl`.
    """
    incident_ids = []

    for entry in r_json['Incidents']:
        # `Multiple` entries are handled separately, so skip them here
        if entry['PowerCutType'] == 'Multiple':
            continue

        panel_url = entry['PanelContentUrl']
        incident_ids.append(panel_url.split('incidentId=')[1])

    return incident_ids

In [6]:
# Pull the ids of the non-`Multiple` incidents out of the feed loaded above
single_incident_ids = extract_single_incident_ids(r_json)

# The list is wrapped in another list before being handed to the JSON viewer
JSON([single_incident_ids])

<IPython.core.display.JSON object>

<br>

For the `Multiple` items we'll extract the url for the incidents metadata, which includes the ids within them.

In [7]:
#exports
def extract_multiple_incident_urls(r_json):
    """Return the absolute metadata url of every `Multiple` entry in the incidents feed.

    Each url is the UKPN host followed by the entry's `PanelContentUrl`.
    """
    multiple_urls = []

    for entry in r_json['Incidents']:
        if entry['PowerCutType'] == 'Multiple':
            multiple_urls.append(f"https://www.ukpowernetworks.co.uk{entry['PanelContentUrl']}")

    return multiple_urls

In [8]:
# Build the metadata urls for the `Multiple` entries of the feed loaded above
multiple_incident_urls = extract_multiple_incident_urls(r_json)

# The list is wrapped in another list before being handed to the JSON viewer
JSON([multiple_incident_urls])

<IPython.core.display.JSON object>

In [9]:
#exports
def extract_multiple_incident_ids(multiple_incident_urls):
    """Scrape the incident ids listed on each `Multiple` incident page.

    Parameters
    ----------
    multiple_incident_urls : iterable of str
        Urls of the pages describing grouped incidents.

    Returns
    -------
    list of str
        The text following `incidentId=` in the `data-url` attribute of each
        link inside the page's `multiple-incidents--wrapper mb-4` div. Ids are
        returned in page order and may repeat.

    Pages without the wrapper div trigger a warning and are skipped rather than
    aborting the whole scrape; links lacking an `incidentId=` are ignored.
    """
    incident_ids = []

    for multiple_incident_url in track(multiple_incident_urls, label='Multiple Ids'):
        r = requests.get(multiple_incident_url)
        soup = bs(r.text, features='lxml')

        wrapper = soup.find('div', {'class': 'multiple-incidents--wrapper mb-4'})

        # `find` returns None when the div is absent (e.g. the page layout changed
        # or an error page was returned)
        if wrapper is None:
            warn(f'No incident links found at: {multiple_incident_url}')
            continue

        for link in wrapper.find_all('a'):
            data_url = link.get('data-url', '')

            if 'incidentId=' in data_url:
                incident_ids.append(data_url.split('incidentId=')[1])

    return incident_ids

In [10]:
# Scrape the ids from every `Multiple` incident page (one request per url)
multiple_incident_ids = extract_multiple_incident_ids(multiple_incident_urls)

# Preview the first five ids only
multiple_incident_ids[:5]

['INCD-43890-C',
 'INCD-229089-Z',
 'INCD-43890-C',
 'INCD-229089-Z',
 'INCD-247687-G']

In [11]:
#exports
def get_incident_detail_url(incident_id):
    """Build the UKPN `getincidentdetails` endpoint url for `incident_id`."""
    return f'https://www.ukpowernetworks.co.uk/Incidents/getincidentdetails?incidentid={incident_id}'

In [12]:
# Use the second scraped id as a worked example
incident_id = multiple_incident_ids[1]

incident_detail_url = get_incident_detail_url(incident_id)
# NOTE: this rebinds `r_json`, replacing the incidents feed loaded earlier with one incident's details
r_json = requests.get(incident_detail_url).json()

JSON(r_json)

<IPython.core.display.JSON object>

In [17]:
#exports
def is_ukpn_incident_active(r_json):
    """Return False when the incident's `PowerCutType` is `Restored`, True otherwise."""
    return r_json['PowerCutType'] != 'Restored'
    
def extract_relevant_info(r_json):
    """Reduce a UKPN incident-details payload to the fields kept by this module.

    Returns a dict with the keys `incident_active`, `restored_time`,
    `estimated_restored_time`, `planned_time`, `received_time`,
    `postcodes_impacted`, `description` and `incident_url`.
    """
    incident_info = {'incident_active': is_ukpn_incident_active(r_json)}

    # The four timestamps all live under the nested `UKPNIncident` object
    incident_info['restored_time'] = r_json['UKPNIncident']['RestoredDateTime']
    incident_info['estimated_restored_time'] = r_json['UKPNIncident']['EstimatedRestorationDate']
    incident_info['planned_time'] = r_json['UKPNIncident']['PlannedDate']
    incident_info['received_time'] = r_json['UKPNIncident']['ReceivedDate']

    incident_info['postcodes_impacted'] = r_json['FullPostcodeData']
    incident_info['description'] = r_json['IncidentCategoryCustomerFriendlyDescription']

    # Link back to the public map for this incident reference
    incident_info['incident_url'] = f"https://www.ukpowernetworks.co.uk/power-cut/map?incidentid={r_json['IncidentReference']}"

    return incident_info

In [18]:
# Reduce the example incident's details payload to the fields of interest
incident_info = extract_relevant_info(r_json)

JSON(incident_info)

<IPython.core.display.JSON object>

In [15]:
#exports
def get_ukpn_incidents_info(incidents_url='https://www.ukpowernetworks.co.uk/Incidents/GetIncidents'):
    """Retrieve the details of every incident listed in the UKPN incidents feed.

    Parameters
    ----------
    incidents_url : str
        Url of the JSON feed listing the incidents.

    Returns
    -------
    (dict, dict)
        `raw_incidents_info` maps incident id to the unmodified details payload;
        `cleaned_incidents_info` maps incident id to the output of
        `extract_relevant_info`.

    Incidents whose details cannot be fetched or cleaned trigger a warning and
    are skipped. If only the cleaning step fails, the raw payload is still kept.
    """
    incidents_feed = requests.get(incidents_url).json()

    # The same id can be listed more than once (e.g. on several `Multiple` pages);
    # `dict.fromkeys` drops repeats while keeping first-seen order, so each
    # incident is requested only once
    incident_ids = list(dict.fromkeys(
        extract_single_incident_ids(incidents_feed) +
        extract_multiple_incident_ids(extract_multiple_incident_urls(incidents_feed))
    ))

    raw_incidents_info = dict()
    cleaned_incidents_info = dict()

    for incident_id in track(incident_ids, label='Details'):
        try:
            incident_detail_url = get_incident_detail_url(incident_id)
            incident_json = requests.get(incident_detail_url).json()

            raw_incidents_info[incident_id] = incident_json
            cleaned_incidents_info[incident_id] = extract_relevant_info(incident_json)
        except Exception as err:
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate
            warn(f'Failed to retrieve incident details for: {incident_id} ({err!r})')

    return raw_incidents_info, cleaned_incidents_info

In [16]:
# Run the full UKPN retrieval (one request per incident id)
raw_incidents_info, cleaned_incidents_info = get_ukpn_incidents_info()

# The dict is wrapped in a list before being handed to the JSON viewer
JSON([cleaned_incidents_info])

<IPython.core.display.JSON object>

In [17]:
#exports
def save_json_data(data, filename, data_dir='../data/raw'):
    """Serialise `data` as JSON to `<data_dir>/<filename>.json`.

    Parameters
    ----------
    data : JSON-serialisable object
        Passed straight to `json.dump`.
    filename : str
        File name without the `.json` extension.
    data_dir : str
        Target directory; created (including parents) if it does not exist.
    """
    from pathlib import Path

    out_dir = Path(data_dir)
    # Avoid a FileNotFoundError on a fresh checkout where the data folder is missing
    out_dir.mkdir(parents=True, exist_ok=True)

    with open(out_dir / f'{filename}.json', 'w', encoding='utf-8') as fp:
        json.dump(data, fp)

In [18]:
# Persist the raw UKPN payloads as `ukpn_incidents_info.json` in the default data directory
save_json_data(raw_incidents_info, 'ukpn_incidents_info')

<br>

### Scottish & Southern Electricity Networks

https://www.ssen.co.uk/Powertrack/

N.B. we occasionally get date values of: `Date(-62135596800000)`. This occurs when no date can be found, so we will set these to `None`.

In [19]:
# SSEN PowerTrack handler endpoint
url = 'https://www.ssen.co.uk/Sse_Components/Views/Controls/FormControls/PowerTrackHandler.ashx'

# NOTE(review): `verify=False` turns off TLS certificate verification for this request — confirm it is still required
# NOTE: this rebinds `r_json`, which previously held UKPN data
r_json = requests.get(url, verify=False).json()

JSON(r_json)



<IPython.core.display.JSON object>

In [27]:
#exports
def clean_ssen_incident_info(incident):
    """Reduce one SSEN fault record to the fields kept by this module.

    Parameters
    ----------
    incident : dict
        A single fault record. Must contain `LoggedAtUtc`,
        `EstimatedArrivalOnSiteUtc`, `EstimatedRestorationTimeUtc`,
        `AffectedAreas` and `Message`.

    Returns
    -------
    dict
        `received_time`, `estimated_arrival_time` and `estimated_restored_time`
        as `'%Y-%m-%d %H:%M:%S'` strings (or None when missing/unparseable),
        plus `postcodes_impacted`, `description`, `incident_active` (always
        True) and `incident_url`.
    """
    ssen_relevant_dates_name_mapping = {
        'LoggedAtUtc': 'received_time',
        'EstimatedArrivalOnSiteUtc': 'estimated_arrival_time',
        'EstimatedRestorationTimeUtc': 'estimated_restored_time'
    }

    # Placeholder emitted by the feed when no date is available
    # (milliseconds since the Unix epoch for 0001-01-01 00:00:00)
    missing_date_ms = '-62135596800000'

    cleaned_incident_info = dict()

    for old_dt_name, new_dt_name in ssen_relevant_dates_name_mapping.items():
        raw_dt = incident[old_dt_name]
        dt = None

        if raw_dt is not None:
            try:
                # Dates arrive as `...Date(<milliseconds>)...`; keep what is inside the brackets
                dt_str = raw_dt.split('(')[1].split(')')[0]

                if dt_str != missing_date_ms:
                    dt = pd.to_datetime(int(dt_str), unit='ms').strftime('%Y-%m-%d %H:%M:%S')
            except Exception:
                # Covers malformed strings as well as out-of-range timestamps
                warn(f'{raw_dt} could not be parsed to a date')
                dt = None

        cleaned_incident_info[new_dt_name] = dt

    cleaned_incident_info['postcodes_impacted'] = incident['AffectedAreas']
    cleaned_incident_info['description'] = incident['Message']
    cleaned_incident_info['incident_active'] = True
    cleaned_incident_info['incident_url'] = 'https://www.ssen.co.uk/Powertrack/'

    return cleaned_incident_info

In [28]:
# Use the first fault in the SSEN payload as a worked example
incident = r_json['Faults'][0]

cleaned_incident_info = clean_ssen_incident_info(incident)
    
JSON(cleaned_incident_info)

<IPython.core.display.JSON object>

In [22]:
#exports
def get_ssen_incidents_info(
    incidents_url='https://www.ssen.co.uk/Sse_Components/Views/Controls/FormControls/PowerTrackHandler.ashx',
    verify=False
):
    """Retrieve and clean every fault listed by the SSEN PowerTrack endpoint.

    Parameters
    ----------
    incidents_url : str
        Url of the JSON endpoint listing the faults.
    verify : bool or str
        Forwarded to `requests.get`. Defaults to False to preserve the existing
        behaviour of this function.
        NOTE(review): False disables TLS certificate verification, which leaves
        the request open to interception — pass True (or a CA bundle path) once
        the endpoint's certificate chain validates.

    Returns
    -------
    (dict, dict)
        The unmodified response payload, and a dict mapping each fault's
        `Reference` to the output of `clean_ssen_incident_info`.
    """
    raw_incidents_info = requests.get(incidents_url, verify=verify).json()
    cleaned_incidents_info = dict()

    for incident in track(raw_incidents_info['Faults']):
        incident_ref = incident['Reference']
        cleaned_incidents_info[incident_ref] = clean_ssen_incident_info(incident)

    return raw_incidents_info, cleaned_incidents_info

In [23]:
# NOTE: rebinds `raw_incidents_info` / `cleaned_incidents_info`, which previously held the UKPN results
raw_incidents_info, cleaned_incidents_info = get_ssen_incidents_info()

# The dict is wrapped in a list before being handed to the JSON viewer
JSON([cleaned_incidents_info])



<IPython.core.display.JSON object>

In [24]:
# Persist the raw SSEN payload as `ssen_incidents_info.json` in the default data directory
save_json_data(raw_incidents_info, 'ssen_incidents_info')

<br>

### Western Power Distribution

https://powercuts.westernpower.co.uk/

In [20]:
#exports
def get_wpd_incident_feed(url='https://powercuts.westernpower.co.uk'):
    """Extract the incident feed embedded in the WPD power-cuts page.

    The page carries an HTML-escaped JSON argument inside a
    `data-ng-init="init(...)"` attribute; that argument's `feed` entry is itself
    a JSON string, which is decoded and returned.
    """
    page_source = requests.get(url).text

    # Keep only the text between the `init(` call and the markup that closes it
    after_init_call = page_source.split('data-ng-init="init(')[1]
    escaped_init_args = after_init_call.split(')"></div><div id="powercuts"')[0]

    init_args = json.loads(html.unescape(escaped_init_args))

    return json.loads(init_args['feed'])

In [21]:
# Download the WPD page and decode the feed embedded in it
feed = get_wpd_incident_feed()

JSON(feed)

<IPython.core.display.JSON object>

In [22]:
# Collect the `id` of every incident in the feed
incident_ids = [incident['id'] for incident in feed['incidents']]

# Preview the first five ids only
incident_ids[:5]

['INCD-125132-B',
 'INCD-125145-B',
 'INCD-125125-B',
 'INCD-125121-B',
 'INCD-357153-G']

In [23]:
#exports
def wpd_incident_id_to_url(incident_id):
    """Build the WPD `getIncidentById` endpoint url for `incident_id`."""
    return f'https://powercuts.westernpower.co.uk/__powercuts/getIncidentById?incidentId={incident_id}'

In [24]:
# Use the first WPD incident id as a worked example
incident_id = incident_ids[0]

incident_url = wpd_incident_id_to_url(incident_id)
# NOTE: this rebinds `r_json`, which previously held the SSEN payload
r_json = requests.get(incident_url).json()

JSON(r_json)

<IPython.core.display.JSON object>

In [26]:
#exports
def clean_wpd_incident_info(incident):
    """Reduce one WPD incident payload to the fields kept by this module.

    Parameters
    ----------
    incident : dict
        A single incident record. Must contain `startTime`, `etr`, `postcodes`,
        `status`, `restored` and `id`. The two time fields are treated as
        milliseconds since the Unix epoch, or None when absent.

    Returns
    -------
    dict
        `received_time` and `estimated_restored_time` as `'%Y-%m-%d %H:%M:%S'`
        strings (or None), plus `postcodes_impacted`, `description`,
        `incident_active` and `incident_url`.
    """
    wpd_relevant_dates_name_mapping = {
        'startTime': 'received_time',
        'etr': 'estimated_restored_time'
    }

    cleaned_incident_info = dict()

    for old_dt_name, new_dt_name in wpd_relevant_dates_name_mapping.items():
        raw_dt = incident[old_dt_name]

        if raw_dt is not None:
            # Let pandas interpret the millisecond timestamp directly instead of
            # scaling it to nanoseconds through a float multiplication
            cleaned_incident_info[new_dt_name] = pd.to_datetime(raw_dt, unit='ms').strftime('%Y-%m-%d %H:%M:%S')
        else:
            cleaned_incident_info[new_dt_name] = None

    cleaned_incident_info['postcodes_impacted'] = incident['postcodes']
    cleaned_incident_info['description'] = incident['status']
    # Active means "not restored"; assumes `restored` is a boolean/0-1 flag — TODO confirm against the feed
    cleaned_incident_info['incident_active'] = not incident['restored']
    cleaned_incident_info['incident_url'] = f"https://powercuts.westernpower.co.uk/incident/{incident['id']}"

    return cleaned_incident_info

In [28]:
# Clean the example incident fetched above and render the result
JSON(clean_wpd_incident_info(r_json))

<IPython.core.display.JSON object>

In [None]:
#exports
def get_wpd_incidents_info(incidents_url='https://powercuts.westernpower.co.uk'):
    """Retrieve and clean the details of every incident in the WPD feed.

    Parameters
    ----------
    incidents_url : str
        Url of the page the incident feed is embedded in.

    Returns
    -------
    (dict, dict)
        The decoded feed, and a dict mapping incident id to the output of
        `clean_wpd_incident_info`.

    Incidents whose details cannot be fetched or cleaned trigger a warning and
    are skipped, mirroring `get_ukpn_incidents_info`, so that a single failed
    request does not discard the rest of the run.
    """
    cleaned_incidents_info = dict()

    raw_incidents_info = get_wpd_incident_feed(incidents_url)
    incident_ids = [incident['id'] for incident in raw_incidents_info['incidents']]

    for incident_id in track(incident_ids, label='Details'):
        try:
            incident_url = wpd_incident_id_to_url(incident_id)
            incident_json = requests.get(incident_url).json()
            cleaned_incidents_info[incident_id] = clean_wpd_incident_info(incident_json)
        except Exception as err:
            warn(f'Failed to retrieve incident details for: {incident_id} ({err!r})')

    return raw_incidents_info, cleaned_incidents_info

In [None]:
# NOTE: rebinds `raw_incidents_info` / `cleaned_incidents_info`, which previously held the SSEN results
raw_incidents_info, cleaned_incidents_info = get_wpd_incidents_info()

JSON(cleaned_incidents_info)

In [None]:
# Persist the decoded WPD feed as `wpd_incidents_info.json` in the default data directory
save_json_data(raw_incidents_info, 'wpd_incidents_info')

<br>

### Scottish Power Energy Networks

https://www.spenergynetworks.co.uk/pages/power_cuts_list.aspx

<br>

### Northern Power Grid

https://www.northernpowergrid.com/power-cuts

In [None]:
# Northern Power Grid power-cuts landing page
url = 'https://www.northernpowergrid.com/power-cuts'

r = requests.get(url)
# Name the parser explicitly (as done for the UKPN pages) so the result does not
# depend on which parsers happen to be installed
soup = bs(r.text, features='lxml')

# Look for the element marked up as the incidents table
soup.find('div', attrs={'role': 'table'})

In [None]:
def _(main_url='https://www.northernpowergrid.com/power-cuts', data_url='https://www.northernpowergrid.com/powercutsgetallbyincno'):
    """Retrieve the Northern Power Grid incident data via its form endpoint.

    Parameters
    ----------
    main_url : str
        Page requested first; the `PLAY_SESSION` cookie is read from the
        session afterwards.
    data_url : str
        Endpoint the filter form is posted to.

    Returns
    -------
    The decoded contents of the response's `data` entry (itself a JSON string).

    NOTE(review): the name `_` also shadows IPython's "last output" variable —
    consider giving this function a descriptive name once it is finalised.
    """
    session = requests.Session()

    # Requested for its side effect on the session cookies, which are read below
    session.get(main_url)

    form_data = {
        'method': 'incno',
        'typeFilters': '',
        'timeFilters': '',
        'postcodeFilter': '',
        'categoryFilters': 'Service Cutout Change,Asset repairs by Troublecall,Metering,Emergency Disconnection,Emergency Disconnection (Charge),Cat A,Cat B,Cat C',
        # The token is whatever follows `AT=` in the cookie, with double quotes stripped
        'authenticityToken': session.cookies['PLAY_SESSION'].split('AT=')[-1].replace('"', '')
    }

    # Post to the `data_url` argument rather than a notebook-level `url` variable
    r = session.post(data_url, data=form_data)

    return json.loads(r.json()['data'])

# Call `_` with its default urls and render the decoded result
data = _()

JSON(data)

<br>

### Electricity North West

https://www.enwl.co.uk/power-cuts/

In [None]:
# TODO: need to handle page size and number — `pageSize` and `pageNumber` are hard-coded in this example search url
'https://www.enwl.co.uk/power-outages/search?pageSize=10&postcodeOrReferenceNumber=&pageNumber=2&includeCurrent=true&includeResolved=true&includeTodaysPlanned=true&includeFuturePlanned=true&includeCancelledPlanned=true'

In [None]:
# incident_active (if known)
# logged/start time
# estimated time of fix
# postcodes impacted
# description

In [29]:
#hide
# Import only the name that is used instead of star-importing the whole module
from nbdev.export import notebook2script

# Export the `#exports` cells of the project's notebooks to their Python modules
notebook2script()

Converted 01-retrieval.ipynb.
Converted 02-collation.ipynb.
