# NHS SitReps

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard Python libraries, plus determination of projdir, a basic printable class, etc.

In [6]:
import os
from datetime import datetime

import csv
from xlrd import open_workbook, xldate_as_tuple

import numpy as np
import matplotlib.pyplot as plt

import common_core
import nhs_core

## Configuration

Data to download from the NHS statistical work area

In [7]:
# Number of leading columns that findRowNos() inspects when searching for a heading
MAX_COLS_WITH_HEADERS = 6

# Headings on the sheet's header row that identify the region, code and name columns
REGION_HEADING = "NHS England Region"
CODE_HEADING = "Code"
NAME_HEADING = "Name"

# Critical care column headings; scanSheet() accepts "CC Adult avail" as an
# alias of "CC Adult Open" when locating the beds-open columns
CC_AVAIL_HEADING = "CC Adult avail"
CC_OPEN_HEADING = "CC Adult Open"
CC_OCC_HEADING = "CC Adult Occ"

In [8]:
# Trusts keyed by their 3-character code, each with a name and a region.
# scanSheet() reads the "region" from here for any trust that it first
# encounters in a workbook that is not flagged as the latest one.
legacyTrustCodes = \
{
    "RDD": {"name": "Basildon And Thurrock University Hospitals NHS Foundation Trust", "region": "East of England"},
    "RC1": {"name": "Bedford Hospital NHS Trust", "region": "East of England"},
    "RLU": {"name": "Birmingham Women's NHS Foundation Trust", "region": "Midlands"},
    "RJF": {"name": "Burton Hospitals NHS Foundation Trust", "region": "Midlands"},
    "RR1": {"name": "Heart Of England NHS Foundation Trust", "region": "Midlands"},
    "RQQ": {"name": "Hinchingbrooke Health Care NHS Trust", "region": "East of England"},
    "RGQ": {"name": "Ipswich Hospital NHS Trust", "region": "East of England"},
    "RQ8": {"name": "Mid Essex Hospital Services NHS Trust", "region": "East of England"},
    "RW3": {"name": "Central Manchester University Hospitals NHS Foundation Trust", "region": "North West"},
    "RLN": {"name": "City Hospitals Sunderland NHS Foundation Trust", "region": "North East and Yorkshire"},
    "RNL": {"name": "North Cumbria University Hospitals NHS Trust", "region": "North East and Yorkshire"},
    "RQ6": {"name": "Royal Liverpool And Broadgreen University Hospitals NHS Trust", "region": "North West"},
    "RE9": {"name": "South Tyneside NHS Foundation Trust", "region": "North East and Yorkshire"},
    "RM2": {"name": "University Hospital Of South Manchester NHS Foundation Trust", "region": "North West"},
    "RD3": {"name": "Poole Hospital NHS Foundation Trust", "region": "South West"},
    "RBA": {"name": "Taunton And Somerset NHS Foundation Trust", "region": "South West"},
    "RDZ": {"name": "The Royal Bournemouth And Christchurch Hospitals NHS Foundation Trust", "region": "South West"},
    "RA3": {"name": "Weston Area Health NHS Trust", "region": "South West"}
}

## XLSX to CSV

In [9]:
def findRowNos(sheet, heading):
    '''Find the rows that contain a cell starting with the specified heading.

    Only the first MAX_COLS_WITH_HEADERS columns of each row are inspected
    (fewer when the sheet itself is narrower) and only text cells can match.

    Args:
        sheet: Worksheet exposing nrows, ncols and cell(rowNo, colNo).value.
        heading: Prefix that the text of a cell has to start with.

    Returns:
        Row numbers in ascending order, each listed once even when several
        cells on the same row match.
    '''

    # Never index past the last column of a narrow sheet
    numCols = min(MAX_COLS_WITH_HEADERS, sheet.ncols)

    rowNos = []

    for rowNo in range(sheet.nrows):
        for colNo in range(numCols):
            cellValue = sheet.cell(rowNo, colNo).value

            if isinstance(cellValue, str) and cellValue.startswith(heading):
                rowNos.append(rowNo)
                # One match is enough; stops the row being listed twice
                break

    return rowNos


def findColNos(sheet, rowNo, heading, aliases=()):
    '''Find the columns on a row whose cell equals the heading or one of its aliases.

    Args:
        sheet: Worksheet exposing ncols and cell(rowNo, colNo).value.
        rowNo: Row that holds the column headings.
        heading: Exact cell value to look for.
        aliases: Optional alternative cell values that are accepted as well.

    Returns:
        Matching column numbers in ascending order, each listed once.
    '''

    # The default for aliases is an immutable tuple, so no mutable object is
    # shared between calls
    accepted = (heading, *aliases)

    return [colNo for colNo in range(sheet.ncols)
            if sheet.cell(rowNo, colNo).value in accepted]


def findReportDates(sheet, rowNo, dateMode):
    """Collect the report dates found on one row of the sheet.

    Text cells are kept verbatim when they are exactly eight characters long
    and ignored otherwise. Any other cell with a positive integer part is
    decoded with xldate_as_tuple() and rendered as YYYY-MM-DD.
    """

    dates = []

    for col in range(sheet.ncols):
        value = sheet.cell(rowNo, col).value

        if isinstance(value, str):
            # Only eight character strings are taken to be dates
            if len(value) != 8:
                continue
            dates.append(value)
            continue

        # Zero or negative values are not treated as dates
        if int(value) <= 0:
            continue

        dateParts = xldate_as_tuple(value, dateMode)
        dates.append("{:04}-{:02}-{:02}".format(*dateParts[:3]))

    return dates
    

def scanSheet(trusts, sheet, dateMode=0, latest=False):
    """Locate the columns of interest on a sheet and register unknown trusts.

    The header row is the first row with a cell starting with REGION_HEADING.
    Report dates are read from the row directly above it.

    Args:
        trusts: Dict keyed by trust code; updated in place with a
            {"name", "region"} entry for every code not already present.
        sheet: Worksheet exposing nrows, ncols and cell(rowNo, colNo).value.
        dateMode: Workbook date mode, passed through to findReportDates().
        latest: When true the region is taken from the sheet itself,
            otherwise it is looked up in legacyTrustCodes.

    Returns:
        Tuple of (code column number, report dates, beds-open column
        numbers, beds-occupied column numbers).

    Raises:
        IndexError: A required heading was not found.
        AssertionError: The number of dates differs from the number of
            beds-open or beds-occupied columns.
        KeyError: A new code in a non-latest sheet is missing from
            legacyTrustCodes.
    """

    headerRowNo = findRowNos(sheet, REGION_HEADING)[0]

    regionColNo = findColNos(sheet, headerRowNo, REGION_HEADING)[0]
    codeColNo = findColNos(sheet, headerRowNo, CODE_HEADING)[0]
    nameColNo = findColNos(sheet, headerRowNo, NAME_HEADING)[0]

    # Dates sit on the row directly above the column headings
    reportDates = findReportDates(sheet, headerRowNo - 1, dateMode)

    ccOpenColNos = findColNos(sheet, headerRowNo, CC_OPEN_HEADING, [CC_AVAIL_HEADING])
    assert len(reportDates) == len(ccOpenColNos), "Mismatch of dates and CC beds open"

    ccOccColNos = findColNos(sheet, headerRowNo, CC_OCC_HEADING)
    assert len(reportDates) == len(ccOccColNos), "Mismatch of dates and CC beds occupied"

    for rowNo in range(headerRowNo, sheet.nrows):
        code = sheet.cell(rowNo, codeColNo).value

        # Only 3-character text is treated as a trust code; the type check
        # stops len() failing on a numeric cell
        if not isinstance(code, str) or len(code) != 3:
            continue

        if code in trusts:
            continue

        if latest:
            region = sheet.cell(rowNo, regionColNo).value
        else:
            # Deliberately a hard failure when the legacy mapping is incomplete
            region = legacyTrustCodes[code]["region"]

        trusts[code] = {"name": sheet.cell(rowNo, nameColNo).value, "region": region}

    return codeColNo, reportDates, ccOpenColNos, ccOccColNos
   

def saveCsv(year, areaType, areaName, reportDates, ccOpen, ccOcc):
    """Write the critical care time series of one area to a CSV file.

    The file is written beneath common_core.projdir, in a directory made from
    the year and the area type. Its base name is
    common_core.getSafeName(areaName) with a ".csv" suffix. An existing file
    at that path is overwritten.

    Args:
        year: Year used as a directory name (converted with str()).
        areaType: Sub-directory name, e.g. "region" or "nation".
        areaName: Name of the area, used to derive the file name.
        reportDates: Values for the "date" column.
        ccOpen: Values for the "cc_open" column.
        ccOcc: Values for the "cc_occupied" column.
    """

    # Ensure CSV path exists; exist_ok avoids the race between a separate
    # existence check and the directory creation
    csvPath = os.path.join(common_core.projdir, "data", "nhs-statistics", "csv", "weekly", "sitreps", str(year), areaType)
    os.makedirs(csvPath, exist_ok=True)

    # Determine safe filename
    csvFn = os.path.join(csvPath, common_core.getSafeName(areaName) + ".csv")

    # Save data to CSV, one row per report date
    header = "date,cc_open,cc_occupied"
    data = np.column_stack((reportDates, ccOpen, ccOcc))
    np.savetxt(csvFn, data, fmt='%s', delimiter=',', header=header, comments='')
    
    
def _addCellValues(sheet, rowNo, colNos, totals):
    """Add the cells of one row to the running totals, column by column.

    Cells holding the placeholders "-" or "N/A" are skipped.
    """

    for i, colNo in enumerate(colNos):
        cellValue = sheet.cell(rowNo, colNo).value
        if cellValue not in ["-", "N/A"]:
            totals[i] += cellValue


def loadSitRep(trusts, year, fileName, latest = False):
    """Load one SitReps workbook and save region and nation CSV files.

    Only the sheet named "Adult critical care" is processed. For each region
    in nhs_core.nhsRegionNames, the beds open and beds occupied of all trusts
    in that region are summed per report date and saved. The regional sums
    are then checked against the nation row of the sheet, except for 2016.

    Args:
        trusts: Dict of known trusts keyed by code; extended by scanSheet().
        year: Year of the data, used in the output path and the 2016 check.
        fileName: Path of the workbook to read.
        latest: Passed to scanSheet() to select the source of trust regions.

    Raises:
        AssertionError: The regional sums differ from the nation row.
    """

    print(f"Processing {year} - {os.path.basename(fileName)}...")

    workbook = open_workbook(fileName)

    for sheet in workbook.sheets():
        if sheet.name != "Adult critical care":
            continue

        codeColNo, reportDates, ccOpenColNos, ccOccColNos = scanSheet(trusts, sheet, workbook.datemode, latest = latest)

        numDates = len(reportDates)
        totalOpen = np.zeros(numDates, dtype=int)
        totalOcc = np.zeros(numDates, dtype=int)

        for region in nhs_core.nhsRegionNames:
            regionOpen = np.zeros(numDates, dtype=int)
            regionOcc = np.zeros(numDates, dtype=int)

            for rowNo in range(sheet.nrows):
                trustCode = sheet.cell(rowNo, codeColNo).value

                # Rows without a known trust code (headings, totals) are ignored
                if trustCode in trusts and trusts[trustCode]["region"] == region:
                    _addCellValues(sheet, rowNo, ccOpenColNos, regionOpen)
                    _addCellValues(sheet, rowNo, ccOccColNos, regionOcc)

            totalOpen += regionOpen
            totalOcc += regionOcc

            saveCsv(year, "region", region, reportDates, regionOpen, regionOcc)

        # Now do England

        # NOTE(review): these accumulators are shared by every entry of
        # nhs_core.nationNames - presumably there is only one; confirm
        nationOpen = np.zeros(numDates, dtype=int)
        nationOcc = np.zeros(numDates, dtype=int)

        for nation in nhs_core.nationNames:
            rowNo = findRowNos(sheet, nation.upper())[0]

            _addCellValues(sheet, rowNo, ccOpenColNos, nationOpen)
            _addCellValues(sheet, rowNo, ccOccColNos, nationOcc)

            # A few of the totals for England were incorrect in 2016 data
            if year != 2016:
                assert (nationOpen == totalOpen).all(), "Regions do not add up to nation"
                assert (nationOcc == totalOcc).all(), "Regions do not add up to nation"

            # The nation file is written from the regional sums rather than
            # from the nation row of the sheet
            saveCsv(year, "nation", nation, reportDates, totalOpen, totalOcc)


def convertSitReps():
    """Download the current SitReps and convert all available workbooks to CSV.

    The freshly downloaded files are processed first with latest=True, so
    that trusts are registered from them before the older workbooks are
    read. The workbooks for 2016 to 2019 follow with latest=False.
    """

    trusts = {}

    partNames = nhs_core.downloadSitReps(skipExisting=True)
    for partName in partNames:
        # NOTE(review): presumably a string such as "2020", whereas the loop
        # below passes an int - confirm against nhs_core.SITREPS_YEAR
        year = nhs_core.SITREPS_YEAR[:4]
        xlsxFn = os.path.join(nhs_core.rawPath, partName)
        loadSitRep(trusts, year, xlsxFn, latest = True)

    for sitRepsFile in nhs_core.sitRepsFiles:
        for year in range(2016, 2020):
            xlsxPath = os.path.join(nhs_core.rawPath, sitRepsFile[0], nhs_core.SITREPS_DIR, f"{year}-{year + 1}")

            # os.listdir() returns names in arbitrary order. Files of the same
            # year write to the same CSV paths, so sort to make the outcome
            # reproducible.
            for basename in sorted(os.listdir(xlsxPath)):
                fileName = os.path.join(xlsxPath, basename)
                loadSitRep(trusts, year, fileName, latest = False)

## Running Interactively

In [10]:
# Run the full conversion only when executed directly, not when imported
if __name__ == '__main__':
    convertSitReps()

Processing 2020 - UEC-Daily-SitRep-Acute-Web-File-Timeseries-3.xlsx...
Processing 2016 - DailySR-Web-file-Time-Series-18.xlsx...
Processing 2017 - Winter-data-Timeseries-20180304.xlsx...
Processing 2018 - Winter-data-timeseries-20190307.xlsx...
Processing 2019 - Winter-SitRep-Acute-Time-series-2-December-2019-1-March-2020.xlsx...
