In [1]:
import json
from datetime import datetime

In [2]:
# Absolute, machine-specific path to the sample RTPPM JSON message that the
# next cell loads.
file_path: str = r'/Users/gonzo/Desktop/RailScope/national_rail_project/src/sandbox/stomp/rtppm/rtppm_data.json'

In [3]:
# Parse the saved RTPPM message into plain Python objects. The encoding is
# pinned to UTF-8 so decoding does not depend on the platform's locale default.
with open(file_path, encoding='utf-8') as f:
    data = json.load(f)

In [4]:
# Walk down the message once and keep the intermediate levels, instead of
# repeating the full key chain for every value pulled out of it.
_rtppm_message = data['RTPPMDataMsgV1']
timestamp = int(_rtppm_message['timestamp'])
_national_page = _rtppm_message['RTPPMData']['NationalPage']
national_performance = _national_page['NationalPPM']
national_sectors_page = _national_page['Sector']

## National performance

- Total:
The total number of trains considered in this performance report.

- OnTime:
The number of trains that arrived on time at their destinations.

- Late:
The number of trains that arrived late at their destinations.

- CancelVeryLate:
The number of trains that were cancelled or arrived very late.

- PPM: <br>
    The Public Performance Measure (PPM) is a measure of train punctuality: the percentage of all trains considered (OnTime and Late) that arrived on time.<br>
    This dictionary contains the following keys:<br>
    - text: The PPM percentage performance in string format.
    - rag: The PPM performance category, one of the following (G, A, R, W)
    - ragDisplayFlag: A flag to indicate if the performance category should be displayed or not.
<br>
<br>
- RollingPPM: <br>
    The Rolling Public Performance Measure (Rolling PPM) is a moving average of PPM over a certain period of time. <br>
    This dictionary contains the following keys:
    - text: The Rolling PPM percentage performance in string format.
    - rag: The Rolling PPM performance category, one of the following (G, A, R, W)
    - trendInd: A trend indicator, one of the following (+, = , -)
<br>

| rag | Performance category             | trendInd | Trend direction         |
| --- | ---                              | ---      | ---                     |
| G   | Good performance                 | `+`      | Rising trend            |
| A   | Medium performance               | `=`      | No change/flat trend    |
| R   | Bad performance                  | `-`      | Falling trend           |
| W   | Unknown                          | N/A      | N/A                     |


In [32]:
def flatten_national_dict(nested_dict: dict, parent_key='', sep='_') -> dict:
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their parent's key with
    ``sep``; for example ``{'PPM': {'text': '91'}}`` becomes
    ``{'PPM_text': '91'}``.

    Args:
        nested_dict: The dictionary to flatten. Values that are themselves
            dictionaries are flattened recursively; every other value is
            copied over unchanged.
        parent_key: Prefix applied to every key at this level. Left empty
            for the outermost call.
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new flat dictionary. The input is not modified.
    """
    flat = {}
    for name, item in nested_dict.items():
        # Top-level keys are kept as they are; deeper keys carry the prefix.
        path = name if not parent_key else f'{parent_key}{sep}{name}'
        if not isinstance(item, dict):
            flat[path] = item
            continue
        flat.update(flatten_national_dict(item, path, sep=sep))
    return flat

In [34]:
# Flatten the national PPM figures into a single-level dictionary and show it.
flatten_national_perf = flatten_national_dict(national_performance)
print(flatten_national_perf)

{'Total': '18661', 'OnTime': '17010', 'Late': '1651', 'CancelVeryLate': '566', 'PPM_text': '91', 'PPM_rag': 'A', 'PPM_ragDisplayFlag': 'Y', 'RollingPPM_text': '88', 'RollingPPM_rag': 'R', 'RollingPPM_trendInd': '-'}


## National sectors performance
<br>
A dictionary keyed by sector code (e.g. LSE, LD, REG, SCO). Each sector is a dictionary containing the following keys:
<br>
<br>

- sectorName: A string representing the name of the sector. <br>

- Total: The total number of trains in the sector <br>

- OnTime: The number of trains that arrived on time at their destinations. <br>

- Late: The number of trains that arrived late at their destinations. <br>

- CancelVeryLate: The number of trains that were cancelled or arrived very late. <br> 

- PPM_text: The Public Performance Measure (PPM) percentage performance in string format. <br> 

- PPM_rag: The PPM performance category, one of G (Good performance), A (Medium performance), R (Bad performance), or W (Unknown). <br> 

- RollingPPM_text: The Rolling Public Performance Measure (Rolling PPM) percentage performance in string format. <br> 

- RollingPPM_rag: The Rolling PPM performance category, one of the following (G, A, R, W) <br> 

- RollingPPM_trendInd: A trend indicator, one of the following (+, = , -) <br>

| rag | Performance category             | trendInd | Trend direction         |
| --- | ---                              | ---      | ---                     |
| G   | Good performance                 | `+`      | Rising trend            |
| A   | Medium performance               | `=`      | No change/flat trend    |
| R   | Bad performance                  | `-`      | Falling trend           |
| W   | Unknown                          | N/A      | N/A                     |


In [28]:
def flatten_national_sectors(nested_dict: list) -> dict:
    """Turn the list of sector entries into a dictionary keyed by sector code.

    Each entry must provide ``sectorCode``, ``sectorDesc`` and ``SectorPPM``.
    The result maps every sector code to a flat dictionary that starts with
    ``sectorName`` (taken from ``sectorDesc``) followed by the contents of
    ``SectorPPM``. Within ``SectorPPM`` the nested ``PPM`` and ``RollingPPM``
    dictionaries are expanded into prefixed keys (``PPM_text``, ``PPM_rag``,
    ``RollingPPM_text``, ``RollingPPM_rag``, ``RollingPPM_trendInd``); any
    other fields they contain are dropped. All remaining ``SectorPPM``
    entries are copied unchanged.

    Args:
        nested_dict: A list of sector dictionaries (despite the name).

    Returns:
        A dictionary of ``{sectorCode: flat sector dictionary}``.

    Raises:
        KeyError: If a sector entry, or its ``PPM`` / ``RollingPPM``
            dictionary, lacks one of the expected keys.
    """
    # Nested SectorPPM entries and the sub-fields pulled out of each one.
    exploded_fields = {
        "PPM": ("text", "rag"),
        "RollingPPM": ("text", "rag", "trendInd"),
    }

    sectors = {}
    for sector in nested_dict:
        row = {"sectorName": sector["sectorDesc"]}
        sectors[sector["sectorCode"]] = row
        for name, payload in sector["SectorPPM"].items():
            wanted = exploded_fields.get(name)
            if wanted is None:
                # Plain value: copy it over as it is.
                row[name] = payload
                continue
            for field in wanted:
                row[f"{name}_{field}"] = payload[field]

    return sectors


In [35]:
# Build the per-sector dictionary (keyed by sector code) and show it.
flatten_national_sectors_perf = flatten_national_sectors(national_sectors_page)
print(flatten_national_sectors_perf)

{'LSE': {'sectorName': 'London and South East', 'Total': '9973', 'OnTime': '9170', 'Late': '803', 'CancelVeryLate': '270', 'PPM_text': '91', 'PPM_rag': 'A', 'RollingPPM_text': '88', 'RollingPPM_rag': 'R', 'RollingPPM_trendInd': '-'}, 'LD': {'sectorName': 'Long Distance', 'Total': '1331', 'OnTime': '1194', 'Late': '137', 'CancelVeryLate': '70', 'PPM_text': '89', 'PPM_rag': 'A', 'RollingPPM_text': '90', 'RollingPPM_rag': 'A', 'RollingPPM_trendInd': '+'}, 'REG': {'sectorName': 'Regional', 'Total': '5393', 'OnTime': '4796', 'Late': '597', 'CancelVeryLate': '198', 'PPM_text': '88', 'PPM_rag': 'R', 'RollingPPM_text': '87', 'RollingPPM_rag': 'R', 'RollingPPM_trendInd': '-'}, 'SCO': {'sectorName': 'Scotland', 'Total': '1964', 'OnTime': '1850', 'Late': '114', 'CancelVeryLate': '28', 'PPM_text': '94', 'PPM_rag': 'G', 'RollingPPM_text': '86', 'RollingPPM_rag': 'R', 'RollingPPM_trendInd': '-'}}
