In [1]:
import json
from datetime import datetime

In [2]:
# Absolute, machine-specific location of the RTPPM JSON file loaded below;
# adjust this path when running the notebook on another machine.
file_path: str = r'C:\Users\Gonzalo\Desktop\national_rail_project\src\sandbox\stomp\rtppm\rtppm_data.json'

In [5]:
# Parse the RTPPM JSON file into `data`.
# The encoding is pinned to UTF-8 (the standard encoding for JSON) so the
# result does not depend on the platform's locale codec, e.g. cp1252 on Windows.
with open(file_path, encoding="utf-8") as f:
    data = json.load(f)

In [8]:
# Coerce the top-level 'timestamp' field of the loaded message to an int.
# NOTE(review): the unit (seconds vs. milliseconds since the epoch) is not
# visible in this notebook - confirm before converting it with datetime.
timestamp = int(data['timestamp'])


## National performance

- Total:
The total number of trains considered in this performance report.

- OnTime:
The number of trains that arrived on time at their destinations.

- Late:
The number of trains that arrived late to their destinations.

- CancelVeryLate:
The number of trains that were cancelled or arrived very late.

- PPM: <br>
    The Public Performance Measure (PPM) is a measure of train punctuality: the percentage of all trains (OnTime + Late) that are counted as OnTime, e.g. 15235 OnTime out of a Total of 16652 gives a PPM of 91.<br>
    This dictionary contains the following keys:<br>
    - text: The PPM percentage performance in string format.
    - rag: The PPM performance category, one of the following (G, A, R, W)
    - ragDisplayFlag: A flag to indicate if the performance category should be displayed or not.
<br>
<br>
- RollingPPM: <br>
    The Rolling Public Performance Measure (Rolling PPM) is a moving average of PPM over a certain period of time. <br>
    This dictionary contains the following keys:
    - text: The Rolling PPM percentage performance in string format.
    - rag: The Rolling PPM performance category, one of the following (G, A, R, W)
    - trendInd: A trend indicator, one of the following (+, = , -)
<br>

| rag | Performance category             | trendInd | Trend direction         |
| --- | ---                              | ---      | ---                     |
| G   | Good performance                 | `+`      | Rising trend            |
| A   | Medium performance               | `=`      | No change/flat trend    |
| R   | Bad performance                  | `-`      | Falling trend           |
| W   | Unknown                          | N/A      | N/A                     |


In [30]:
def flatten_national_dict(nested_dict: dict, parent_key='', sep='_') -> dict:
    """Collapse an arbitrarily nested dictionary into a single-level one.

    Keys of nested dictionaries are joined to the key of their parent with
    ``sep``; ``parent_key`` (when non-empty) is prefixed to every top-level
    key. Values that are not dictionaries are copied unchanged, and an empty
    nested dictionary contributes no entries.

    Example input (the ``NationalPPM`` section of the feed)::

        {
            "Total": "16652",
            "OnTime": "15235",
            "Late": "1417",
            "CancelVeryLate": "499",
            "PPM": {"text": "91", "rag": "A", "ragDisplayFlag": "Y"},
            "RollingPPM": {"text": "85", "rag": "R", "trendInd": "-"}
        }

    which becomes ``{"Total": "16652", ..., "PPM_text": "91", "PPM_rag": "A",
    ..., "RollingPPM_trendInd": "-"}``.
    """
    flat = {}
    for name, content in nested_dict.items():
        # Only prefix when there is a parent; the top level keeps its raw key.
        path = name if not parent_key else f'{parent_key}{sep}{name}'
        if not isinstance(content, dict):
            flat[path] = content
            continue
        # Descend into the nested dict, carrying the accumulated key path.
        flat.update(flatten_national_dict(content, path, sep=sep))
    return flat

In [31]:
# Pull the nationwide PPM summary out of the loaded message, flatten it to a
# single-level dict and display the result.
national_performance = data['RTPPMData']['NationalPage']['NationalPPM']
flatten_national_perf = flatten_national_dict(national_performance)
print(flatten_national_perf)

{'Total': '16652', 'OnTime': '15235', 'Late': '1417', 'CancelVeryLate': '499', 'PPM_text': '91', 'PPM_rag': 'A', 'PPM_ragDisplayFlag': 'Y', 'RollingPPM_text': '85', 'RollingPPM_rag': 'R', 'RollingPPM_trendInd': '-'}


## National sectors performance
<br>
A dictionary with keys representing different sectors. Each sector is a dictionary containing the following keys:
<br>
<br>

- sectorName: A string representing the name of the sector. <br>

- Total: The total number of trains in the sector <br>

- OnTime: The number of trains that arrived on time at their destinations. <br>

- Late: The number of trains that arrived late to their destinations. <br> 

- CancelVeryLate: The number of trains that were cancelled or arrived very late. <br> 

- PPM_text: The Public Performance Measure (PPM) percentage performance in string format. <br> 

- PPM_rag: The PPM performance category, one of G (Good performance), A (Medium performance), R (Bad performance), or W (Unknown). <br> 

- RollingPPM_text: The Rolling Public Performance Measure (Rolling PPM) percentage performance in string format. <br> 

- RollingPPM_rag: The Rolling PPM performance category, one of the following (G, A, R, W) <br> 

- RollingPPM_trendInd: A trend indicator, one of the following (+, = , -) <br>

| rag | Performance category             | trendInd | Trend direction         |
| --- | ---                              | ---      | ---                     |
| G   | Good performance                 | `+`      | Rising trend            |
| A   | Medium performance               | `=`      | No change/flat trend    |
| R   | Bad performance                  | `-`      | Falling trend           |
| W   | Unknown                          | N/A      | N/A                     |


In [36]:
def flatten_national_sectors(nested_dict: list, parent_key='', sep='_') -> dict:
    """Flatten the national ``Sector`` list into a dict of flat dicts keyed by sector code.

    Each element of ``nested_dict`` is expected to look like::

        {
            "SectorPPM": {
                "Total": "8880",
                "OnTime": "8202",
                "Late": "678",
                "CancelVeryLate": "228",
                "PPM": {"text": "92", "rag": "G"},
                "RollingPPM": {"text": "84", "rag": "R", "trendInd": "-"}
            },
            "sectorCode": "LSE",
            "sectorDesc": "London and South East"
        }

    Args:
        nested_dict: List of sector records shaped as above.
        parent_key: Accepted for signature compatibility; not used.
        sep: Accepted for signature compatibility; not used (flattened keys
            always use ``_``, e.g. ``PPM_text``).

    Returns:
        A dict mapping each ``sectorCode`` to a flat dict containing
        ``sectorName``, the scalar ``SectorPPM`` entries, ``PPM_text``,
        ``PPM_rag``, ``RollingPPM_text``, ``RollingPPM_rag`` and
        ``RollingPPM_trendInd``. The trend indicator is ``None`` when the
        record carries none.

    Raises:
        KeyError: If a record lacks ``sectorCode``, ``sectorDesc`` or
            ``SectorPPM``, or a PPM/RollingPPM dict lacks ``text`` or ``rag``.
    """
    new_data = {}
    for sector in nested_dict:
        # Build the flat record locally, then file it under the sector code.
        flat_sector = {"sectorName": sector["sectorDesc"]}
        for key, value in sector["SectorPPM"].items():
            if key == "PPM":
                flat_sector["PPM_text"] = value["text"]
                flat_sector["PPM_rag"] = value["rag"]
            elif key == "RollingPPM":
                flat_sector["RollingPPM_text"] = value["text"]
                flat_sector["RollingPPM_rag"] = value["rag"]
                # A RollingPPM entry may omit trendInd (no trend applies to
                # the "W"/unknown category), so do not fail on its absence.
                flat_sector["RollingPPM_trendInd"] = value.get("trendInd")
            else:
                # Scalar entries (Total, OnTime, ...) are copied as they are.
                flat_sector[key] = value
        new_data[sector["sectorCode"]] = flat_sector
    return new_data


In [37]:
# Pull the per-sector list out of the loaded message, flatten it into a dict
# keyed by sector code and display the result.
national_sectors_page = data['RTPPMData']['NationalPage']['Sector']
flatten_national_sectors_perf = flatten_national_sectors(national_sectors_page)
print(flatten_national_sectors_perf)

{'LSE': {'sectorName': 'London and South East', 'Total': '8880', 'OnTime': '8202', 'Late': '678', 'CancelVeryLate': '228', 'PPM_text': '92', 'PPM_rag': 'G', 'RollingPPM_text': '84', 'RollingPPM_rag': 'R', 'RollingPPM_trendInd': '-'}, 'LD': {'sectorName': 'Long Distance', 'Total': '1166', 'OnTime': '1045', 'Late': '121', 'CancelVeryLate': '65', 'PPM_text': '89', 'PPM_rag': 'A', 'RollingPPM_text': '89', 'RollingPPM_rag': 'A', 'RollingPPM_trendInd': '='}, 'REG': {'sectorName': 'Regional', 'Total': '4809', 'OnTime': '4283', 'Late': '526', 'CancelVeryLate': '184', 'PPM_text': '89', 'PPM_rag': 'A', 'RollingPPM_text': '84', 'RollingPPM_rag': 'R', 'RollingPPM_trendInd': '-'}, 'SCO': {'sectorName': 'Scotland', 'Total': '1797', 'OnTime': '1705', 'Late': '92', 'CancelVeryLate': '22', 'PPM_text': '94', 'PPM_rag': 'G', 'RollingPPM_text': '91', 'RollingPPM_rag': 'A', 'RollingPPM_trendInd': '-'}}


## National Operator performance

- Total: The total number of trains for the operator. <br>
- PPM: A dictionary with keys 'text' and 'rag', representing the Public Performance Measure percentage and performance category, respectively. <br>
- RollingPPM: A dictionary with keys 'text', 'rag', 'displayFlag', and 'trendInd', representing the Rolling Public Performance Measure percentage, performance category, display flag, and trend indicator, respectively. <br>
- code: A code representing the operator. <br>
- name: The name of the operator. <br>
- keySymbol: An optional key symbol for the operator. <br>

| rag | Performance category             | trendInd | Trend direction         |
| --- | ---                              | ---      | ---                     |
| G   | Good performance                 | `+`      | Rising trend            |
| A   | Medium performance               | `=`      | No change/flat trend    |
| R   | Bad performance                  | `-`      | Falling trend           |
| W   | Unknown                          | N/A      | N/A                     |

In [80]:
def flatten_national_operator(nested_dict: list) -> dict:
    """Turn the ``Operator`` list into a dict of flat records keyed by operator code.

    A record of the input list looks like::

        {
            "Total": "6",
            "PPM": {"text": "100", "rag": "G"},
            "RollingPPM": {"text": "-1", "rag": "W", "displayFlag": "Y"},
            "code": "35",
            "name": "Caledonian Sleeper",
            "keySymbol": "*"
        }

    In the flattened record the ``PPM`` dict becomes ``PPM_text`` and
    ``PPM_rag``; the ``RollingPPM`` dict becomes ``RollingPPM_text``,
    ``RollingPPM_rag`` and ``RollingPPM_trendInd`` (``None`` when the record
    has no trend indicator). Any other nested dict is dropped, ``code`` and
    ``keySymbol`` are left out of the record, and every remaining scalar entry
    is copied unchanged.
    """
    omitted = ('code', 'keySymbol')
    by_code = {}
    for operator in nested_dict:
        row = {}
        for field, content in operator.items():
            if not isinstance(content, dict):
                # Scalars pass straight through, except the omitted fields.
                if field not in omitted:
                    row[field] = content
                continue
            if field == 'PPM':
                row.update(PPM_text=content['text'], PPM_rag=content['rag'])
            elif field == 'RollingPPM':
                row.update(
                    RollingPPM_text=content['text'],
                    RollingPPM_rag=content['rag'],
                    # Not every record carries a trend indicator.
                    RollingPPM_trendInd=content.get('trendInd'),
                )
        # The operator code becomes the lookup key of the flattened record.
        by_code[operator['code']] = row

    return by_code


In [81]:
# Pull the per-operator list out of the loaded message and flatten it into a
# dict keyed by operator code.
national_operator = data['RTPPMData']['NationalPage']['Operator']
flatten_national_operator_perf = flatten_national_operator(national_operator)

In [86]:
# Display one (operator code, flattened record) tuple per line.
for record in flatten_national_operator_perf.items():
    print(record)

('35', {'Total': '6', 'PPM_text': '100', 'PPM_rag': 'G', 'RollingPPM_text': '-1', 'RollingPPM_rag': 'W', 'RollingPPM_trendInd': None, 'name': 'Caledonian Sleeper'})
('74', {'Total': '281', 'PPM_text': '99', 'PPM_rag': 'G', 'RollingPPM_text': '97', 'RollingPPM_rag': 'G', 'RollingPPM_trendInd': '-', 'name': 'Chiltern'})
('30', {'Total': '1243', 'PPM_text': '97', 'PPM_rag': 'G', 'RollingPPM_text': '96', 'RollingPPM_rag': 'G', 'RollingPPM_trendInd': '-', 'name': 'London Overground'})
('33', {'Total': '527', 'PPM_text': '96', 'PPM_rag': 'G', 'RollingPPM_text': '85', 'RollingPPM_rag': 'R', 'RollingPPM_trendInd': '-', 'name': 'Elizabeth line'})
('79', {'Total': '267', 'PPM_text': '96', 'PPM_rag': 'G', 'RollingPPM_text': '97', 'RollingPPM_rag': 'G', 'RollingPPM_trendInd': '+', 'name': 'c2c'})
('61', {'Total': '123', 'PPM_text': '95', 'PPM_rag': 'G', 'RollingPPM_text': '94', 'RollingPPM_rag': 'G', 'RollingPPM_trendInd': '-', 'name': 'London North Eastern Railway'})
('28', {'Total': '381', 'PPM_