# Route results checker

##  TLDR: 

This code validates the results in the routes output file.

## Author: 

Tiago Tamagusko (tamagusko@gmail.com)  
Version: 0.1 beta  
Date: 2023-06-07

# Test the results

This code tests whether the calculated paths are valid. Thus, the tests performed are:

1. All paths calculated: For each unique route_id, the code checks whether three paths—namely the 'safest', 'balanced', and 'shortest'—have been calculated.
2. Correct route types: The code verifies that for each route, the route_type is correctly classified as either 'safest', 'balanced', or 'shortest'.
3. Consistent coordinates: The code checks whether the latitude and longitude coordinates of the origin and destination remain consistent across all three routes of each unique route_id.
4. Consistent distances: The code tests the logic of route distances, confirming that the distance of the 'shortest' route is indeed smaller than that of the 'balanced' route, which in turn should be smaller than the 'safest' route.
5. Consistent durations: An alert is triggered if the duration doesn't follow the expected logic: the duration of the 'safest' route should be greater than the 'balanced' route, which should be longer than the 'shortest' route.

The results of these tests are recorded in the 'test_code' column in the dataset. Each type of error corresponds to a unique code:

- 0: No errors detected.
- 1: Not all routes have been calculated.
- 2: Error in path calculation (route_type not correctly classified as 'safest', 'balanced', or 'shortest').
- 3: Inconsistent coordinates for the origin or destination across different routes for the same route_id.
- 4: Inconsistent distances (shortest distance not smaller than balanced, or balanced distance not smaller than safest; equal distances are also flagged).
- 5: Inconsistent durations (shortest duration not shorter than balanced, or balanced duration not shorter than safest; equal durations are also flagged).

In [None]:
import pandas as pd
from google.colab import files
import io

def load_data():
    """Load the routes CSV uploaded from the local machine.

    Opens the Colab upload dialog through ``files.upload()``, reads the first
    uploaded file as CSV, and adds two empty string columns,
    ``test_code`` and ``test_error_description``, that the check functions
    fill in.

    Returns:
        pandas.DataFrame: the uploaded data plus the two empty test columns.

    Raises:
        ValueError: if the upload dialog returned no file.
    """
    # files.upload() returns a mapping of file name -> file content (bytes).
    uploaded_file = files.upload()
    if not uploaded_file:
        # Fail with a clear message instead of an IndexError on an empty upload.
        raise ValueError("No file was uploaded.")

    # Only the first uploaded file is used.
    file_name = next(iter(uploaded_file))

    # read the CSV data
    df = pd.read_csv(io.BytesIO(uploaded_file[file_name]))

    # add test columns (empty string means "no error recorded yet")
    df['test_code'] = ''
    df['test_error_description'] = ''
    return df


def check_paths(df):
    """Flag every route_id that does not have exactly three paths.

    The number of rows is counted per ``route_id``; rows that belong to a
    route_id whose count differs from 3 (or whose route_id is missing) get
    code ``1`` appended to ``test_code`` and the matching message appended to
    ``test_error_description``. Existing entries are kept and separated with
    ``;``.

    Args:
        df: DataFrame with the columns ``route_id``, ``test_code`` and
            ``test_error_description``. It is modified in place.
    """
    # Count rows per route_id and broadcast the count back to each row.
    # A missing route_id maps to NaN, which also compares as != 3.
    paths_per_route = df['route_id'].map(df['route_id'].value_counts())
    incomplete = paths_per_route != 3
    if not incomplete.any():
        return

    # Only the rows of the offending routes are flagged, so complete routes
    # keep a clean test_code.
    df.loc[incomplete, 'test_code'] = df.loc[incomplete, 'test_code'].apply(
        lambda code: f"{code};1" if code else "1"
    )
    df.loc[incomplete, 'test_error_description'] = df.loc[incomplete, 'test_error_description'].apply(
        lambda text: f"{text};Not all routes calculated." if text else "Not all routes calculated."
    )


def check_route_types(df):
    """Flag rows whose route_type is not 'safest', 'balanced' or 'shortest'.

    Offending rows get code ``2`` appended to ``test_code`` and the matching
    message appended to ``test_error_description`` (entries are separated
    with ``;``). The DataFrame is modified in place.
    """
    allowed_types = {'safest', 'balanced', 'shortest'}
    invalid_rows = ~df['route_type'].isin(allowed_types)
    if not invalid_rows.any():
        # Every route_type is one of the allowed values: nothing to flag.
        return

    annotations = (
        ('test_code', "2"),
        ('test_error_description', "Error in path calculation."),
    )
    for column, entry in annotations:
        current = df.loc[invalid_rows, column]
        # Bind entry as a default so each lambda keeps its own text.
        df.loc[invalid_rows, column] = current.apply(
            lambda value, entry=entry: f"{value};{entry}" if value else entry
        )


def check_coordinates(df):
    """Flag route_ids whose origin/destination coordinates differ between rows.

    For each ``route_id`` the number of distinct values is counted in each of
    the four coordinate columns. Any route_id where one of those counts is not
    exactly 1 gets code ``3`` appended to ``test_code`` and the matching
    message appended to ``test_error_description`` on all of its rows. The
    DataFrame is modified in place.
    """
    coordinate_columns = (
        'origin_latitude',
        'origin_longitude',
        'dest_latitude',
        'dest_longitude',
    )
    distinct_counts = df.groupby('route_id').agg(
        {column: 'nunique' for column in coordinate_columns}
    )

    # A route is inconsistent as soon as one coordinate column disagrees.
    has_mismatch = (distinct_counts != 1).any(axis=1)
    mismatched_ids = distinct_counts[has_mismatch].index
    affected_rows = df['route_id'].isin(mismatched_ids)

    previous_codes = df.loc[affected_rows, 'test_code']
    df.loc[affected_rows, 'test_code'] = previous_codes.apply(
        lambda code: f"{code};3" if code else "3"
    )

    previous_texts = df.loc[affected_rows, 'test_error_description']
    df.loc[affected_rows, 'test_error_description'] = previous_texts.apply(
        lambda text: (
            f"{text};Different coordinates for origin or destination."
            if text
            else "Different coordinates for origin or destination."
        )
    )


def check_distances(df):
    """Flag route_ids whose distances do not follow shortest < balanced < safest.

    A route_id is inconsistent when its 'safest' distance is less than or
    equal to its 'balanced' distance, or its 'balanced' distance is less than
    or equal to its 'shortest' distance. All rows of such a route_id get code
    ``4`` appended to ``test_code`` and the matching message appended to
    ``test_error_description``. The DataFrame is modified in place.

    Malformed input does not abort the check: rows with an unknown route_type
    or a missing route_id are ignored, only the first row of a duplicated
    (route_id, route_type) pair is used, and a comparison involving a missing
    route type is skipped (comparisons with NaN are False).
    """
    expected_types = ['safest', 'balanced', 'shortest']

    comparable = df.loc[
        df['route_type'].isin(expected_types),
        ['route_id', 'route_type', 'distance'],
    ].dropna(subset=['route_id'])
    # pivot() raises ValueError on duplicated (index, column) pairs.
    comparable = comparable.drop_duplicates(subset=['route_id', 'route_type'])
    if comparable.empty:
        return

    # reindex guarantees all three columns exist even when a route type never
    # appears in the file; absent values become NaN.
    by_route = comparable.pivot(
        index='route_id', columns='route_type', values='distance'
    ).reindex(columns=expected_types)

    out_of_order = (by_route['safest'] <= by_route['balanced']) | (
        by_route['balanced'] <= by_route['shortest']
    )
    inconsistent_ids = by_route.loc[out_of_order].index
    affected = df['route_id'].isin(inconsistent_ids)
    if not affected.any():
        return

    df.loc[affected, 'test_code'] = df.loc[affected, 'test_code'].apply(
        lambda code: f"{code};4" if code else "4"
    )
    df.loc[affected, 'test_error_description'] = df.loc[affected, 'test_error_description'].apply(
        lambda text: f"{text};Inconsistent distances." if text else "Inconsistent distances."
    )


def check_durations(df):
    """Flag route_ids whose durations do not follow shortest < balanced < safest.

    A route_id is inconsistent when its 'safest' duration is less than or
    equal to its 'balanced' duration, or its 'balanced' duration is less than
    or equal to its 'shortest' duration. All rows of such a route_id get code
    ``5`` appended to ``test_code`` and the matching message appended to
    ``test_error_description``. The DataFrame is modified in place.

    Malformed input does not abort the check: rows with an unknown route_type
    or a missing route_id are ignored, only the first row of a duplicated
    (route_id, route_type) pair is used, and a comparison involving a missing
    route type is skipped (comparisons with NaN are False).
    """
    expected_types = ['safest', 'balanced', 'shortest']

    comparable = df.loc[
        df['route_type'].isin(expected_types),
        ['route_id', 'route_type', 'duration'],
    ].dropna(subset=['route_id'])
    # pivot() raises ValueError on duplicated (index, column) pairs.
    comparable = comparable.drop_duplicates(subset=['route_id', 'route_type'])
    if comparable.empty:
        return

    # reindex guarantees all three columns exist even when a route type never
    # appears in the file; absent values become NaN.
    by_route = comparable.pivot(
        index='route_id', columns='route_type', values='duration'
    ).reindex(columns=expected_types)

    out_of_order = (by_route['safest'] <= by_route['balanced']) | (
        by_route['balanced'] <= by_route['shortest']
    )
    inconsistent_ids = by_route.loc[out_of_order].index
    affected = df['route_id'].isin(inconsistent_ids)
    if not affected.any():
        return

    df.loc[affected, 'test_code'] = df.loc[affected, 'test_code'].apply(
        lambda code: f"{code};5" if code else "5"
    )
    df.loc[affected, 'test_error_description'] = df.loc[affected, 'test_error_description'].apply(
        lambda text: f"{text};Inconsistent durations." if text else "Inconsistent durations."
    )


def save_data(df, file_name):
    """Write the DataFrame to ``file_name`` as CSV and download that file.

    The CSV is written without the index column, then handed to
    ``files.download`` so it is downloaded to the local machine.
    """
    df.to_csv(path_or_buf=file_name, index=False)
    files.download(file_name)


def main():
    """Load the uploaded routes file, run every check, and export the result.

    The checks run in a fixed order; each one appends its code and message to
    the ``test_code`` / ``test_error_description`` columns of the same
    DataFrame, which is then saved as ``routes_checked.csv``.
    """
    df = load_data()

    checks = (
        check_paths,
        check_route_types,
        check_coordinates,
        check_distances,
        check_durations,
    )
    for run_check in checks:
        run_check(df)

    save_data(df, "routes_checked.csv")
    # To list the route_ids that failed at least one check, uncomment:
    # print(df[df['test_code'] != '']['route_id'])

# Run the validation pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
    

Saving output (1).csv to output (1) (1).csv
0      458
1      458
2      458
3      310
4      310
      ... 
292    121
293    121
294    944
295    944
296    944
Name: route_id, Length: 219, dtype: int64
