# Route Factor Calculator

## TLDR:
Calculates the route factor for each route_type (shortest, balanced, safest).

## Author:
Tiago Tamagusko (tamagusko@gmail.com)
Version: 0.1 beta
Date: 2023-06-22

## Description:
The code implements a route factor calculation for each route based on the equation:

$$
\text{Route factor} = \frac{1}{k} \sum_{i=1}^{k} \left( 0.6 \times \frac{\text{TravelTime}_i}{15\ \text{min}} + 0.4 \times \frac{\text{TotalDistance}_i}{5\ \text{km}} \right)
$$

where:
- $k$ is the number of routes of a given route type.
- $\text{TravelTime}_i$ is the travel time of route $i$, measured in minutes.
- $\text{TotalDistance}_i$ is the total distance of route $i$, measured in kilometers.


In [None]:
import pandas as pd
from google.colab import files
import io

In [None]:
# Load and clean data

def load_data():
    """Prompt for a file upload in Colab and parse it as CSV.

    Opens the Colab upload dialog via ``files.upload()``. Only the first
    entry of the returned mapping is read; any additional uploaded files
    are ignored.

    Returns:
        pandas.DataFrame: The parsed contents of the first uploaded file.
    """
    uploads = files.upload()

    # The upload result maps file names to their raw contents.
    first_name = list(uploads)[0]
    raw_contents = uploads[first_name]

    return pd.read_csv(io.BytesIO(raw_contents))


def save_data(df, filename):
    """Write ``df`` to ``filename`` as CSV and trigger a Colab download.

    Args:
        df: DataFrame to export; the index is not written.
        filename: Destination path of the CSV file.
    """
    df.to_csv(filename, index=False)

    confirmation = f"Data successfully saved to {filename}"
    print(confirmation)

    # Hand the freshly written file to the browser for download.
    files.download(filename)


def filter_data(df):
    """Keep clean rows that belong to routes with at least three clean rows.

    A row is "clean" when both ``test_code`` and ``error_description`` are
    null. After dropping the other rows, any ``route_id`` left with fewer
    than three rows is removed entirely. The number of surviving unique
    routes is printed.

    Args:
        df: DataFrame with ``test_code``, ``error_description`` and
            ``route_id`` columns.

    Returns:
        pandas.DataFrame: The filtered rows, in their original order.
    """
    is_clean = df['test_code'].isna() & df['error_description'].isna()
    clean_rows = df[is_clean]

    # Count clean rows per route and keep the ids that reach the threshold.
    rows_per_route = clean_rows['route_id'].value_counts()
    kept_ids = rows_per_route.loc[rows_per_route >= 3].index

    in_kept_route = clean_rows['route_id'].isin(kept_ids)
    filtered_df = clean_rows[in_kept_route]

    print("Number of unique routes:", filtered_df['route_id'].nunique())

    return filtered_df



In [None]:
# Upload the input CSV when prompted (expected file: routes_checked.csv),
# then keep only the rows that pass the checks in filter_data.
df = load_data()
filtered_df = filter_data(df)
# Optional: uncomment to export and download the filtered rows.
# save_data(filtered_df, 'filtered_data.csv')

Saving routes_checked.csv to routes_checked (1).csv
Number of unique routes: 125


In [None]:
def calculate_route_factor(
    df,
    *,
    time_weight=0.6,
    distance_weight=0.4,
    reference_time_min=15,
    reference_distance_km=5,
    duration_divisor=100,
):
    """Add a per-row route factor to ``df`` and return it.

    Equation (with the default arguments):
        RouteFactor = 0.6 * (TravelTime / 15 min) + 0.4 * (TotalDistance / 5 km)

    The frame is modified in place: ``distance`` and ``duration`` are cast
    to float, and the columns ``duration_min`` and ``route_factor`` are
    added. The same object is returned for convenience.

    Args:
        df: DataFrame with ``distance`` and ``duration`` columns whose
            values can be cast to float. ``distance`` is used as-is in the
            formula (assumes it is already in kilometers; TODO confirm).
        time_weight: Weight applied to the normalized travel time.
        distance_weight: Weight applied to the normalized distance.
        reference_time_min: Travel time, in minutes, that normalizes to 1.
        reference_distance_km: Distance, in kilometers, that normalizes to 1.
        duration_divisor: Value ``duration`` is divided by to obtain
            ``duration_min``.

    Returns:
        pandas.DataFrame: ``df`` itself, with the added columns.

    Raises:
        ValueError: If ``reference_time_min``, ``reference_distance_km`` or
            ``duration_divisor`` is zero.
    """
    # pandas would silently turn a zero divisor into inf/NaN; fail loudly instead.
    for label, value in (
        ('reference_time_min', reference_time_min),
        ('reference_distance_km', reference_distance_km),
        ('duration_divisor', duration_divisor),
    ):
        if value == 0:
            raise ValueError(f"{label} must be non-zero")

    df['distance'] = df['distance'].astype(float)
    df['duration'] = df['duration'].astype(float)

    # NOTE(review): the default divisor of 100 is kept from the original
    # implementation. It is not a seconds-to-minutes conversion (that would
    # be 60); confirm the unit of `duration` and pass the right divisor.
    df['duration_min'] = df['duration'] / duration_divisor

    df['route_factor'] = (
        (time_weight * df['duration_min'] / reference_time_min)
        + (distance_weight * df['distance'] / reference_distance_km)
    )
    return df


def group_route_factors(df):
    """Summarize ``route_factor`` per ``route_type`` as "mean ± std" text.

    Args:
        df: DataFrame with ``route_type`` and ``route_factor`` columns.

    Returns:
        pandas.Series: Named ``result`` and indexed by ``route_type``; each
        value is the group's mean and standard deviation formatted with two
        decimals, e.g. ``"1.50 ± 0.71"``.
    """
    def _format_row(stats_row):
        # Render one group's statistics with two decimal places each.
        return f"{stats_row['mean']:.2f} ± {stats_row['std']:.2f}"

    factors_by_type = df.groupby('route_type')['route_factor']
    summary = factors_by_type.agg(['mean', 'std'])

    summary['result'] = summary.apply(_format_row, axis=1)
    return summary['result']





In [None]:
# Add a per-row route factor, then summarize it per route_type as "mean ± std".
result_for_all_rows = calculate_route_factor(filtered_df)
result = group_route_factors(result_for_all_rows)
# Bare expression so the notebook shows the summary as the cell output.
result

route_type
balanced    1.64 ± 0.45
safest      1.64 ± 0.45
shortest    1.64 ± 0.45
Name: result, dtype: object