In [1]:
import numpy as np
import pandas as pd

# Question 9: Distance Matrix Calculation

In [2]:
# Load the raw edge list (dataset-2) that the distance-matrix cell below operates on.
# NOTE(review): this is an absolute, machine-specific path — the cell only runs
# where that exact file exists; consider a configurable data directory.
df = pd.read_csv(filepath_or_buffer=r"D:\Imarticus\Nandugade_Mapup\MapUp-DA-Assessment-2024\datasets\dataset-2.csv")

def calculate_distance_matrix(df):
    """Build the all-pairs shortest-distance matrix from an edge list.

    Each row of ``df`` is treated as an undirected edge between ``id_start``
    and ``id_end`` whose length is ``distance``. The shortest cumulative
    distance between every pair of IDs is then derived with the
    Floyd-Warshall recurrence.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``id_start``, ``id_end`` and ``distance``.
        Distances are assumed to be non-negative.

    Returns
    -------
    pandas.DataFrame
        Square float matrix whose index and columns are the unique IDs, in
        the order ``pd.unique`` yields them from the flattened ID columns.
        The diagonal is 0, unreachable pairs are ``inf``, and the matrix is
        symmetric.
    """
    unique_ids = pd.unique(df[['id_start', 'id_end']].values.ravel('K'))
    n_ids = len(unique_ids)

    # Work on a NumPy array we own and wrap it in a DataFrame only at the end.
    # Writing through ``DataFrame.values`` depends on it being a writable view,
    # which pandas does not guarantee (it is read-only under Copy-on-Write).
    dist = np.full((n_ids, n_ids), np.inf)

    # Translate ID labels into row/column positions once, up front.
    id_positions = pd.Index(unique_ids)
    start_pos = id_positions.get_indexer(df['id_start'])
    end_pos = id_positions.get_indexer(df['id_end'])
    edge_len = df['distance'].to_numpy(dtype=float)

    # Record every edge in both directions. ``minimum.at`` keeps the shortest
    # value when the same pair is listed more than once, instead of letting
    # whichever row happens to come last overwrite a shorter direct edge.
    np.minimum.at(dist, (start_pos, end_pos), edge_len)
    np.minimum.at(dist, (end_pos, start_pos), edge_len)

    # Set the diagonal after loading the edges so a self-loop row in the data
    # cannot leave a non-zero distance from an ID to itself.
    diag = np.arange(n_ids)
    dist[diag, diag] = 0.0

    # Floyd-Warshall: for each intermediate node k, relax all (i, j) pairs at
    # once via broadcasting of column k against row k.
    for k in range(n_ids):
        np.minimum(dist, dist[:, k, None] + dist[None, k, :], out=dist)

    return pd.DataFrame(dist, index=unique_ids, columns=unique_ids)

# Compute the shortest-distance matrix for the loaded dataset and display it
distance_matrix = calculate_distance_matrix(df=df)
distance_matrix

Unnamed: 0,1001400,1001402,1001404,1001406,1001408,1001410,1001412,1001414,1001416,1001418,...,1001458,1001460,1001461,1001462,1001464,1001466,1001468,1001470,1001437,1001472
1001400,0.0,9.7,29.9,45.9,67.6,78.7,94.3,112.5,125.7,139.3,...,348.8,353.9,366.7,371.8,398.5,407.0,417.7,428.3,242.1,444.3
1001402,9.7,0.0,20.2,36.2,57.9,69.0,84.6,102.8,116.0,129.6,...,339.1,344.2,357.0,362.1,388.8,397.3,408.0,418.6,232.4,434.6
1001404,29.9,20.2,0.0,16.0,37.7,48.8,64.4,82.6,95.8,109.4,...,318.9,324.0,336.8,341.9,368.6,377.1,387.8,398.4,212.2,414.4
1001406,45.9,36.2,16.0,0.0,21.7,32.8,48.4,66.6,79.8,93.4,...,302.9,308.0,320.8,325.9,352.6,361.1,371.8,382.4,196.2,398.4
1001408,67.6,57.9,37.7,21.7,0.0,11.1,26.7,44.9,58.1,71.7,...,281.2,286.3,299.1,304.2,330.9,339.4,350.1,360.7,174.5,376.7
1001410,78.7,69.0,48.8,32.8,11.1,0.0,15.6,33.8,47.0,60.6,...,270.1,275.2,288.0,293.1,319.8,328.3,339.0,349.6,163.4,365.6
1001412,94.3,84.6,64.4,48.4,26.7,15.6,0.0,18.2,31.4,45.0,...,254.5,259.6,272.4,277.5,304.2,312.7,323.4,334.0,147.8,350.0
1001414,112.5,102.8,82.6,66.6,44.9,33.8,18.2,0.0,13.2,26.8,...,236.3,241.4,254.2,259.3,286.0,294.5,305.2,315.8,129.6,331.8
1001416,125.7,116.0,95.8,79.8,58.1,47.0,31.4,13.2,0.0,13.6,...,223.1,228.2,241.0,246.1,272.8,281.3,292.0,302.6,116.4,318.6
1001418,139.3,129.6,109.4,93.4,71.7,60.6,45.0,26.8,13.6,0.0,...,209.5,214.6,227.4,232.5,259.2,267.7,278.4,289.0,102.8,305.0


# Question 10: Unroll Distance Matrix

In [14]:
def unroll_distance_matrix(distance_matrix):
    """Flatten a distance matrix into a long ``(id_start, id_end, distance)`` table.

    Parameters
    ----------
    distance_matrix : pandas.DataFrame
        Matrix whose index labels are start IDs and whose column labels are
        end IDs.

    Returns
    -------
    pandas.DataFrame
        One row per (index label, column label) combination whose labels
        differ, ordered row by row. The columns ``id_start``, ``id_end`` and
        ``distance`` are always present, even when the input is empty, so
        downstream column lookups do not fail.
    """
    row_labels = distance_matrix.index.to_numpy()
    col_labels = distance_matrix.columns.to_numpy()

    # Row-major flattening: each row label is repeated across all columns,
    # while the column labels cycle once per row.
    start_ids = np.repeat(row_labels, len(col_labels))
    end_ids = np.tile(col_labels, len(row_labels))
    distances = distance_matrix.to_numpy().ravel()

    # Drop the entries that pair an ID with itself.
    keep = start_ids != end_ids

    return pd.DataFrame({
        'id_start': start_ids[keep],
        'id_end': end_ids[keep],
        'distance': distances[keep],
    })

# Flatten the distance matrix into long (id_start, id_end, distance) form and display it
unrolled_distance_df = unroll_distance_matrix(distance_matrix=distance_matrix)
unrolled_distance_df

Unnamed: 0,id_start,id_end,distance
0,1001400,1001402,9.7
1,1001400,1001404,29.9
2,1001400,1001406,45.9
3,1001400,1001408,67.6
4,1001400,1001410,78.7
...,...,...,...
1801,1001472,1001464,45.8
1802,1001472,1001466,37.3
1803,1001472,1001468,26.6
1804,1001472,1001470,16.0


# Question 11: Finding IDs within Percentage Threshold

In [13]:
def find_ids_within_ten_percentage_threshold(unrolled_df, reference_id):
    """Return the start IDs having a distance within ±10% of a reference average.

    The average of ``distance`` over all rows whose ``id_start`` equals
    ``reference_id`` defines an inclusive band of 90%-110% of that average.
    Every row of ``unrolled_df`` whose ``distance`` falls in the band
    contributes its ``id_start`` to the result.

    Parameters
    ----------
    unrolled_df : pandas.DataFrame
        Table with at least the columns ``id_start`` and ``distance``.
    reference_id
        Value looked up in the ``id_start`` column.

    Returns
    -------
    list
        Sorted unique ``id_start`` values; an empty list when ``reference_id``
        has no rows.
    """
    from_reference = unrolled_df.loc[unrolled_df['id_start'] == reference_id, 'distance']
    if len(from_reference) == 0:
        return []

    mean_distance = from_reference.mean()
    floor, ceiling = mean_distance * 0.9, mean_distance * 1.1

    # NOTE(review): the band is applied to individual row distances, not to
    # each ID's own average distance — confirm this is the intended meaning.
    in_band = unrolled_df['distance'].between(floor, ceiling)

    return sorted(unrolled_df.loc[in_band, 'id_start'].unique())

# Example 
# reference_id_example = 1001400
# ids_within_threshold = find_ids_within_ten_percentage_threshold(unrolled_distance_df, reference_id_example)
# ids_within_threshold

# Question 12: Calculate Toll Rate

In [15]:
def calculate_toll_rate(unrolled_df):
    """Add one toll column per vehicle type, computed as ``distance * rate``.

    Parameters
    ----------
    unrolled_df : pandas.DataFrame
        Table with a numeric ``distance`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every original column plus ``moto``, ``car``,
        ``rv``, ``bus`` and ``truck``.
    """
    rates = {
        'moto': 0.8,
        'car': 1.2,
        'rv': 1.5,
        'bus': 2.2,
        'truck': 3.6
    }

    toll_columns = {
        vehicle: unrolled_df['distance'] * rate
        for vehicle, rate in rates.items()
    }

    # ``assign`` returns a new frame, so the caller's DataFrame (possibly one
    # already displayed by an earlier cell) does not silently gain columns and
    # re-running the cell stays idempotent.
    return unrolled_df.assign(**toll_columns)

# Example 
#toll_rate_df = calculate_toll_rate(unrolled_distance_df)
#toll_rate_df

# Question 13: Calculate Time-Based Toll Rates

In [17]:
import datetime

def calculate_time_based_toll_rates(toll_rate_df):
    """Expand base toll rates into per-day, per-time-window rates.

    For every distinct ``(id_start, id_end)`` pair, the first matching row of
    ``toll_rate_df`` supplies the distance and the base vehicle rates. One
    output row is produced per day and time window:

    * Monday-Friday: 00:00-10:00 (x0.8), 10:00-18:00 (x1.2), 18:00-23:59:59 (x0.8)
    * Saturday-Sunday: 00:00-23:59:59 (x0.7)

    Parameters
    ----------
    toll_rate_df : pandas.DataFrame
        Must contain ``id_start``, ``id_end``, ``distance`` and the vehicle
        columns ``moto``, ``car``, ``rv``, ``bus`` and ``truck``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id_start``, ``id_end``, ``distance``, ``start_day``,
        ``start_time``, ``end_day``, ``end_time`` followed by the scaled
        vehicle columns; 17 rows per pair. The columns are present even when
        there are no rows.
    """
    weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
    weekend_days = ["Saturday", "Sunday"]
    vehicle_columns = ['moto', 'car', 'rv', 'bus', 'truck']
    output_columns = [
        'id_start', 'id_end', 'distance',
        'start_day', 'start_time', 'end_day', 'end_time',
    ] + vehicle_columns

    # Each window is (start_time, end_time, multiplier applied to the base rate).
    weekday_windows = [
        (datetime.time(0, 0), datetime.time(10, 0), 0.8),
        (datetime.time(10, 0), datetime.time(18, 0), 1.2),
        (datetime.time(18, 0), datetime.time(23, 59, 59), 0.8),
    ]
    weekend_windows = [
        (datetime.time(0, 0), datetime.time(23, 59, 59), 0.7),
    ]

    # Monday..Sunday, each paired with the windows that apply on that day.
    schedule = (
        [(day, weekday_windows) for day in weekdays]
        + [(day, weekend_windows) for day in weekend_days]
    )

    expanded_rows = []
    for (id_start, id_end), group in toll_rate_df.groupby(['id_start', 'id_end']):
        # Only the first row of each pair is used as the base.
        distance = group['distance'].iloc[0]
        base_rates = {vehicle: group[vehicle].iloc[0] for vehicle in vehicle_columns}

        for day, windows in schedule:
            for start_time, end_time, factor in windows:
                new_row = {
                    'id_start': id_start,
                    'id_end': id_end,
                    'distance': distance,
                    'start_day': day,
                    'start_time': start_time,
                    # Every window starts and ends within one calendar day, so
                    # the end day is the start day. The earlier logic produced
                    # 'Friday' for Monday and the following day otherwise,
                    # which contradicted the same-day time windows.
                    'end_day': day,
                    'end_time': end_time,
                }
                for vehicle in vehicle_columns:
                    new_row[vehicle] = base_rates[vehicle] * factor
                expanded_rows.append(new_row)

    # Passing the column list keeps the schema stable for empty input.
    return pd.DataFrame(expanded_rows, columns=output_columns)

# Example 
#time_based_toll_rates_df = calculate_time_based_toll_rates(toll_rate_df)
#time_based_toll_rates_df