In [2]:
#Question No 1
import pandas as pd
import numpy as np

def calculate_distance_matrix(data):
    df = pd.DataFrame(data)

    # Create a matrix with unique toll locations (IDs)
    unique_ids = pd.unique(df[['id_1', 'id_2']].values.ravel())
    distance_matrix = pd.DataFrame(index=unique_ids, columns=unique_ids, dtype=float)

    # Initialize all distances as infinity (since not all paths are directly connected)
    distance_matrix[:] = np.inf
    np.fill_diagonal(distance_matrix.values, 0)  # Set diagonal to 0

    # Fill in known distances from the dataset
    for _, row in df.iterrows():
        id_1, id_2, distance = row['id_1'], row['id_2'], row['distance']
        distance_matrix.loc[id_1, id_2] = distance
        distance_matrix.loc[id_2, id_1] = distance  # Ensure symmetry

    # Apply Floyd-Warshall algorithm to compute the shortest cumulative distances
    for k in unique_ids:
        for i in unique_ids:
            for j in unique_ids:
                distance_matrix.loc[i, j] = min(distance_matrix.loc[i, j], distance_matrix.loc[i, k] + distance_matrix.loc[k, j])

    return distance_matrix

# Example usage:
data = [
    {'id_1': 1001400, 'id_2': 1001402, 'distance': 9.7},
    {'id_1': 1001402, 'id_2': 1001404, 'distance': 20.2},
    {'id_1': 1001404, 'id_2': 1001406, 'distance': 16.0},
    {'id_1': 1001406, 'id_2': 1001408, 'distance': 21.7},
    {'id_1': 1001408, 'id_2': 1001410, 'distance': 11.1},
    {'id_1': 1001410, 'id_2': 1001412, 'distance': 15.6},
    # Add more data here as needed
]

distance_matrix = calculate_distance_matrix(data)
print(distance_matrix)


         1001400  1001402  1001404  1001406  1001408  1001410  1001412
1001400      0.0      9.7     29.9     45.9     67.6     78.7     94.3
1001402      9.7      0.0     20.2     36.2     57.9     69.0     84.6
1001404     29.9     20.2      0.0     16.0     37.7     48.8     64.4
1001406     45.9     36.2     16.0      0.0     21.7     32.8     48.4
1001408     67.6     57.9     37.7     21.7      0.0     11.1     26.7
1001410     78.7     69.0     48.8     32.8     11.1      0.0     15.6
1001412     94.3     84.6     64.4     48.4     26.7     15.6      0.0


In [4]:
#Question No 2
def unroll_distance_matrix(distance_matrix):
    # Unroll the matrix into long-form DataFrame
    unrolled_df = distance_matrix.stack().reset_index()
    
    # Rename the columns
    unrolled_df.columns = ['id_start', 'id_end', 'distance']
    
    # Remove rows where id_start == id_end (i.e., diagonal elements)
    unrolled_df = unrolled_df[unrolled_df['id_start'] != unrolled_df['id_end']]
    
    return unrolled_df

# Example usage with the previously calculated distance_matrix:
unrolled_matrix = unroll_distance_matrix(distance_matrix)
print(unrolled_matrix)

    id_start   id_end  distance
1    1001400  1001402       9.7
2    1001400  1001404      29.9
3    1001400  1001406      45.9
4    1001400  1001408      67.6
5    1001400  1001410      78.7
6    1001400  1001412      94.3
7    1001402  1001400       9.7
9    1001402  1001404      20.2
10   1001402  1001406      36.2
11   1001402  1001408      57.9
12   1001402  1001410      69.0
13   1001402  1001412      84.6
14   1001404  1001400      29.9
15   1001404  1001402      20.2
17   1001404  1001406      16.0
18   1001404  1001408      37.7
19   1001404  1001410      48.8
20   1001404  1001412      64.4
21   1001406  1001400      45.9
22   1001406  1001402      36.2
23   1001406  1001404      16.0
25   1001406  1001408      21.7
26   1001406  1001410      32.8
27   1001406  1001412      48.4
28   1001408  1001400      67.6
29   1001408  1001402      57.9
30   1001408  1001404      37.7
31   1001408  1001406      21.7
33   1001408  1001410      11.1
34   1001408  1001412      26.7
35   100

In [5]:
#Question No 3
def find_ids_within_ten_percentage_threshold(df, reference_id):
    # Calculate the average distance for the reference id_start
    reference_avg = df[df['id_start'] == reference_id]['distance'].mean()
    
    # Calculate the 10% threshold (floor and ceiling)
    lower_bound = reference_avg * 0.90  # 10% below
    upper_bound = reference_avg * 1.10  # 10% above
    
    # Calculate the average distance for all id_start values
    avg_distances = df.groupby('id_start')['distance'].mean().reset_index()
    
    # Filter to find id_start values within the 10% threshold
    ids_within_threshold = avg_distances[
        (avg_distances['distance'] >= lower_bound) & 
        (avg_distances['distance'] <= upper_bound)
    ]['id_start'].tolist()
    
    # Sort the ids in ascending order
    ids_within_threshold.sort()
    
    return ids_within_threshold

# Example usage with the unrolled_matrix DataFrame and a reference id:
reference_id = 1001400
ids_within_threshold = find_ids_within_ten_percentage_threshold(unrolled_matrix, reference_id)
print(ids_within_threshold)

[1001400, 1001412]


In [6]:
#Question No 4
def calculate_toll_rate(df):
    # Define the rate coefficients for each vehicle type
    rate_coefficients = {
        'moto': 0.8,
        'car': 1.2,
        'rv': 1.5,
        'bus': 2.2,
        'truck': 3.6
    }
    
    # Calculate the toll rates for each vehicle type by multiplying the distance
    for vehicle_type, rate in rate_coefficients.items():
        df[vehicle_type] = df['distance'] * rate
    
    return df

# Example usage with the unrolled_matrix DataFrame
updated_df_with_toll_rates = calculate_toll_rate(unrolled_matrix)
print(updated_df_with_toll_rates.head())

   id_start   id_end  distance   moto    car      rv     bus   truck
1   1001400  1001402       9.7   7.76  11.64   14.55   21.34   34.92
2   1001400  1001404      29.9  23.92  35.88   44.85   65.78  107.64
3   1001400  1001406      45.9  36.72  55.08   68.85  100.98  165.24
4   1001400  1001408      67.6  54.08  81.12  101.40  148.72  243.36
5   1001400  1001410      78.7  62.96  94.44  118.05  173.14  283.32


In [7]:
#Question  No 5
import pandas as pd
from datetime import time

def calculate_time_based_toll_rates(df):
    # Define day and time intervals
    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    
    time_intervals = [
        {'start_time': time(0, 0, 0), 'end_time': time(10, 0, 0), 'weekdays_discount': 0.8, 'weekends_discount': 0.7},
        {'start_time': time(10, 0, 0), 'end_time': time(18, 0, 0), 'weekdays_discount': 1.2, 'weekends_discount': 0.7},
        {'start_time': time(18, 0, 0), 'end_time': time(23, 59, 59), 'weekdays_discount': 0.8, 'weekends_discount': 0.7}
    ]
    
    # Prepare an empty list to store new rows with time-based toll rates
    time_based_data = []

    # Iterate over each row in the DataFrame and each time interval/day combination
    for index, row in df.iterrows():
        for day in days_of_week:
            for interval in time_intervals:
                # Determine the discount factor based on the day (weekday vs weekend)
                if day in ['Saturday', 'Sunday']:
                    discount_factor = interval['weekends_discount']
                else:
                    discount_factor = interval['weekdays_discount']
                
                # Apply the discount factor to the vehicle toll rates
                adjusted_row = row.copy()
                adjusted_row['start_day'] = day
                adjusted_row['end_day'] = day
                adjusted_row['start_time'] = interval['start_time']
                adjusted_row['end_time'] = interval['end_time']
                
                # Adjust each vehicle toll rate
                adjusted_row['moto'] *= discount_factor
                adjusted_row['car'] *= discount_factor
                adjusted_row['rv'] *= discount_factor
                adjusted_row['bus'] *= discount_factor
                adjusted_row['truck'] *= discount_factor

                # Append the adjusted row to the list
                time_based_data.append(adjusted_row)
    
    # Create a new DataFrame from the time-based toll rate data
    time_based_df = pd.DataFrame(time_based_data)
    
    return time_based_df

# Example usage with the DataFrame from Question 12 (updated_df_with_toll_rates)
time_based_toll_rates_df = calculate_time_based_toll_rates(updated_df_with_toll_rates)
print(time_based_toll_rates_df.head())

    id_start     id_end  distance   moto     car     rv     bus   truck  \
1  1001400.0  1001402.0       9.7  6.208   9.312  11.64  17.072  27.936   
1  1001400.0  1001402.0       9.7  9.312  13.968  17.46  25.608  41.904   
1  1001400.0  1001402.0       9.7  6.208   9.312  11.64  17.072  27.936   
1  1001400.0  1001402.0       9.7  6.208   9.312  11.64  17.072  27.936   
1  1001400.0  1001402.0       9.7  9.312  13.968  17.46  25.608  41.904   

  start_day  end_day start_time  end_time  
1    Monday   Monday   00:00:00  10:00:00  
1    Monday   Monday   10:00:00  18:00:00  
1    Monday   Monday   18:00:00  23:59:59  
1   Tuesday  Tuesday   00:00:00  10:00:00  
1   Tuesday  Tuesday   10:00:00  18:00:00  
