In [2]:
#Question 1 (Distance Matrix Calculation)
import pandas as pd
import networkx as nx

def calculate_distance_matrix(df):
    """
    Calculate a distance matrix based on the dataframe, df.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame: Distance matrix
    """
    # Create a directed graph
    G = nx.DiGraph()

    # Add edges with distances from the dataframe
    for _, row in df.iterrows():
        G.add_edge(row['id_start'], row['id_end'], distance=row['distance'])
        G.add_edge(row['id_end'], row['id_start'], distance=row['distance'])  # Ensure symmetry

    # Get a list of unique nodes
    nodes = list(G.nodes())

    # Initialize an empty matrix
    distance_matrix = pd.DataFrame(index=nodes, columns=nodes)

    # Calculate distances and fill in the matrix
    for start_node in nodes:
        for end_node in nodes:
            if start_node == end_node:
                distance_matrix.at[start_node, end_node] = 0
            else:
                try:
                    # Use networkx's shortest_path_length to calculate distances
                    distance = nx.shortest_path_length(G, start_node, end_node, weight='distance')
                    distance_matrix.at[start_node, end_node] = distance
                except nx.NetworkXNoPath:
                    # If there is no path, set distance to NaN
                    distance_matrix.at[start_node, end_node] = float('nan')

    return distance_matrix

file_path = 'C:/Users/Akshit/OneDrive/Desktop/dataset-3.csv'
df_dataset3 = pd.read_csv(file_path)
result_matrix = calculate_distance_matrix(df_dataset3)
print(result_matrix)

          1001400.0 1001402.0 1001404.0 1001406.0 1001408.0 1001410.0  \
1001400.0         0       9.7      29.9      45.9      67.6      78.7   
1001402.0       9.7         0      20.2      36.2      57.9      69.0   
1001404.0      29.9      20.2         0      16.0      37.7      48.8   
1001406.0      45.9      36.2      16.0         0      21.7      32.8   
1001408.0      67.6      57.9      37.7      21.7         0      11.1   
1001410.0      78.7      69.0      48.8      32.8      11.1         0   
1001412.0      94.3      84.6      64.4      48.4      26.7      15.6   
1001414.0     112.5     102.8      82.6      66.6      44.9      33.8   
1001416.0     125.7     116.0      95.8      79.8      58.1      47.0   
1001418.0     139.3     129.6     109.4      93.4      71.7      60.6   
1001420.0     152.2     142.5     122.3     106.3      84.6      73.5   
1001422.0     161.8     152.1     131.9     115.9      94.2      83.1   
1001424.0     173.2     163.5     143.3     127.3  

In [3]:
#Question 2 Unroll Distance Matrix
import pandas as pd

def unroll_distance_matrix(df):
    """
    Unroll a distance matrix to a DataFrame in the style of the initial dataset.

    Args:
        df (pandas.DataFrame): Input DataFrame containing a distance matrix.

    Returns:
        pandas.DataFrame: Unrolled DataFrame containing columns 'id_start', 'id_end', and 'distance'.
    """
    # Create an empty list to store the unrolled data
    unrolled_data = []

    # Iterate over the rows and columns of the distance matrix
    for id_start in df.index:
        for id_end in df.columns:
            if id_start != id_end:  # Exclude same id_start to id_end
                distance = df.at[id_start, id_end]
                unrolled_data.append({'id_start': id_start, 'id_end': id_end, 'distance': distance})

    # Create a DataFrame from the unrolled data
    unrolled_df = pd.DataFrame(unrolled_data)

    return unrolled_df
result_unrolled_df = unroll_distance_matrix(result_matrix)
print(result_unrolled_df)

       id_start     id_end  distance
0     1001400.0  1001402.0       9.7
1     1001400.0  1001404.0      29.9
2     1001400.0  1001406.0      45.9
3     1001400.0  1001408.0      67.6
4     1001400.0  1001410.0      78.7
...         ...        ...       ...
1801  1001472.0  1001462.0      72.5
1802  1001472.0  1001464.0      45.8
1803  1001472.0  1001466.0      37.3
1804  1001472.0  1001468.0      26.6
1805  1001472.0  1001470.0      16.0

[1806 rows x 3 columns]


In [4]:
#Question 3 Finding IDs within Percentage Threshold
import pandas as pd

def find_ids_within_ten_percentage_threshold(df, reference_id):
    """
    Find all IDs whose average distance lies within 10% of the average distance of the reference ID.

    Args:
        df (pandas.DataFrame): Input DataFrame.
        reference_id (int): Reference ID for calculating the average distance.

    Returns:
        pandas.DataFrame: DataFrame with IDs whose average distance is within the specified percentage threshold
                          of the reference ID's average distance.
    """
    # Calculate the average distance for the reference_id
    reference_avg_distance = df[df['id_start'] == reference_id]['distance'].mean()

    # Calculate the lower and upper bounds for the threshold (10%)
    lower_bound = reference_avg_distance - 0.1 * reference_avg_distance
    upper_bound = reference_avg_distance + 0.1 * reference_avg_distance

    # Filter the DataFrame based on the threshold
    filtered_df = df[(df['id_start'] != reference_id) & (df['distance'] >= lower_bound) & (df['distance'] <= upper_bound)]

    return filtered_df
# Replace with the desired reference ID
reference_id = 1001472  
result_filtered_df = find_ids_within_ten_percentage_threshold(result_unrolled_df, reference_id)
print(result_filtered_df)

       id_start     id_end  distance
12    1001400.0  1001426.0     191.8
13    1001400.0  1001428.0     207.6
14    1001400.0  1001430.0     216.2
15    1001400.0  1001432.0     225.2
55    1001402.0  1001428.0     197.9
...         ...        ...       ...
1736  1001470.0  1001428.0     220.7
1737  1001470.0  1001430.0     212.1
1738  1001470.0  1001432.0     203.1
1739  1001470.0  1001434.0     195.2
1740  1001470.0  1001436.0     191.2

[166 rows x 3 columns]


In [5]:
#Question 4 (Completed) Calculate Toll Rate
import pandas as pd

def calculate_toll_rate(df):
    """
    Calculate toll rates for each vehicle type based on the unrolled DataFrame.

    Args:
        df (pandas.DataFrame): Unrolled DataFrame.

    Returns:
        pandas.DataFrame: DataFrame with added columns for moto, car, rv, bus, and truck toll rates.
    """
    # Rate coefficients for each vehicle type
    rate_coefficients = {'moto': 0.8, 'car': 1.2, 'rv': 1.5, 'bus': 2.2, 'truck': 3.6}

    # Add columns for each vehicle type and calculate toll rates
    for vehicle_type, rate_coefficient in rate_coefficients.items():
        column_name = f'{vehicle_type}_toll'
        df[column_name] = df['distance'] * rate_coefficient

    return df
result_with_toll_rates = calculate_toll_rate(result_unrolled_df)
print(result_with_toll_rates)

       id_start     id_end  distance  moto_toll  car_toll  rv_toll  bus_toll  \
0     1001400.0  1001402.0       9.7       7.76     11.64    14.55     21.34   
1     1001400.0  1001404.0      29.9      23.92     35.88    44.85     65.78   
2     1001400.0  1001406.0      45.9      36.72     55.08    68.85    100.98   
3     1001400.0  1001408.0      67.6      54.08     81.12   101.40    148.72   
4     1001400.0  1001410.0      78.7      62.96     94.44   118.05    173.14   
...         ...        ...       ...        ...       ...      ...       ...   
1801  1001472.0  1001462.0      72.5      58.00     87.00   108.75    159.50   
1802  1001472.0  1001464.0      45.8      36.64     54.96    68.70    100.76   
1803  1001472.0  1001466.0      37.3      29.84     44.76    55.95     82.06   
1804  1001472.0  1001468.0      26.6      21.28     31.92    39.90     58.52   
1805  1001472.0  1001470.0      16.0      12.80     19.20    24.00     35.20   

      truck_toll  
0          34.92  
1