
# Question 1: Distance Matrix Calculation# 

In [1]:
import pandas as pd
import networkx as nx

def calculate_distance_matrix(dataframe):
    # Create a graph
    G = nx.Graph()

    # Add edges and their distances to the graph
    for _, row in dataframe.iterrows():
        G.add_edge(row['id_start'], row['id_end'], distance=row['distance'])
        G.add_edge(row['id_end'], row['id_start'], distance=row['distance'])

    # Calculate shortest path distances between all pairs of nodes
    all_pairs_distances = dict(nx.all_pairs_dijkstra_path_length(G))

    # Create a DataFrame for the distance matrix
    distances_df = pd.DataFrame(all_pairs_distances).fillna(0)

    return distances_df


df = pd.read_csv('C:/Users/gaurm/Desktop/MAPUP/dataset-3.csv')

# Call the function and store the result in a DataFrame
distance_matrix = calculate_distance_matrix(df)

# Display the result
print(distance_matrix)


           1001400.0  1001402.0  1001404.0  1001406.0  1001408.0  1001410.0  \
1001400.0          0          1          2          3          4          5   
1001402.0          1          0          1          2          3          4   
1001404.0          2          1          0          1          2          3   
1001406.0          3          2          1          0          1          2   
1001408.0          4          3          2          1          0          1   
1001410.0          5          4          3          2          1          0   
1001412.0          6          5          4          3          2          1   
1001414.0          7          6          5          4          3          2   
1001416.0          8          7          6          5          4          3   
1001418.0          9          8          7          6          5          4   
1001420.0         10          9          8          7          6          5   
1001422.0         11         10          9          

# Question 2: Unroll Distance Matrix

In [2]:
import pandas as pd

def unroll_distance_matrix(distance_matrix):
    # Get the column names (id values) from the input distance_matrix
    id_values = distance_matrix.columns

    # Initialize lists to store id_start, id_end, and distance values
    id_start_list, id_end_list, distance_list = [], [], []

    # Loop through the id values and create combinations
    for id_start in id_values:
        for id_end in id_values:
            if id_start != id_end:
                # Append values to lists
                id_start_list.append(id_start)
                id_end_list.append(id_end)
                distance_list.append(distance_matrix.loc[id_start, id_end])

    # Create a DataFrame from the lists
    unrolled_df = pd.DataFrame({
        'id_start': id_start_list,
        'id_end': id_end_list,
        'distance': distance_list
    })

    return unrolled_df

# Assuming you have a DataFrame named distance_matrix from Question 1
# Call the function and store the result in a new DataFrame
unrolled_distance_df = unroll_distance_matrix(distance_matrix)

# Display the result
print(unrolled_distance_df)


       id_start     id_end  distance
0     1001400.0  1001402.0         1
1     1001400.0  1001404.0         2
2     1001400.0  1001406.0         3
3     1001400.0  1001408.0         4
4     1001400.0  1001410.0         5
...         ...        ...       ...
1801  1001472.0  1001462.0         5
1802  1001472.0  1001464.0         4
1803  1001472.0  1001466.0         3
1804  1001472.0  1001468.0         2
1805  1001472.0  1001470.0         1

[1806 rows x 3 columns]


# Question 3: Finding IDs within Percentage Threshold

In [19]:
import pandas as pd

def find_ids_within_ten_percentage_threshold(df, reference_value):
    # Ensure the reference value is an integer
    reference_value = int(reference_value)

    # Filter DataFrame based on the reference value
    reference_df = df[df['id_start'] == reference_value]

    # Check if the reference value is present in the DataFrame
    if reference_df.empty:
        print(f"No data found for id_start: {reference_value}")
        return []

    # Calculate the average distance for the reference value
    avg_distance = reference_df['distance'].mean()

    # Calculate the threshold values (10% above and below the average)
    lower_threshold = avg_distance - (avg_distance * 0.10)
    upper_threshold = avg_distance + (avg_distance * 0.10)

    # Filter DataFrame based on the threshold values
    within_threshold_df = df[(df['id_start'] != reference_value) & 
                             (df['distance'] >= lower_threshold) & 
                             (df['distance'] <= upper_threshold)]

    # Get unique values from the 'id_start' column and sort them
    within_threshold_ids = sorted(within_threshold_df['id_start'].unique())

    return within_threshold_ids


# Question 4: Calculate Toll Rate

In [17]:
import pandas as pd

def calculate_toll_rate(df):
    # Define rate coefficients for each vehicle type
    rate_coefficients = {'moto': 0.8, 'car': 1.2, 'rv': 1.5, 'bus': 2.2, 'truck': 3.6}

    # Add columns for each vehicle type with their respective toll rates
    for vehicle_type, rate_coefficient in rate_coefficients.items():
        df[vehicle_type] = df['distance'] * rate_coefficient

    return df

# Assuming you have a DataFrame named unrolled_distance_df from Question 2
# Call the function and store the result in a new DataFrame
toll_rate_df = calculate_toll_rate(unrolled_distance_df)

# Display the result
print(toll_rate_df)


       id_start     id_end  distance  moto  car   rv   bus  truck
0     1001400.0  1001402.0         1   0.8  1.2  1.5   2.2    3.6
1     1001400.0  1001404.0         2   1.6  2.4  3.0   4.4    7.2
2     1001400.0  1001406.0         3   2.4  3.6  4.5   6.6   10.8
3     1001400.0  1001408.0         4   3.2  4.8  6.0   8.8   14.4
4     1001400.0  1001410.0         5   4.0  6.0  7.5  11.0   18.0
...         ...        ...       ...   ...  ...  ...   ...    ...
1801  1001472.0  1001462.0         5   4.0  6.0  7.5  11.0   18.0
1802  1001472.0  1001464.0         4   3.2  4.8  6.0   8.8   14.4
1803  1001472.0  1001466.0         3   2.4  3.6  4.5   6.6   10.8
1804  1001472.0  1001468.0         2   1.6  2.4  3.0   4.4    7.2
1805  1001472.0  1001470.0         1   0.8  1.2  1.5   2.2    3.6

[1806 rows x 8 columns]


# Question 5: Calculate Time-Based Toll Rates# 