In [5]:
import pandas as pd

def calculate_distance_matrix(df: pd.DataFrame) -> pd.DataFrame:
  
    df['distance'] = pd.to_numeric(df['distance'], errors='coerce')
  
    distance_matrix = df.pivot(index='id_start', columns='id_end', values='distance').fillna(0)
    
    distance_matrix = distance_matrix + distance_matrix.T - distance_matrix * (distance_matrix.T != 0)

    return distance_matrix

def find_ids_within_ten_percentage_threshold(df: pd.DataFrame, reference_id: int) -> pd.DataFrame:
 
    df['distance'] = pd.to_numeric(df['distance'], errors='coerce')
    
    reference_distance = df[df['id_start'] == reference_id]['distance'].mean()
    threshold = 0.1 * reference_distance
    
    result_df = df.groupby('id_start')['distance'].mean().reset_index()
    result_df = result_df.dropna()  # Drop rows with NaN values
    result_df = result_df[(result_df['distance'] >= reference_distance - threshold) & 
                          (result_df['distance'] <= reference_distance + threshold)]

    return result_df

dataset_path = r'C:\Users\User\Downloads\dataset-3.csv'
data = pd.read_csv(dataset_path)

distance_matrix = calculate_distance_matrix(data)
unrolled_df = unroll_distance_matrix (distance_matrix)

reference_id = 1

result_df = find_ids_within_ten_percentage_threshold(unrolled_df, reference_id)
print("Result DataFrame:")
print(result_df)


Result DataFrame:
Empty DataFrame
Columns: [id_start, distance]
Index: []


In [3]:
import pandas as pd

def unroll_distance_matrix(distance_matrix: pd.DataFrame) -> pd.DataFrame:

    # Stack the distance matrix to create a Series with multi-level index
    stacked_matrix = distance_matrix.stack()

    # Reset index to convert multi-level index to columns
    unrolled_df = stacked_matrix.reset_index()

    # Rename columns for consistency
    unrolled_df.columns = ['id_start', 'id_end', 'distance']

    # Filter out rows where id_start is equal to id_end
    unrolled_df = unrolled_df[unrolled_df['id_start'] != unrolled_df['id_end']]

    return unrolled_df


distance_matrix = calculate_distance_matrix(data)

unrolled_df = unroll_distance_matrix(distance_matrix)
print("Unrolled DataFrame:")
print(unrolled_df)


Unrolled DataFrame:
      id_start   id_end distance
1      1001402  1001404     20.2
2      1001402  1001406      0.0
3      1001402  1001408      0.0
4      1001402  1001410      0.0
5      1001402  1001412      0.0
...        ...      ...      ...
1594   1004356  1001468      0.0
1595   1004356  1001470      0.0
1596   1004356  1001488      4.0
1597   1004356  1004354      2.0
1598   1004356  1004355      0.0

[1560 rows x 3 columns]


In [4]:
import pandas as pd

def find_ids_within_ten_percentage_threshold(df: pd.DataFrame, reference_id: int) -> pd.DataFrame:
  
   
    df['distance'] = pd.to_numeric(df['distance'], errors='coerce')

    reference_distance = df[df['id_start'] == reference_id]['distance'].mean()

    threshold = 0.1 * reference_distance
    
    result_df = df.groupby('id_start')['distance'].mean().reset_index()
    result_df = result_df[(result_df['distance'].notnull()) &  # Exclude NaN values
                          (result_df['distance'] >= reference_distance - threshold) & 
                          (result_df['distance'] <= reference_distance + threshold)]
    result_df = result_df.sort_values(by='id_start')

    return result_df

unrolled_df = unroll_distance_matrix (distance_matrix)

reference_id = 1

result_df = find_ids_within_ten_percentage_threshold(unrolled_df, reference_id)
print("Result DataFrame:")
print(result_df)


Result DataFrame:
Empty DataFrame
Columns: [id_start, distance]
Index: []


In [7]:
import pandas as pd

def calculate_toll_rate(distance_matrix: pd.DataFrame) -> pd.DataFrame:
  
    result_df = distance_matrix.copy()

    rate_coefficients = {'moto': 0.8, 'car': 1.2, 'rv': 1.5, 'bus': 2.2, 'truck': 3.6}

    for vehicle_type, rate_coefficient in rate_coefficients.items():
        result_df[vehicle_type] = result_df['distance'] * rate_coefficient

    return result_df

unrolled_df = unroll_distance_matrix(distance_matrix)
toll_rate_df = calculate_toll_rate(unrolled_df)
print("DataFrame with Toll Rates:")
print(toll_rate_df)


DataFrame with Toll Rates:
      id_start   id_end distance   moto    car    rv    bus  truck
0      1001402  1001402      0.0    0.0    0.0   0.0    0.0    0.0
1      1001402  1001404     20.2  16.16  24.24  30.3  44.44  72.72
2      1001402  1001406      0.0    0.0    0.0   0.0    0.0    0.0
3      1001402  1001408      0.0    0.0    0.0   0.0    0.0    0.0
4      1001402  1001410      0.0    0.0    0.0   0.0    0.0    0.0
...        ...      ...      ...    ...    ...   ...    ...    ...
1595   1004356  1001470      0.0    0.0    0.0   0.0    0.0    0.0
1596   1004356  1001488      4.0    3.2    4.8   6.0    8.8   14.4
1597   1004356  1004354      2.0    1.6    2.4   3.0    4.4    7.2
1598   1004356  1004355      0.0    0.0    0.0   0.0    0.0    0.0
1599   1004356  1004356      0.0    0.0    0.0   0.0    0.0    0.0

[1600 rows x 8 columns]


In [10]:
import pandas as pd

def calculate_time_based_toll_rates(distance_matrix: pd.DataFrame) -> pd.DataFrame:
    
    result_df = distance_matrix.copy()

    weekday_time_ranges = [(pd.to_datetime('00:00:00').time(), pd.to_datetime('10:00:00').time()),
                           (pd.to_datetime('10:00:00').time(), pd.to_datetime('18:00:00').time()),
                           (pd.to_datetime('18:00:00').time(), pd.to_datetime('23:59:59').time())]

    weekend_time_ranges = [(pd.to_datetime('00:00:00').time(), pd.to_datetime('23:59:59').time())
   
    for day in range(7): 
        for start_time, end_time in weekday_time_ranges:
            mask = (result_df['start_day'] == day) & (result_df['start_time'] >= start_time) & (result_df['end_time'] <= end_time)
            result_df.loc[mask, ['moto', 'car', 'rv', 'bus', 'truck']] *= 0.8

        for start_time, end_time in weekend_time_ranges:
            mask = (result_df['start_day'] == day) & (result_df['start_time'] >= start_time) & (result_df['end_time'] <= end_time)
            result_df.loc[mask, ['moto', 'car', 'rv', 'bus', 'truck']] *= 0.7

    return result_df

toll_rate_df = calculate_toll_rate(unrolled_df)

toll_rate_df['start_day'] = 0
toll_rate_df['end_day'] = 6
toll_rate_df['start_time'] = pd.to_datetime('00:00:00').time()
toll_rate_df['end_time'] = pd.to_datetime('23:59:59').time()

time_based_toll_df = calculate_time_based_toll_rates(toll_rate_df)
print("DataFrame with Time-Based Toll Rates:")
print(time_based_toll_df)


SyntaxError: invalid syntax (4136842413.py, line 13)