In [1]:
import pandas as pd
DataFrame = pd.read_csv("c:\\Users\\shri0009\\Downloads\\dataset-1.csv", encoding = 'unicode_escape' )

In [2]:
import pandas as pd
from datetime import time

# Function to calculate time-based toll rates
def calculate_time_based_toll_rates(df) -> pd.DataFrame:
    """
    Calculate time-based toll rates for different time intervals within a day.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame
    """
    
    def get_discount_factor(day, time_val):
        weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
        weekends = ['Saturday', 'Sunday']
        
        if day in weekdays:
            if time(0, 0, 0) <= time_val <= time(10, 0, 0):
                return 0.8
            elif time(10, 0, 0) < time_val <= time(18, 0, 0):
                return 1.2
            else:
                return 0.8
        elif day in weekends:
            return 0.7
        return 1.0
    
    for index, row in df.iterrows():
        start_time = time.fromisoformat(row['startTime'])
        end_time = time.fromisoformat(row['endTime'])
        
        start_factor = get_discount_factor(row['startDay'], start_time)
        end_factor = get_discount_factor(row['endDay'], end_time)
        
        toll_columns = ['able2Hov2', 'able2Hov3', 'able3Hov2', 'able3Hov3', 'able5Hov2', 'able5Hov3', 'able4Hov2', 'able4Hov3']
        for col in toll_columns:
            if pd.notnull(row[col]) and row[col] != -1:
                df.at[index, col] *= (start_factor + end_factor) / 2
    
    df['start_day'] = df['startDay']
    df['start_time'] = pd.to_datetime(df['startTime'], format='%H:%M:%S').dt.time
    df['end_day'] = df['endDay']
    df['end_time'] = pd.to_datetime(df['endTime'], format='%H:%M:%S').dt.time
    
    return df


df = pd.read_csv("C:\\Users\\shri0009\\Downloads\\dataset-1 (1).csv")

df_with_toll_rates = calculate_time_based_toll_rates(df)

df_with_toll_rates.head()  


Unnamed: 0,id,name,id_2,startDay,startTime,endDay,endTime,able2Hov2,able2Hov3,able3Hov2,able3Hov3,able5Hov2,able5Hov3,able4Hov2,able4Hov3,start_day,start_time,end_day,end_time
0,1040000,Montgomery,-1,Monday,05:00:00,Wednesday,10:00:00,2.4,2.4,-1.0,-1.0,2.4,2.4,2.4,2.4,Monday,05:00:00,Wednesday,10:00:00
1,1040010,Black,-1,Monday,10:00:00,Friday,15:00:00,6.0,6.0,-1.0,-1.0,6.0,6.0,6.0,6.0,Monday,10:00:00,Friday,15:00:00
2,1040020,Emerald,-1,Thursday,15:00:00,Friday,19:00:00,3.0,3.0,-1.0,-1.0,3.0,3.0,3.0,3.0,Thursday,15:00:00,Friday,19:00:00
3,1040030,Foley,-1,Monday,19:00:00,Friday,23:59:59,4.8,4.8,-1.0,-1.0,4.8,4.8,4.8,4.8,Monday,19:00:00,Friday,23:59:59
4,1050000,Whittier,1050001,Saturday,00:00:00,Sunday,23:59:59,4.2,4.2,,-1.0,4.2,4.2,4.2,4.2,Saturday,00:00:00,Sunday,23:59:59


In [3]:
import pandas as pd

data = {'id_start': [1, 2, 3],
        'distance': [100, 150, 200]}
df = pd.DataFrame(data)

def calculate_toll_rate(df) -> pd.DataFrame:
    """
    Calculate toll rates for different vehicle types based on distance.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame
    """
    rates = {
        'moto': 0.8,
        'car': 1.2,
        'rv': 1.5,
        'bus': 2.2,
        'truck': 3.6
    }
    
    df['moto'] = df['distance'] * rates['moto']
    df['car'] = df['distance'] * rates['car']
    df['rv'] = df['distance'] * rates['rv']
    df['bus'] = df['distance'] * rates['bus']
    df['truck'] = df['distance'] * rates['truck']
    
    return df

df_with_toll_rates = calculate_toll_rate(df)

df_with_toll_rates


Unnamed: 0,id_start,distance,moto,car,rv,bus,truck
0,1,100,80.0,120.0,150.0,220.0,360.0
1,2,150,120.0,180.0,225.0,330.0,540.0
2,3,200,160.0,240.0,300.0,440.0,720.0


In [4]:
import pandas as pd

def find_ids_within_ten_percentage_threshold(df, reference_id) -> pd.DataFrame:
    """
    Find all IDs whose average distance lies within 10% of the average distance of the reference ID.

    Args:
        df (pandas.DataFrame): DataFrame containing 'id_start' and 'distance' columns.
        reference_id (int): The ID to use as a reference for distance comparison.

    Returns:
        pandas.DataFrame: DataFrame with IDs whose average distance is within the specified percentage threshold
                          of the reference ID's average distance.
    """
    reference_rows = df[df['id_start'] == reference_id]
    
    if reference_rows.empty:
        print(f"No data found for reference ID {reference_id}.")
        return pd.DataFrame(columns=['id_start'])

    reference_avg_distance = reference_rows['distance'].mean()
    
    lower_threshold = reference_avg_distance * 0.9
    upper_threshold = reference_avg_distance * 1.1
    
    grouped = df.groupby('id_start')['distance'].mean().reset_index()
    
    filtered_ids = grouped[(grouped['distance'] >= lower_threshold) & (grouped['distance'] <= upper_threshold)]
    
    filtered_ids = filtered_ids.sort_values(by='id_start')
    
    return filtered_ids[['id_start']]

data = {
    'id_start': [1, 1, 2, 2, 3, 3],
    'distance': [100, 120, 110, 115, 130, 140]
}
df = pd.DataFrame(data)

reference_id = 1
result = find_ids_within_ten_percentage_threshold(df, reference_id)

print(result)


   id_start
0         1
1         2


In [5]:
import pandas as pd

def unroll_distance_matrix(df) -> pd.DataFrame:
    """
    Unroll a distance matrix to a DataFrame in the style of the initial dataset.

    Args:
        df (pandas.DataFrame): A square DataFrame where both the index and columns represent IDs and
                               the values represent distances.

    Returns:
        pandas.DataFrame: Unrolled DataFrame containing columns 'id_start', 'id_end', and 'distance'.
    """
    unrolled_data = []
    
    for id_start in df.index:
        for id_end in df.columns:
            if id_start != id_end:  
                distance = df.loc[id_start, id_end]
                unrolled_data.append({'id_start': id_start, 'id_end': id_end, 'distance': distance})
    
    unrolled_df = pd.DataFrame(unrolled_data)
    
    print(unrolled_df)
    
    return unrolled_df


data = {
    'A': [0, 1, 2],
    'B': [1, 0, 3],
    'C': [2, 3, 0]
}
df_distance_matrix = pd.DataFrame(data, index=['A', 'B', 'C'])

result = unroll_distance_matrix(df_distance_matrix)


  id_start id_end  distance
0        A      B         1
1        A      C         2
2        B      A         1
3        B      C         3
4        C      A         2
5        C      B         3


In [None]:
import pandas as pd

def calculate_distance_matrix(df) -> pd.DataFrame:
    
    toll_locations = pd.unique(df[['id_start', 'id_end']].values.ravel('K'))
    print("Toll locations:", toll_locations)  # Debugging print
    
    distance_matrix = pd.DataFrame(float('inf'), index=toll_locations, columns=toll_locations)
    
    for location in toll_locations:
        distance_matrix.loc[location, location] = 0
    
    print("Initialized distance matrix:")
    print(distance_matrix)
    
    for _, row in df.iterrows():
        id_start = row['id_start']
        id_end = row['id_end']
        distance = row['distance']
        distance_matrix.loc[id_start, id_end] = distance
        distance_matrix.loc[id_end, id_start] = distance  
    
    print("Distance matrix after filling known distances:")
    print(distance_matrix)
    
    for k in toll_locations:
        for i in toll_locations:
            for j in toll_locations:
                distance_matrix.loc[i, j] = min(distance_matrix.loc[i, j], distance_matrix.loc[i, k] + distance_matrix.loc[k, j])
    
    print("Final distance matrix:")
    print(distance_matrix)
    
    return distance_matrix

df = pd.read_csv("C:\\Users\\shri0009\\Downloads\\dataset-2.csv")
result = calculate_distance_matrix(df)


Toll locations: [1001400 1001402 1001404 1001406 1001408 1001410 1001412 1001414 1001416
 1001418 1001420 1001422 1001424 1001426 1001428 1001430 1001432 1001434
 1001436 1001438 1001440 1001442 1001488 1004356 1004354 1004355 1001444
 1001446 1001448 1001450 1001452 1001454 1001456 1001458 1001460 1001461
 1001462 1001464 1001466 1001468 1001470 1001437 1001472]
Initialized distance matrix:
         1001400  1001402  1001404  1001406  1001408  1001410  1001412  \
1001400      0.0      inf      inf      inf      inf      inf      inf   
1001402      inf      0.0      inf      inf      inf      inf      inf   
1001404      inf      inf      0.0      inf      inf      inf      inf   
1001406      inf      inf      inf      0.0      inf      inf      inf   
1001408      inf      inf      inf      inf      0.0      inf      inf   
1001410      inf      inf      inf      inf      inf      0.0      inf   
1001412      inf      inf      inf      inf      inf      inf      0.0   
1001414      