## Question 1: Reverse List by N Elements

In [None]:
from typing import Dict, List

import pandas as pd


def reverse_by_n_elements(lst: List[int], n: int) -> List[int]:
    """
    Reverses the input list by groups of n elements.
    """
    # Your code goes here.
    result = []
    i = 0

    while i < len(lst):
        end = min(i + n, len(lst))
        sublist = lst[i:end]
        sublist.reverse()
        result.extend(sublist)
        i = i + n

    return result

In [None]:
# Get user input for the list
input_str = input("Enter the list of numbers separated by spaces: ")
lst = [int(x) for x in input_str.split()]

# Get user input for the group size
n = int(input("Enter the group size: "))

Enter the list of numbers separated by spaces:  1 2 3 4 5 6 7 8 
Enter the group size: 3


In [None]:
# Call the function and print the result
reversed_list = reverse_by_n_elements(lst, n)
print("Reversed list:", reversed_list)

Reversed list: [3, 2, 1, 6, 5, 4, 8, 7]


## Question 2: Lists & Dictionaries

In [None]:
def group_by_length(lst: List[str]) -> Dict[int, List[str]]:
    """
    Groups the strings by their length and returns a dictionary.
    """
    # Your code here
    result = {}
    for word in lst:
        length = len(word)
        if length in result:
            result[length].append(word)
        else:
            result[length] = [word]
    return dict(sorted(result.items()))


In [None]:
# Get user input for the list of strings
input_str = input("Enter the list of strings separated by spaces: ")
lst = input_str.split()

Enter the list of strings separated by spaces: apple bat car elephant dog bear


In [None]:
# Call the function and print the result
grouped_dict = group_by_length(lst)
print("Grouped strings by length:", grouped_dict)

Grouped strings by length: {3: ['bat', 'car', 'dog'], 4: ['bear'], 5: ['apple'], 8: ['elephant']}


## Question 3: Flatten a Nested Dictionary

In [None]:
from typing import Dict

def flatten_dict(nested_dict: Dict, sep: str = '.') -> Dict:
    """
    Flattens a nested dictionary into a single-level dictionary with dot notation for keys.

    :param nested_dict: The dictionary object to flatten
    :param sep: The separator to use between parent and child keys (defaults to '.')
    :return: A flattened dictionary
    """
    # Your code here

    flattened_dict = {}

    def flatten_helper(value, key_path=""):
        if isinstance(value, dict):
            for inner_key, inner_value in value.items():
                new_key = f"{key_path}{sep}{inner_key}" if key_path else inner_key
                flatten_helper(inner_value, new_key)
        elif isinstance(value, list):
            for i, item in enumerate(value):
                new_key = f"{key_path}{sep}sections[{i}]" if key_path else f"sections[{i}]"
                flatten_helper(item, new_key)
        else:
            flattened_dict[key_path] = value

    flatten_helper(nested_dict)
    return flattened_dict



In [None]:

nested_dict = {
    "road": {
        "name": "Highway 1",
        "length": 350,
        "sections": [
            {
                "id": 1,
                "condition": {
                    "pavement": "good",
                    "traffic": "moderate"
                }
            }
        ]
    }
}

flattened_dict = flatten_dict(nested_dict)
print(flattened_dict)

{'road.name': 'Highway 1', 'road.length': 350, 'road.sections.sections[0].id': 1, 'road.sections.sections[0].condition.pavement': 'good', 'road.sections.sections[0].condition.traffic': 'moderate'}


## Question 4: Generate Unique Permutations

In [None]:
from typing import List

def unique_permutations(nums: List[int]) -> List[List[int]]:
    """
    Generate all unique permutations of a list that may contain duplicates.

    :param nums: List of integers (may contain duplicates)
    :return: List of unique permutations
    """
    # Your code here
    result = []
    used = [False] * len(nums)

    def backtrack(perm, used):
        if len(perm) == len(nums):
            result.append(perm.copy())
            return

        for i in range(len(nums)):
            if not used[i] and (i == 0 or nums[i] != nums[i - 1] or used[i - 1]):
                used[i] = True
                perm.append(nums[i])
                backtrack(perm, used)
                perm.pop()
                used[i] = False

    backtrack([], used)
    return result

In [None]:
# Get user input for the list of integers
input_str = input("Enter the list of integers separated by spaces: ")
nums = [int(x) for x in input_str.split()]

Enter the list of integers separated by spaces: 1 1 2


In [None]:
# Generate unique permutations
permutations = unique_permutations(nums)

# Print the result
print("Unique permutations:", permutations)

Unique permutations: [[1, 1, 2], [1, 2, 1], [2, 1, 1]]


 ## Question 5: Find All Dates in a Text

In [None]:
import re

def find_all_dates(text: str) -> List[str]:
    """
    This function takes a string as input and returns a list of valid dates
    in 'dd-mm-yyyy', 'mm/dd/yyyy', or 'yyyy.mm.dd' format found in the string.

    Parameters:
    text (str): A string containing the dates in various formats.

    Returns:
    List[str]: A list of valid dates in the formats specified.
    """

    # Regular expression pattern to match valid dates
    date_pattern = r"(?P<day>\d{1,2})[-/.](?P<month>\d{1,2})[-/.](?P<year>\d{4})"

    # Compile the regular expression
    regex = re.compile(date_pattern)

    # Find all matches in the text
    matches = regex.findall(text)

    # Extract and format dates from matches
    dates = []
    for match in matches:
        day, month, year = match
        # Ensure valid day and month range
        if int(day) <= 31 and int(month) <= 12:
            # Format date based on separators used
            if '-' in text:
                date = f"{day}-{month}-{year}"
            elif '/' in text:
                date = f"{month}/{day}/{year}"
            else:
                date = f"{year}.{month}.{day}"
            dates.append(date)

    return dates

# Example usage
text = "I was born on 23-08-1994, my friend on 08/23/1994, and another one on 1994.08.23. This is not a date 15-99-2023."

all_dates = find_all_dates(text)
print(all_dates)

['23-08-1994']


## Question 6: Decode Polyline, Convert to DataFrame with Distances

In [None]:
!pip install polyline

Collecting polyline
  Downloading polyline-2.0.2-py3-none-any.whl.metadata (6.4 kB)
Downloading polyline-2.0.2-py3-none-any.whl (6.0 kB)
Installing collected packages: polyline
Successfully installed polyline-2.0.2


In [None]:
import polyline
import pandas as pd
import numpy as np
from math import radians, sin, cos, sqrt, asin

In [None]:
def haversine(lat1, lon1, lat2, lon2):
    # Radius of Earth in meters
    R = 6371000
    # Convert latitudes and longitudes from degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Calculate differences in coordinates
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * asin(sqrt(a))

    # Distance in meters
    distance = R * c
    return distance

In [None]:
def decode_polyline_to_dataframe(polyline_str):
    # Decode polyline into a list of (latitude, longitude) tuples
    coordinates = polyline.decode(polyline_str)

    # Create a DataFrame from the coordinates
    df = pd.DataFrame(coordinates, columns=['latitude', 'longitude'])

    # Initialize the distance column with 0 for the first row
    distances = [0]

    # Calculate distance between successive points
    for i in range(1, len(coordinates)):
        lat1, lon1 = coordinates[i-1]
        lat2, lon2 = coordinates[i]
        distance = haversine(lat1, lon1, lat2, lon2)
        distances.append(distance)

    # Add the distance column to the DataFrame
    df['distance'] = distances

    return df

In [None]:
polyline_str = "_p~iF~ps|U_ulLnnqC_mqNvxq`@"
df = decode_polyline_to_dataframe(polyline_str)
print(df)

   latitude  longitude       distance
0    38.500   -120.200       0.000000
1    40.700   -120.950  252924.435162
2    43.252   -126.453  535981.434984


## Question 7: Matrix Rotation and Transformation

In [None]:
def rotate_and_multiply_matrix(matrix: List[List[int]]) -> List[List[int]]:
    """
    Rotate the given matrix by 90 degrees clockwise, then multiply each element
    by the sum of its original row and column index before rotation.

    Args:
    - matrix (List[List[int]]): 2D list representing the matrix to be transformed.

    Returns:
    - List[List[int]]: A new 2D list representing the transformed matrix.
    """
    # Your code here
    n = len(matrix)

    # Create a new matrix to store the rotated elements
    rotated_matrix = [[0] * n for _ in range(n)]

    # Rotate the matrix 90 degrees clockwise
    for i in range(n):
        for j in range(n):
            rotated_matrix[j][n - i - 1] = matrix[i][j]

    # Multiply each element by the sum of its original row and column index
    for i in range(n):
        for j in range(n):
            rotated_matrix[i][j] *= i + j + 2  # Adjust the offset as needed

    return rotated_matrix


In [None]:
# Get the matrix size from the user
n = int(input("Enter the size of the square matrix: "))

Enter the size of the square matrix: 3


In [None]:
# Get the matrix elements from the user
matrix = []
print("Enter the elements of the matrix row by row:")
for i in range(n):
    row = []
    for j in range(n):
        element = int(input(f"Enter element ({i}, {j}): "))
        row.append(element)
    matrix.append(row)

Enter the elements of the matrix row by row:
Enter element (0, 0): 1
Enter element (0, 1): 2
Enter element (0, 2): 3
Enter element (1, 0): 4
Enter element (1, 1): 5
Enter element (1, 2): 6
Enter element (2, 0): 7
Enter element (2, 1): 8
Enter element (2, 2): 9


In [None]:
# Call the function and print the result
transformed_matrix = rotate_and_multiply_matrix(matrix)
print("Transformed matrix:")
for row in transformed_matrix:
    print(row)

Transformed matrix:
[14, 12, 4]
[24, 20, 10]
[36, 30, 18]


## Question 8: Time Check

In [None]:
# Load dataset
import pandas as pd

df = pd.read_csv('/content/dataset-1.csv')
df.head()

Unnamed: 0,id,name,id_2,startDay,startTime,endDay,endTime,able2Hov2,able2Hov3,able3Hov2,able3Hov3,able5Hov2,able5Hov3,able4Hov2,able4Hov3
0,1040000,Montgomery,-1,Monday,05:00:00,Wednesday,10:00:00,3.0,3.0,-1.0,-1,3,3,3,3
1,1040010,Black,-1,Monday,10:00:00,Friday,15:00:00,6.0,6.0,-1.0,-1,6,6,6,6
2,1040020,Emerald,-1,Thursday,15:00:00,Friday,19:00:00,3.0,3.0,-1.0,-1,3,3,3,3
3,1040030,Foley,-1,Monday,19:00:00,Friday,23:59:59,6.0,6.0,-1.0,-1,6,6,6,6
4,1050000,Whittier,1050001,Saturday,00:00:00,Sunday,23:59:59,6.0,6.0,,-1,6,6,6,6


In [None]:
from datetime import timedelta

# List of all days in a week
DAYS_OF_WEEK = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Convert time string into a timedelta object
def time_to_timedelta(time_str):
    return timedelta(hours=int(time_str[:2]), minutes=int(time_str[3:5]), seconds=int(time_str[6:8]))


In [None]:
# Function to verify if a (id, id_2) pair covers a full 7 days and 24-hour period
def check_time_completeness_optimized(df):
    result = []

    # Group by id and id_2
    grouped = df.groupby(['id', 'id_2'])

    for (id_val, id_2_val), group in grouped:
        covered_days = set()

        for _, row in group.iterrows():
            start_day, end_day = row['startDay'], row['endDay']
            start_time, end_time = row['startTime'], row['endTime']

            # Get the indices of start and end days
            start_index = DAYS_OF_WEEK.index(start_day)
            end_index = DAYS_OF_WEEK.index(end_day)

            # Add all days covered by this row
            if start_day == end_day:
                # Ensure the time covers the full 24-hour period
                if start_time == '00:00:00' and end_time == '23:59:59':
                    covered_days.add(start_day)
            else:
                # Mark start day if it starts at midnight
                if start_time == '00:00:00':
                    covered_days.add(start_day)
                # Mark end day if it ends at 23:59:59
                if end_time == '23:59:59':
                    covered_days.add(end_day)
                # Mark all days in between
                for i in range(start_index + 1, end_index):
                    covered_days.add(DAYS_OF_WEEK[i])

        # Check if all 7 days are covered
        is_complete = len(covered_days) == 7
        result.append(((id_val, id_2_val), not is_complete))

    # Return the result as a multi-index boolean series
    result_series = pd.Series(dict(result))
    return result_series

In [None]:
# Call the function on the dataset
incomplete_time_series_optimized = check_time_completeness_optimized(df)

In [None]:
# Output the result
print(incomplete_time_series_optimized)

1014000  -1           True
1014002  -1           True
1014003  -1           True
1030000  -1           True
          1030002    False
                     ...  
1330016   1330006     True
          1330008     True
          1330010     True
          1330012     True
          1330014     True
Length: 9254, dtype: bool


## Question 9: Distance Matrix Calculation

In [None]:
# Load the datasets
import pandas as pd

df1 = pd.read_csv("/content/dataset-2.csv")
df1.head()

Unnamed: 0,id_start,id_end,distance
0,1001400,1001402,9.7
1,1001402,1001404,20.2
2,1001404,1001406,16.0
3,1001406,1001408,21.7
4,1001408,1001410,11.1


In [None]:
df1.columns

Index(['id_start', 'id_end', 'distance'], dtype='object')

In [None]:
def calculate_distance_matrix(df1):
    # Extract unique toll IDs
    toll_ids = pd.unique(df1[["id_start","id_end"]].values.ravel())

    # Initialize distance matrix with infinity values
    distance_matrix = pd.DataFrame(np.inf, index=toll_ids, columns=toll_ids)

    # Set the diagonal to 0 (distance from a toll to itself is 0)
    np.fill_diagonal(distance_matrix.values, 0)

In [None]:
# Fill the matrix with direct distances from the dataset
for _, row in df1.iterrows():
  toll_a = row['id_start']
  toll_b = row['id_end']
  distance = row['distance']

In [None]:
# Set both A to B and B to A distances (symmetry)
toll_ids = pd.unique(df1[['id_start', 'id_end']].values.ravel())
distance_matrix = pd.DataFrame(np.inf, index=toll_ids, columns=toll_ids)


distance_matrix.loc[toll_a, toll_b] = distance
distance_matrix.loc[toll_b, toll_a] = distance

In [None]:
for k in toll_ids:
        for i in toll_ids:
            for j in toll_ids:
                # If a shorter path through 'k' is found, update the distance
                if distance_matrix.loc[i, j] > distance_matrix.loc[i, k] + distance_matrix.loc[k, j]:
                    distance_matrix.loc[i, j] = distance_matrix.loc[i, k] + distance_matrix.loc[k, j]
print(distance_matrix)

         1001400  1001402  1001404  1001406  1001408  1001410  1001412  \
1001400      inf      inf      inf      inf      inf      inf      inf   
1001402      inf      inf      inf      inf      inf      inf      inf   
1001404      inf      inf      inf      inf      inf      inf      inf   
1001406      inf      inf      inf      inf      inf      inf      inf   
1001408      inf      inf      inf      inf      inf      inf      inf   
1001410      inf      inf      inf      inf      inf      inf      inf   
1001412      inf      inf      inf      inf      inf      inf      inf   
1001414      inf      inf      inf      inf      inf      inf      inf   
1001416      inf      inf      inf      inf      inf      inf      inf   
1001418      inf      inf      inf      inf      inf      inf      inf   
1001420      inf      inf      inf      inf      inf      inf      inf   
1001422      inf      inf      inf      inf      inf      inf      inf   
1001424      inf      inf      inf    

## Question 10: Unroll Distance Matrix

In [None]:
import pandas as pd

def unroll_distance_matrix(distance_matrix):
    # Reset the index to convert the matrix into a long DataFrame with id_start, id_end, and distance
    unrolled_df1 = distance_matrix.reset_index().melt(id_vars='index', var_name='id_end', value_name='distance')

    # Rename columns for clarity
    unrolled_df1.rename(columns={'index': 'id_start'}, inplace=True)

    # Filter out rows where id_start == id_end (diagonal elements with distance 0)
    unrolled_df1 = unrolled_df1[unrolled_df1['id_start'] != unrolled_df1['id_end']]

    return unrolled_df1

In [None]:
# Assuming 'distance_matrix' is the DataFrame created from the previous step
unrolled_df1 = unroll_distance_matrix(distance_matrix)
print(unrolled_df1)

      id_start   id_end  distance
1      1001402  1001400       inf
2      1001404  1001400       inf
3      1001406  1001400       inf
4      1001408  1001400       inf
5      1001410  1001400       inf
...        ...      ...       ...
1843   1001462  1001472       inf
1844   1001464  1001472       inf
1845   1001466  1001472       inf
1846   1001468  1001472       inf
1847   1001470  1001472      16.0

[1806 rows x 3 columns]


## Question 11: Finding IDs within Percentage Threshold

In [None]:
# Step 1: Filter rows where id_start is the reference value
def find_ids_within_ten_percentage_threshold(unrolled_df1, reference_value):
  reference_df1 = unrolled_df1[unrolled_df1['id_start'] == reference_value]

# Step 2: Calculate the average distance for the reference value
  avg_distance = reference_df1['distance'].mean()

# Step 3: Determine the 10% threshold range (floor and ceiling)
  lower_bound = avg_distance * 0.90  # 10% below the average
  upper_bound = avg_distance * 1.10
# Step 4: Filter the DataFrame to find id_start values within the threshold
  within_threshold_df = unrolled_df1[
  (unrolled_df1['distance'] >= lower_bound) &
  (unrolled_df1['distance'] <= upper_bound)
    ]

# Step 5: Extract and return a sorted list of unique id_start values
  result_ids = within_threshold_df['id_start'].unique()

# Return the sorted list of IDs
  return sorted(result_ids)

In [None]:
result = find_ids_within_ten_percentage_threshold(unrolled_df1 , reference_value=1001400)
print(result)

[1001400, 1001402, 1001404, 1001406, 1001408, 1001410, 1001412, 1001414, 1001416, 1001418, 1001420, 1001422, 1001424, 1001426, 1001428, 1001430, 1001432, 1001434, 1001436, 1001437, 1001438, 1001440, 1001442, 1001444, 1001446, 1001448, 1001450, 1001452, 1001454, 1001456, 1001458, 1001460, 1001461, 1001462, 1001464, 1001466, 1001468, 1001470, 1001472, 1001488, 1004354, 1004355, 1004356]


## Question 12: Calculate Toll Rate

In [None]:
import pandas as pd

def calculate_toll_rate(df1):
    # Define rate coefficients for each vehicle type
    rate_coefficients = {
        'moto': 0.8,
        'car': 1.2,
        'rv': 1.5,
        'bus': 2.2,
        'truck': 3.6
    }

    # Ensure that 'distance' is a column in the DataFrame
    if 'distance' in df1.columns:
        # Calculate the toll rates by multiplying distance with rate coefficients
        df1['moto'] = df1['distance'] * rate_coefficients['moto']
        df1['car'] = df1['distance'] * rate_coefficients['car']
        df1['rv'] = df1['distance'] * rate_coefficients['rv']
        df1['bus'] = df1['distance'] * rate_coefficients['bus']
        df1['truck'] = df1['distance'] * rate_coefficients['truck']
    else:
        raise ValueError("The DataFrame must have a 'distance' column.")

    return df1


In [None]:
df1 = pd.read_csv('/content/dataset-2.csv')

# Apply the function
df_with_toll_rates = calculate_toll_rate(df1)

# Display the modified DataFrame
df_with_toll_rates.head(10)

Unnamed: 0,id_start,id_end,distance,moto,car,rv,bus,truck
0,1001400,1001402,9.7,7.76,11.64,14.55,21.34,34.92
1,1001402,1001404,20.2,16.16,24.24,30.3,44.44,72.72
2,1001404,1001406,16.0,12.8,19.2,24.0,35.2,57.6
3,1001406,1001408,21.7,17.36,26.04,32.55,47.74,78.12
4,1001408,1001410,11.1,8.88,13.32,16.65,24.42,39.96
5,1001410,1001412,15.6,12.48,18.72,23.4,34.32,56.16
6,1001412,1001414,18.2,14.56,21.84,27.3,40.04,65.52
7,1001414,1001416,13.2,10.56,15.84,19.8,29.04,47.52
8,1001416,1001418,13.6,10.88,16.32,20.4,29.92,48.96
9,1001418,1001420,12.9,10.32,15.48,19.35,28.38,46.44


## Question 13: Calculate Time-Based Toll Rates

In [None]:
import pandas as pd
import datetime

def calculate_time_based_toll_rates(df1):
    # Define the days of the week
    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # Define time intervals to divide the day into different parts
    time_intervals = [
        (datetime.time(0, 0, 0), datetime.time(10, 0, 0)),  # Midnight to 10:00 AM
        (datetime.time(10, 0, 0), datetime.time(18, 0, 0)),  # 10:00 AM to 6:00 PM
        (datetime.time(18, 0, 0), datetime.time(23, 59, 59))  # 6:00 PM to midnight
    ]

    # Create an empty list to store the results
    result_list = []

    # Iterate over each row in the DataFrame
    for _, row in df1.iterrows():
        # Extract the id_start and id_end from the row
        id_start = row['id_start']
        id_end = row['id_end']

        # Extract the toll rates for different vehicle types
        moto = row['moto']
        car = row['car']
        rv = row['rv']
        bus = row['bus']
        truck = row['truck']

        # Iterate through each day of the week
        for day in days_of_week:
            # Determine if the current day is a weekend (Saturday or Sunday)
            is_weekend = day in ['Saturday', 'Sunday']

            # Apply time-based discount factors based on day and time
            for start_time, end_time in time_intervals:
                if is_weekend:
                    # Apply a 0.7 discount for weekends
                    discount_factor = 0.7
                else:
                    # Apply different discount factors based on the time interval during weekdays
                    if start_time == datetime.time(0, 0, 0):  # Early morning
                        discount_factor = 0.8
                    elif start_time == datetime.time(10, 0, 0):  # Daytime
                        discount_factor = 1.2
                    else:  # Evening
                        discount_factor = 0.8

                # Calculate adjusted toll rates by multiplying the original rates with the discount factor
                adjusted_moto = moto * discount_factor
                adjusted_car = car * discount_factor
                adjusted_rv = rv * discount_factor
                adjusted_bus = bus * discount_factor
                adjusted_truck = truck * discount_factor

                # Append the results to the list
                result_list.append({
                    'id_start': id_start,
                    'id_end': id_end,
                    'start_day': day,
                    'start_time': start_time,
                    'end_day': day,  # Same day for this time range
                    'end_time': end_time,
                    'moto': adjusted_moto,
                    'car': adjusted_car,
                    'rv': adjusted_rv,
                    'bus': adjusted_bus,
                    'truck': adjusted_truck
                })

    # Create a new DataFrame from the result list
    result_df1 = pd.DataFrame(result_list)

    # Return the final DataFrame with time-based toll rates
    return result_df1


In [None]:
# Sample DataFrame with 'id_start', 'id_end', and toll rates (moto, car, rv, bus, truck)
data = {
    'id_start': [1, 2],
    'id_end': [10, 20],
    'moto': [8.0, 16.0],
    'car': [12.0, 24.0],
    'rv': [15.0, 30.0],
    'bus': [22.0, 44.0],
    'truck': [36.0, 72.0]
}

df1 = pd.DataFrame(data)

# Apply the function to calculate time-based toll rates
df1_with_time_based_tolls = calculate_time_based_toll_rates(df1)

# Display the resulting DataFrame
df1_with_time_based_tolls.head()

Unnamed: 0,id_start,id_end,start_day,start_time,end_day,end_time,moto,car,rv,bus,truck
0,1.0,10.0,Monday,00:00:00,Monday,10:00:00,6.4,9.6,12.0,17.6,28.8
1,1.0,10.0,Monday,10:00:00,Monday,18:00:00,9.6,14.4,18.0,26.4,43.2
2,1.0,10.0,Monday,18:00:00,Monday,23:59:59,6.4,9.6,12.0,17.6,28.8
3,1.0,10.0,Tuesday,00:00:00,Tuesday,10:00:00,6.4,9.6,12.0,17.6,28.8
4,1.0,10.0,Tuesday,10:00:00,Tuesday,18:00:00,9.6,14.4,18.0,26.4,43.2
