In [2]:
#Qusetion 1

from typing import List

def reverse_by_n_elements(lst: List[int], n: int) -> List[int]:
    """
    Reverses the input list by groups of n elements
    param lst: List of integers to be reversed
    param n: Number of elements in each group to reverse
    return: New list with elements reversed by groups of n
    """
    reversed_list = []
    for i in range(0, len(lst), n):
        chunk_c = lst[i:i+n]
        reversed_list.extend(reversed(chunk_c))
    return reversed_list

lst = [1, 2, 3, 4, 5, 6, 7, 8,9,10]
n = 5
print(reverse_by_n_elements(lst, n)) 


[5, 4, 3, 2, 1, 10, 9, 8, 7, 6]


In [3]:
#Question 2

from typing import Dict, List
def group_by_length(lst: List[str]) -> Dict[int, List[str]]:
    """
    Groups the strings by their length and returns a dictionary
    param lst: List of strings to be grouped length.
    """
    length_dict = {}
    for string in lst:
        length = len(string)
        if length not in length_dict:
            length_dict[length] = []
        length_dict[length].append(string)
    return length_dict

lst = ["apple", "banana", "watermelon" ,"cherry", "plum", "peach"]
print(group_by_length(lst))


{5: ['apple', 'peach'], 6: ['banana', 'cherry'], 10: ['watermelon'], 4: ['plum']}


In [4]:
#Question 3)

from typing import Dict
def flatten_dict(nested_dict: Dict, sep: str = '.') -> Dict:
    """
    Flattens a nested dictionary into a single-level dictionary with dot notation for keys.
    param nested_dict: The dictionary object to flatten.
    param sep: The separator to use between parent and child keys (defaults to '.').
    return: A flattened dictionary.
    """
    def _flatten(current_dict: Dict, parent_key: str = '') -> Dict:
        items = {}
        for key, value in current_dict.items():
            new_key = f"{parent_key}{sep}{key}" if parent_key else key
            if isinstance(value, dict):
                items.update(_flatten(value, new_key))
            else:
                items[new_key] = value
        return items
    return _flatten(nested_dict)

nested = {'a':5,'b':{'c':2},'d':{'e':3}}

flattened = flatten_dict(nested)
print(flattened)


{'a': 5, 'b.c': 2, 'd.e': 3}


In [5]:
#Question 3)

from typing import List
def unique_permutations(nums: List[int]) -> List[List[int]]:
    """
    Generate all unique permutations of a list that may contain duplicates.
    param nums: List of integers (may contain duplicates).
    return: List of unique permutations.
    """
    def backtrack(path, used, result):
        if len(path) == len(nums):
            result.append(path[:])
            return
        for i in range(len(nums)):
            if used[i] or (i > 0 and nums[i] == nums[i - 1] and not used[i - 1]):
                continue

            used[i] = True
            path.append(nums[i])
            backtrack(path, used, result)
            path.pop()
            used[i] = False
    nums.sort()  
    result = []
    used = [False] * len(nums)
    backtrack([], used, result)
    return result

nums = [2, 1, 2]
print(unique_permutations(nums))

[[1, 2, 2], [2, 1, 2], [2, 2, 1]]


In [6]:
#Question 4)

import re
from typing import List
def find_all_dates(text: str) -> List[str]:
    """
    This function takes a string as input and returns a list of valid dates
    in 'dd-mm-yyyy', 'mm/dd/yyyy', or 'yyyy.mm.dd' format found in the string.
    Parameters:
    text (str): A string containing the dates in various formats.
    Returns:
    List[str]: A list of valid dates in the formats specified.
    """
    date_patterns = [
        r'\b\d{2}-\d{2}-\d{4}\b',
        r'\b\d{2}/\d{2}/\d{4}\b', 
        r'\b\d{4}\.\d{2}\.\d{2}\b'
    ]
    dates = []
    for pattern in date_patterns:
        matches = re.findall(pattern, text)
        dates.extend(matches)
    return dates

text = "Today is 12-11-2024,yesterday was 10/11/2024. The next event is on 2025.01.01."
found_dates = find_all_dates(text)
print(found_dates)





['12-11-2024', '10/11/2024', '2025.01.01']


In [7]:
#Question 5)
#!pip install polyline
#!pip install geopy
import pandas as pd
import polyline
from geopy.distance import geodesic
from typing import List, Tuple
def polyline_to_dataframe(polyline_str: str) -> pd.DataFrame:
    """
    Converts a polyline string into a DataFrame with latitude, longitude, and distance between consecutive points.
    Args:
        polyline_str (str): The encoded polyline string.
    Returns:
        pd.DataFrame: A DataFrame containing latitude, longitude, and distance in meters between consecutive points.
    """
    cs: List[Tuple[float, float]] = polyline.decode(polyline_str)
    df = pd.DataFrame(cs, columns=['latitude', 'longitude'])
    distances = [0]  
    for i in range(1, len(cs)):
        dist = geodesic(cs[i-1], cs[i]).meters
        distances.append(dist)
    df['distance'] = distances
    return df
polyline_str = "_p~iF~ps|U_ulLnnqC_mqNvxq`@"
df = polyline_to_dataframe(polyline_str)
print(df)


   latitude  longitude       distance
0    38.500   -120.200       0.000000
1    40.700   -120.950  252607.739960
2    43.252   -126.453  536828.686417


In [8]:
#Question 6)
from typing import List

def rotate_and_multiply_matrix(matrix: List[List[int]]) -> List[List[int]]:
    """
    Rotate the given matrix by 90 degrees clockwise, then multiply each element 
    by the sum of its original row and column index before rotation.
    Args:
    - matrix (List[List[int]]): 2D list representing the matrix to be transformed.
    Returns:
    - List[List[int]]: A new 2D list representing the transformed matrix.
    """
    n = len(matrix)
    rotated_matrix = [[0] * n for _ in range(n)]
    for i in range(n):
        for j in range(n):
            rotated_matrix[j][n - 1 - i] = matrix[i][j]
    transformed = [[0] * n for _ in range(n)]
    for i in range(n):
        for j in range(n):
            original_row = j
            original_col = n - 1 - i
            index_sum = original_row + original_col
            transformed[i][j] = rotated_matrix[i][j] * index_sum
    return transformed
matrix= [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]
result = rotate_and_multiply_matrix(matrix)
for row in result:
    print(row)

[14, 12, 4]
[8, 10, 6]
[0, 6, 6]


In [9]:
#Question 7)
import numpy as np
import pandas as pd
df = pd.read_csv('C:/Users/DELL/Downloads/dataset-1.csv')
def time_check(df: pd.DataFrame) -> pd.Series:
    """
    Verify the completeness of the data by checking whether there are at least 7 entries for each unique
    (id, id_2) pair.
    Args:
        df (pandas.DataFrame): DataFrame containing the data.
     Returns:
        pd.Series: Boolean series indicating whether each (id, id_2) pair has enough entries.
    """
    def has_minimum_entries(group):
        return len(group) >= 7
    coverage_results = df.groupby(['id', 'id_2']).apply(has_minimum_entries)

    return coverage_results


result_series = time_check(df)
print(result_series)



id       id_2    
1014000  -1          False
1014002  -1          False
1014003  -1          False
1030000  -1           True
          1030002     True
                     ...  
1330016   1330006    False
          1330008    False
          1330010    False
          1330012    False
          1330014    False
Length: 9254, dtype: bool


In [10]:
#SECTION 2

In [11]:
#Question 1)

import numpy as np
import pandas as pd
df = pd.read_csv('C:/Users/DELL/Downloads/dataset-2.csv')

def calculate_distance_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate a distance matrix based on the dataframe, df using Floyd-Warshall's algorithm.
    Args:
        df (pandas.DataFrame): DataFrame with 'id_start', 'id_end', and 'distance'.
    Returns:
        pd.DataFrame: Complete distance matrix with shortest paths.
    """
    unique_ids = pd.unique(df[['id_start', 'id_end']].values.ravel())
    distance_matrix = pd.DataFrame(np.inf, index=unique_ids, columns=unique_ids)
    
    for point in unique_ids:
        distance_matrix.loc[point, point] = 0
    for _, row in df.iterrows():
        distance_matrix.loc[row['id_start'], row['id_end']] = row['distance']
        distance_matrix.loc[row['id_end'], row['id_start']] = row['distance'] 
    n = distance_matrix.shape[0]
    for k in range(n):
        for i in range(n):
            for j in range(n):
                distance_matrix.iloc[i, j] = min(distance_matrix.iloc[i, j], 
                                                 distance_matrix.iloc[i, k] + distance_matrix.iloc[k, j])
    
    return distance_matrix

a=calculate_distance_matrix(df)
a


Unnamed: 0,1001400,1001402,1001404,1001406,1001408,1001410,1001412,1001414,1001416,1001418,...,1001456,1001458,1001460,1001461,1001462,1001464,1001466,1001468,1001470,1001472
1001400,0.0,9.7,29.9,45.9,67.6,78.7,94.3,112.5,125.7,139.3,...,339.9,348.8,353.9,366.7,371.8,398.5,407.0,417.7,428.3,444.3
1001402,9.7,0.0,20.2,36.2,57.9,69.0,84.6,102.8,116.0,129.6,...,330.2,339.1,344.2,357.0,362.1,388.8,397.3,408.0,418.6,434.6
1001404,29.9,20.2,0.0,16.0,37.7,48.8,64.4,82.6,95.8,109.4,...,310.0,318.9,324.0,336.8,341.9,368.6,377.1,387.8,398.4,414.4
1001406,45.9,36.2,16.0,0.0,21.7,32.8,48.4,66.6,79.8,93.4,...,294.0,302.9,308.0,320.8,325.9,352.6,361.1,371.8,382.4,398.4
1001408,67.6,57.9,37.7,21.7,0.0,11.1,26.7,44.9,58.1,71.7,...,272.3,281.2,286.3,299.1,304.2,330.9,339.4,350.1,360.7,376.7
1001410,78.7,69.0,48.8,32.8,11.1,0.0,15.6,33.8,47.0,60.6,...,261.2,270.1,275.2,288.0,293.1,319.8,328.3,339.0,349.6,365.6
1001412,94.3,84.6,64.4,48.4,26.7,15.6,0.0,18.2,31.4,45.0,...,245.6,254.5,259.6,272.4,277.5,304.2,312.7,323.4,334.0,350.0
1001414,112.5,102.8,82.6,66.6,44.9,33.8,18.2,0.0,13.2,26.8,...,227.4,236.3,241.4,254.2,259.3,286.0,294.5,305.2,315.8,331.8
1001416,125.7,116.0,95.8,79.8,58.1,47.0,31.4,13.2,0.0,13.6,...,214.2,223.1,228.2,241.0,246.1,272.8,281.3,292.0,302.6,318.6
1001418,139.3,129.6,109.4,93.4,71.7,60.6,45.0,26.8,13.6,0.0,...,200.6,209.5,214.6,227.4,232.5,259.2,267.7,278.4,289.0,305.0


In [12]:
#Question 2)

import pandas as pd
df = pd.read_csv('C:/Users/DELL/Downloads/dataset-2.csv')

def unroll_distance_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """
    Unroll a distance matrix into a DataFrame in the format of 'id_start', 'id_end', and 'distance'.
    Args:
        df (pandas.DataFrame): Distance matrix where rows and columns are points, and values are distances.
    Returns:
        pd.DataFrame: Unrolled DataFrame with columns 'id_start', 'id_end', and 'distance'.
    """
    unrolled_data = []
    for id_start in df.index:
        for id_end in df.columns:
            if id_start != id_end:
                distance = df.loc[id_start, id_end]
                unrolled_data.append([id_start, id_end, distance])
    unrolled_df = pd.DataFrame(unrolled_data, columns=['id_start', 'id_end', 'distance'])
    return unrolled_df


b= unroll_distance_matrix(a) #where a=distance matrix from question first.
b

Unnamed: 0,id_start,id_end,distance
0,1001400,1001402,9.7
1,1001400,1001404,29.9
2,1001400,1001406,45.9
3,1001400,1001408,67.6
4,1001400,1001410,78.7
...,...,...,...
1801,1001472,1001462,72.5
1802,1001472,1001464,45.8
1803,1001472,1001466,37.3
1804,1001472,1001468,26.6


In [13]:
#Question 3)
import numpy as np
import pandas as pd
df= pd.read_csv('C:/Users/DELL/Downloads/dataset-2.csv')

def find_ids_within_ten_percentage_threshold(df: pd.DataFrame, reference_id: int) -> pd.DataFrame:
    """
    Find all IDs whose average distance lies within 10% of the average distance of the reference ID.
    Args:
        df (pandas.DataFrame): DataFrame containing the distance data.
        reference_id (int): The ID to compare against.
    Returns:
        pandas.DataFrame: DataFrame with IDs whose average distance is within the specified percentage threshold
                          of the reference ID's average distance.
    """
    avg_distance_ref = df[df['id_start'] == reference_id]['distance'].mean()
    if pd.isna(avg_distance_ref):
        return pd.DataFrame(columns=['id_start', 'distance'])
    threshold = avg_distance_ref * 0.10
    lower_bound = avg_distance_ref - threshold
    upper_bound = avg_distance_ref + threshold
    
    avg_distances = df.groupby('id_start')['distance'].mean().reset_index()
    
    result = avg_distances[(avg_distances['distance'] >= lower_bound) & 
                            (avg_distances['distance'] <= upper_bound)]
    
    return result

result = find_ids_within_ten_percentage_threshold(df, reference_id=1001400)
print(result)
#if we use 1 ,2(reference_id)means not included in df then it will give
#empty as output.



    id_start  distance
0    1001400       9.7
10   1001420       9.6
15   1001430       9.0
23   1001446       9.6
25   1001450       9.9
28   1001456       8.9
35   1001468      10.6


In [14]:
#Question 4)

import pandas as pd
def calculate_toll_rate(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate toll rates for each vehicle type based on the unrolled DataFrame.
    Args:
        df (pandas.DataFrame): DataFrame containing vehicle data.
    Returns:
        pandas.DataFrame: DataFrame with an additional column for toll rates.
    """
    toll_rates = {
        'car': 1.50,  
        'truck': 4.30, 
        'bus': 2.50,   
    }
    
    df['toll_rate'] = df['vehicle_type'].map(toll_rates)
    
    df['toll_rate'] = df['toll_rate'].fillna(0)  
    return df
df = pd.DataFrame({
    'vehicle_type': ['car', 'truck', 'bus', 'car', 'bike']
})
result = calculate_toll_rate(df)
print(result)


  vehicle_type  toll_rate
0          car        1.5
1        truck        4.3
2          bus        2.5
3          car        1.5
4         bike        0.0


In [15]:
#Question 5)
import pandas as pd

def calculate_time_based_toll_rates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate time-based toll rates for different time intervals within a day.
    Args:
        df (pandas.DataFrame): DataFrame containing vehicle entry time.
    Returns:
        pandas.DataFrame: DataFrame with an additional column for calculated toll rates.
    """
    df['entry_time'] = pd.to_datetime(df['entry_time'])
    
    def get_toll_rate(entry_time):
        if entry_time.hour >= 7 and entry_time.hour < 10:  import pandas as pd

def calculate_time_based_toll_rates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate time-based toll rates for different time intervals within a day.

    Args:
        df (pandas.DataFrame): DataFrame containing vehicle entry time.

    Returns:
        pandas.DataFrame: DataFrame with an additional column for calculated toll rates.
    """
    
    df['entry_time'] = pd.to_datetime(df['entry_time'])
    def get_toll_rate(entry_time):
        if entry_time.hour >= 7 and entry_time.hour < 10:  # Morning peak hours
            return 5.00  
        elif entry_time.hour >= 10 and entry_time.hour < 16:  # Daytime
            return 3.00  
        elif entry_time.hour >= 16 and entry_time.hour < 19:  # Evening peak hours
            return 5.00  
        else:  # Nighttime
            return 2.00  
    df['toll_rate'] = df['entry_time'].apply(get_toll_rate)
    return df

df = pd.DataFrame({
    'entry_time': ['2024-10-23 08:30:00', '2024-10-23 14:00:00', '2024-10-23 17:30:00', '2024-10-23 22:00:00'],
    'vehicle_type': ['car', 'truck', 'bus', 'car']
})

result= calculate_time_based_toll_rates(df)
print(result)

         


           entry_time vehicle_type  toll_rate
0 2024-10-23 08:30:00          car        5.0
1 2024-10-23 14:00:00        truck        3.0
2 2024-10-23 17:30:00          bus        5.0
3 2024-10-23 22:00:00          car        2.0
