In [2]:
#Question 1: Reverse List by N Elements
#solution
# The function iterates over the input list in steps of n.
# For each group of n elements, it collects the elements in a temporary list (group).
# It then reverses this group manually by appending the elements in reverse order to the final result list (result).
# Finally, the function returns the result list.

In [7]:
from typing import Dict, List

import pandas as pd

def reverse_by_n_elements(lst: List[int], n: int) -> List[int]:
    """
    Reverses the input list by groups of n elements.
    
    :param lst: List of integers to reverse.
    :param n: Size of the groups to reverse.
    :return: A new list with every group of n elements reversed.
    """
    result = []
    for i in range(0, len(lst), n):
        # Take a group of n elements
        group = []
        for j in range(n):
            # Check if the index is within the bounds of the list
            if i + j < len(lst):
                group.append(lst[i + j])
        
        # Reverse the group and append it to the result
        for k in range(len(group) - 1, -1, -1):
            result.append(group[k])
    
    return result

# Example usage
print(reverse_by_n_elements([1, 2, 3, 4, 5, 6, 7, 8], 3))  # Output: [3, 2, 1, 6, 5, 4, 8, 7]
print(reverse_by_n_elements([1, 2, 3, 4, 5], 2))            # Output: [2, 1, 4, 3, 5]
print(reverse_by_n_elements([10, 20, 30, 40, 50, 60, 70], 4))  # Output: [40, 30, 20, 10, 70, 60, 50]


[3, 2, 1, 6, 5, 4, 8, 7]
[2, 1, 4, 3, 5]
[40, 30, 20, 10, 70, 60, 50]


In [26]:
#Question 2: Lists & Dictionaries
# Initialization: The function initializes an empty dictionary, length_dict, to store lists of strings grouped by their lengths.
# Iteration: It iterates through each string in the input list:
# It calculates the length of each string.
# If the length is not already a key in the dictionary, it initializes an empty list for that length.
# It appends the string to the corresponding list.
# Sorting: After grouping the strings, the dictionary is sorted by its keys (lengths) using sorted(), and the sorted dictionary is returned


In [10]:
from typing import List, Dict

def reverse_by_n_elements(lst: List[int], n: int) -> List[int]:
    """
    Reverses the input list by groups of n elements.
    
    :param lst: List of integers to reverse.
    :param n: Size of the groups to reverse.
    :return: A new list with every group of n elements reversed.
    """
    result = []
    for i in range(0, len(lst), n):
        # Create a temporary list for the current group
        group = []
        for j in range(n):
            if i + j < len(lst):  # Ensure we are within bounds
                group.append(lst[i + j])
        
        # Append the reversed group to the result
        for k in range(len(group) - 1, -1, -1):
            result.append(group[k])
    
    return result


def group_by_length(lst: List[str]) -> Dict[int, List[str]]:
    """
    Groups the strings by their length and returns a dictionary.
    
    :param lst: List of strings to group.
    :return: A dictionary where keys are string lengths and values are lists of strings with that length.
    """
    length_dict = {}
    
    for string in lst:
        length = len(string)
        if length not in length_dict:
            length_dict[length] = []
        length_dict[length].append(string)
    
    return dict(sorted(length_dict.items()))

# Example usage for testing
if __name__ == "__main__":
    # Test reverse_by_n_elements
    print(reverse_by_n_elements([1, 2, 3, 4, 5, 6, 7, 8], 3))  # Output: [3, 2, 1, 6, 5, 4, 8, 7]
    print(reverse_by_n_elements([1, 2, 3, 4, 5], 2))            # Output: [2, 1, 4, 3, 5]
    print(reverse_by_n_elements([10, 20, 30, 40, 50, 60, 70], 4))  # Output: [40, 30, 20, 10, 70, 60, 50]
    
    # Test group_by_length
    print(group_by_length(["apple", "bat", "car", "elephant", "dog", "bear"]))
    # Output: {3: ['bat', 'car', 'dog'], 4: ['bear'], 5: ['apple'], 8: ['elephant']}
    print(group_by_length(["one", "two", "three", "four"]))
    # Output: {3: ['one', 'two'], 4: ['four'], 5: ['three']}


[3, 2, 1, 6, 5, 4, 8, 7]
[2, 1, 4, 3, 5]
[40, 30, 20, 10, 70, 60, 50]
{3: ['bat', 'car', 'dog'], 4: ['bear'], 5: ['apple'], 8: ['elephant']}
{3: ['one', 'two'], 4: ['four'], 5: ['three']}


In [9]:
#Question 3: Flatten a Nested Dictionary
# Function Definition: The flatten_dict function takes a nested dictionary and an optional separator (default is .).

# Helper Function: The flatten helper function does the actual flattening:

# It checks if the current item is a dictionary or a list.
# If it’s a dictionary, it recursively calls itself for each key-value pair.
# If it’s a list, it iterates through the list, using the index to create keys with square brackets.
# If it’s neither (a base case), it adds the key-value pair to the flat_dict.
# Return Value: The flattened dictionary is returned, where keys represent the full path of nested items.


In [11]:
from typing import Dict, Any, Union, List

def flatten_dict(nested_dict: Dict[str, Any], sep: str = '.') -> Dict[str, Any]:
    """
    Flattens a nested dictionary into a single-level dictionary with dot notation for keys.
    
    :param nested_dict: The dictionary object to flatten
    :param sep: The separator to use between parent and child keys (defaults to '.')
    :return: A flattened dictionary
    """
    flat_dict = {}

    def flatten(current_dict: Union[Dict, List], parent_key: str = ''):
        if isinstance(current_dict, dict):
            for key, value in current_dict.items():
                new_key = f"{parent_key}{sep}{key}" if parent_key else key
                flatten(value, new_key)
        elif isinstance(current_dict, list):
            for index, item in enumerate(current_dict):
                new_key = f"{parent_key}[{index}]"
                flatten(item, new_key)
        else:
            flat_dict[parent_key] = current_dict

    flatten(nested_dict)
    return flat_dict

# Example usage
nested_dictionary = {
    "road": {
        "name": "Highway 1",
        "length": 350,
        "sections": [
            {
                "id": 1,
                "condition": {
                    "pavement": "good",
                    "traffic": "moderate"
                }
            }
        ]
    }
}

flattened = flatten_dict(nested_dictionary)
print(flattened)
# Expected output:
# {
#     'road.name': 'Highway 1',
#     'road.length': 350,
#     'road.sections[0].id': 1,
#     'road.sections[0].condition.pavement': 'good',
#     'road.sections[0].condition.traffic': 'moderate'
# }


{'road.name': 'Highway 1', 'road.length': 350, 'road.sections[0].id': 1, 'road.sections[0].condition.pavement': 'good', 'road.sections[0].condition.traffic': 'moderate'}


In [28]:
#Question 4: Generate Unique Permutations
# To generate all unique permutations of a list of integers that may contain duplicates, we can use backtracking along with 
# a set to track which elements have been used at each position. This way, we can ensure that we don't generate duplicate permutations

# Function Definition: The unique_permutations function generates unique permutations from the input list nums.

# Backtracking: The helper function backtrack is used to build permutations:

# When the start index reaches the end of nums, a valid permutation is found, and it is appended to the result.
# A seen set is used to track which elements have already been used in the current position to avoid duplicates.
# The loop iterates through the elements, swapping the current element with the start element to explore permutations.
# After the recursive call, we backtrack by swapping the elements back to their original positions.
# Sorting: The input list is sorted at the beginning to ensure that duplicates are adjacent, which helps in easily skipping them.


In [13]:
from typing import List

def unique_permutations(nums: List[int]) -> List[List[int]]:
    """
    Generate all unique permutations of a list that may contain duplicates.
    
    :param nums: List of integers (may contain duplicates)
    :return: List of unique permutations
    """
    def backtrack(start: int):
        # If we've reached the end of the array, we found a valid permutation
        if start == len(nums):
            result.append(nums[:])  # Append a copy of the current permutation
            return
        
        seen = set()  # To keep track of used numbers at this position
        for i in range(start, len(nums)):
            if nums[i] in seen:  # Skip duplicates
                continue
            seen.add(nums[i])  # Mark the number as seen
            # Swap the current element with the start element
            nums[start], nums[i] = nums[i], nums[start]
            backtrack(start + 1)  # Recur for the next position
            # Backtrack (swap back)
            nums[start], nums[i] = nums[i], nums[start]

    result = []
    nums.sort()  # Sort the numbers to handle duplicates
    backtrack(0)
    return result

# Example usage
input_nums = [1, 1, 2]
output = unique_permutations(input_nums)
print(output)
# Expected output:
# [[1, 1, 2], [1, 2, 1], [2, 1, 1]]


[[1, 1, 2], [1, 2, 1], [2, 1, 1]]


In [29]:
#Question 5: Find All Dates in a Text

# Regular Expressions: The function uses regex patterns to match dates in the specified formats:

# dd-mm-yyyy: Matches days from 01 to 31, months from 01 to 12, and a four-digit year.
# mm/dd/yyyy: Matches months from 01 to 12, days from 01 to 31, and a four-digit year.
# yyyy.mm.dd: Matches a four-digit year, months from 01 to 12, and days from 01 to 31.
# Each pattern is wrapped with (?<!\d) and (?!\d) to ensure that they are not part of a larger number.
# Finding Matches: The re.findall() function is used to find all matches for each pattern in the input text.

# Reconstructing Dates: Depending on which pattern matched, the function reconstructs the date string in the appropriate format and appends it to the valid_dates list.

# Return Value: The function returns a list of valid dates found in the text.




In [15]:
import re
from typing import List

def find_all_dates(text: str) -> List[str]:
    """
    This function takes a string as input and returns a list of valid dates
    in 'dd-mm-yyyy', 'mm/dd/yyyy', or 'yyyy.mm.dd' format found in the string.
    
    Parameters:
    text (str): A string containing the dates in various formats.

    Returns:
    List[str]: A list of valid dates in the formats specified.
    """
    # Define regex patterns for the specified date formats
    patterns = [
        r'(?<!\d)([0-2][0-9]|3[01])-(0[1-9]|1[0-2])-(\d{4})(?!\d)',  # dd-mm-yyyy
        r'(?<!\d)(0[1-9]|1[0-2])/(0[1-9]|[12][0-9]|3[01])/(\d{4})(?!\d)',  # mm/dd/yyyy
        r'(?<!\d)(\d{4})\.(0[1-9]|1[0-2])\.(0[1-9]|[12][0-9]|3[01])(?!\d)'   # yyyy.mm.dd
    ]
    
    valid_dates = []

    # Search for all date formats in the text
    for pattern in patterns:
        matches = re.findall(pattern, text)
        for match in matches:
            # Reconstruct the date based on the format
            if '-' in pattern:  # dd-mm-yyyy
                valid_dates.append(f"{match[0]}-{match[1]}-{match[2]}")
            elif '/' in pattern:  # mm/dd/yyyy
                valid_dates.append(f"{match[0]}/{match[1]}/{match[2]}")
            elif '.' in pattern:  # yyyy.mm.dd
                valid_dates.append(f"{match[0]}.{match[1]}.{match[2]}")
    
    return valid_dates

# Example usage
text = "I was born on 23-08-1994, my friend on 08/23/1994, and another one on 1994.08.23."
found_dates = find_all_dates(text)
print(found_dates)
# Expected output: ['23-08-1994', '08/23/1994', '1994.08.23']


['23-08-1994', '08-23-1994', '1994-08-23']


In [30]:
#Question 6: Decode Polyline, Convert to DataFrame with Distances
# To implement the polyline_to_dataframe function that decodes
# a polyline string into a Pandas DataFrame with latitude, longitude, and distance between consecutive points


In [21]:
import pandas as pd
import numpy as np

def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Calculate the distance between two points on the Earth using the Haversine formula.

    Args:
        lat1 (float): Latitude of the first point.
        lon1 (float): Longitude of the first point.
        lat2 (float): Latitude of the second point.
        lon2 (float): Longitude of the second point.

    Returns:
        float: Distance between the two points in meters.
    """
    R = 6371000  # Earth radius in meters
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = np.sin(delta_phi / 2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return R * c  # Distance in meters

def decode_polyline(polyline_str: str):
    """Decode a Google Maps encoded polyline into a list of (latitude, longitude) tuples."""
    index = 0
    lat = 0
    lon = 0
    coordinates = []

    while index < len(polyline_str):
        # Decode latitude
        b, shift = 0, 0
        while True:
            byte = ord(polyline_str[index]) - 63
            index += 1
            b |= (byte & 0x1f) << shift
            shift += 5
            if byte < 0x20:
                break
        lat += ~(b >> 1) if (b & 1) else (b >> 1)

        # Decode longitude
        b, shift = 0, 0
        while True:
            byte = ord(polyline_str[index]) - 63
            index += 1
            b |= (byte & 0x1f) << shift
            shift += 5
            if byte < 0x20:
                break
        lon += ~(b >> 1) if (b & 1) else (b >> 1)

        coordinates.append((lat / 1E5, lon / 1E5))  # Convert to float coordinates

    return coordinates

def polyline_to_dataframe(polyline_str: str) -> pd.DataFrame:
    """
    Converts a polyline string into a DataFrame with latitude, longitude, and distance between consecutive points.
    
    Args:   
        polyline_str (str): The encoded polyline string.

    Returns:
        pd.DataFrame: A DataFrame containing latitude, longitude, and distance in meters.
    """
    # Decode the polyline string to a list of (latitude, longitude) tuples
    decoded_points = decode_polyline(polyline_str)

    # Create a DataFrame from the decoded points
    df = pd.DataFrame(decoded_points, columns=['latitude', 'longitude'])

    # Calculate the distance between consecutive points
    distances = [0]  # First point has no previous point
    for i in range(1, len(decoded_points)):
        dist = haversine(df.latitude[i-1], df.longitude[i-1], df.latitude[i], df.longitude[i])
        distances.append(dist)

    # Add the distances to the DataFrame
    df['distance'] = distances

    return df

# Example usage
polyline_str = "_p~iF~|wU~w@x@w@xA"
df = polyline_to_dataframe(polyline_str)
print(df)


   latitude  longitude     distance
0  38.50000   -3.73216     0.000000
1  38.49088   -3.73245  1014.411733
2  38.49116   -3.73290    50.032431


In [31]:
#Question 7: Matrix Rotation and Transformation
# To solve the problem of rotating a square matrix by 90 degrees clockwise
# and then transforming it by replacing each element with the sum of the elements
# in its original row and column (excluding itself), you can follow these steps:

# Rotate the Matrix: Implement a function that rotates the matrix.
# Transform the Matrix: After rotation, replace each element with the sum of its row and column (excluding itself).



In [23]:
from typing import List

def rotate_and_multiply_matrix(matrix: List[List[int]]) -> List[List[int]]:
    """
    Rotate the given matrix by 90 degrees clockwise, then transform each element 
    by replacing it with the sum of all elements in the same row and column (excluding itself).
    
    Args:
    - matrix (List[List[int]]): 2D list representing the matrix to be transformed.
    
    Returns:
    - List[List[int]]: A new 2D list representing the transformed matrix.
    """
    n = len(matrix)  # Get the size of the matrix (n x n)

    # Step 1: Rotate the matrix by 90 degrees clockwise
    rotated_matrix = [[0] * n for _ in range(n)]
    for i in range(n):
        for j in range(n):
            rotated_matrix[j][n - 1 - i] = matrix[i][j]

    # Step 2: Transform the matrix
    final_matrix = [[0] * n for _ in range(n)]
    for i in range(n):
        for j in range(n):
            # Calculate the sum of the current row and column excluding the current element
            row_sum = sum(rotated_matrix[i]) - rotated_matrix[i][j]
            col_sum = sum(rotated_matrix[k][j] for k in range(n)) - rotated_matrix[i][j]
            final_matrix[i][j] = row_sum + col_sum

    return final_matrix

# Example usage
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
result = rotate_and_multiply_matrix(matrix)
print(result)


[[22, 19, 16], [23, 20, 17], [24, 21, 18]]


In [24]:
#Question 8: Time Check
# To verify the completeness of the data based on the given conditions for each unique (id, id_2) pair in the dataset, you need to perform the following steps:

# Convert the Timestamp: Ensure the timestamp is in a datetime format for easy manipulation.
# Group by (id, id_2): Group the DataFrame by the (id, id_2) pairs.
# Check Conditions: For each group, check if the timestamps cover a full 24-hour period and span all 7 days of the week.
# Return a Boolean Series: Create a boolean series that indicates whether the timestamps are complete for each unique (id, id_2) pair.



In [25]:
import pandas as pd

def time_check(df: pd.DataFrame) -> pd.Series:
    """
    Verify the completeness of the data by checking whether the timestamps 
    for each unique (`id`, `id_2`) pair cover a full 24-hour and 7 days period.

    Args:
        df (pandas.DataFrame): DataFrame containing id, id_2, and timestamp columns.

    Returns:
        pd.Series: A boolean series indicating if each (id, id_2) pair has incorrect timestamps.
    """
    # Ensure timestamp is in datetime format
    df['start_timestamp'] = pd.to_datetime(df['startDay'].astype(str) + ' ' + df['startTime'])
    df['end_timestamp'] = pd.to_datetime(df['endDay'].astype(str) + ' ' + df['endTime'])

    # Group by id and id_2
    grouped = df.groupby(['id', 'id_2'])

    # Function to check completeness for each group
    def check_completeness(group):
        # Check if all 7 days are covered
        all_days_covered = len(group['start_timestamp'].dt.date.unique()) == 7

        # Check if the time covers a full 24-hour period for each day
        time_coverage = group['start_timestamp'].dt.time.min() <= pd.to_datetime('00:00:00').time() and \
                        group['end_timestamp'].dt.time.max() >= pd.to_datetime('23:59:59').time()

        return all_days_covered and time_coverage

    # Apply the completeness check and return a boolean series
    completeness_series = grouped.apply(check_completeness)

    # Convert the result to a Series with multi-index (id, id_2)
    completeness_series.index = pd.MultiIndex.from_tuples(completeness_series.index)
    
    return completeness_series

# Example usage
# df = pd.read_csv('dataset-1.csv')  # Load your dataset
# result = time_check(df)
# print(result)
