# Question 1: Reverse List by N Elements

In [1]:
from typing import Dict, List

import pandas as pd


In [2]:
def reverse_by_n_elements(lst: List[int], n: int) -> List[int]:
    """
    Reverse the input list in consecutive groups of n elements.

    The list is walked from the front in chunks of n items and each chunk
    is emitted in reverse order. A shorter trailing chunk (when len(lst)
    is not a multiple of n) is reversed as well. The input list itself is
    not modified.

    :param lst: The list to process.
    :param n: Size of each group; must be at least 1.
    :return: A new list with every group of up to n elements reversed.
    :raises ValueError: If n is less than 1.
    """
    if n < 1:
        # A zero step makes range() fail with an unrelated message, and a
        # negative step makes it yield nothing, which would silently drop
        # every element. Reject both up front.
        raise ValueError("n must be a positive integer")

    result = []  # Final list, built group by group
    length = len(lst)

    for start in range(0, length, n):
        stop = min(start + n, length)  # The last group may be shorter than n
        # Walk the group backwards by index so no temporary list is needed
        for j in range(stop - 1, start - 1, -1):
            result.append(lst[j])

    return result

# Example usage:
if __name__ == "__main__":
    # Each entry is (input list, group size); every list length here leaves
    # a shorter final group.
    for sample, size in (
        ([1, 2, 3, 4, 5, 6, 7, 8], 3),
        ([1, 2, 3, 4, 5], 2),
        ([10, 20, 30, 40, 50, 60, 70], 4),
    ):
        print(reverse_by_n_elements(sample, size))


[3, 2, 1, 6, 5, 4, 8, 7]
[2, 1, 4, 3, 5]
[40, 30, 20, 10, 70, 60, 50]


In [3]:
def group_by_length(lst: List[str]) -> Dict[int, List[str]]:
    """
    Bucket the given strings by their length.

    Strings keep their input order inside each bucket, and the keys of the
    returned dictionary appear in ascending length order.
    """
    buckets = {}

    # setdefault creates the bucket the first time a length is seen
    for word in lst:
        buckets.setdefault(len(word), []).append(word)

    # Rebuild the mapping with its keys in ascending order
    return {size: buckets[size] for size in sorted(buckets)}

# Example usage:
if __name__ == "__main__":
    # Print the length-grouped dictionary for each sample word list
    for words in (
        ["apple", "bat", "car", "elephant", "dog", "bear"],
        ["one", "two", "three", "four"],
    ):
        print(group_by_length(words))

{3: ['bat', 'car', 'dog'], 4: ['bear'], 5: ['apple'], 8: ['elephant']}
{3: ['one', 'two'], 4: ['four'], 5: ['three']}


# Question 3: Flatten a Nested Dictionary

In [4]:
from typing import Dict, Any
def flatten_dict(nested_dict: Dict[str, Any], sep: str = '.') -> Dict[str, Any]:
    """
    Flattens a nested dictionary into a single-level dictionary.

    Nested dictionary keys are joined with ``sep`` and list elements are
    addressed by index in square brackets, e.g. ``road.sections[0].id``.
    Lists nested inside lists are flattened too (``a[0][1]``). Empty
    dictionaries and empty lists are kept as values under their own key so
    that no key from the input disappears from the output.

    :param nested_dict: The dictionary object to flatten
    :param sep: The separator to use between parent and child keys (defaults to '.')
    :return: A flattened dictionary
    """
    flattened: Dict[str, Any] = {}

    def walk(value: Any, key: Any) -> None:
        if isinstance(value, dict) and value:
            for child_key, child in value.items():
                # Only prefix with the parent when there is one, so top-level
                # keys are not emitted with a leading separator
                walk(child, f"{key}{sep}{child_key}" if key else child_key)
        elif isinstance(value, list) and value:
            for index, item in enumerate(value):
                # Recurse for every element so dicts and lists inside lists
                # are flattened as well
                walk(item, f"{key}[{index}]")
        else:
            # Scalars, and empty containers that have nothing to descend into
            flattened[key] = value

    for top_key, top_value in nested_dict.items():
        walk(top_value, top_key)

    return flattened

# Example usage:
if __name__ == "__main__":
    # A road record whose single section is a dict nested inside a list
    section = {
        "id": 1,
        "condition": {"pavement": "good", "traffic": "moderate"},
    }
    nested_example = {
        "road": {"name": "Highway 1", "length": 350, "sections": [section]},
    }

    result = flatten_dict(nested_example)
    print(result)
    

{'road.name': 'Highway 1', 'road.length': 350, 'road.sections[0].id': 1, 'road.sections[0].condition.pavement': 'good', 'road.sections[0].condition.traffic': 'moderate'}


# Question 4: Generate Unique Permutations


In [5]:
def unique_permutations(nums: List[int]) -> List[List[int]]:
    """
    Generate all unique permutations of a list that may contain duplicates.

    The caller's list is left untouched: the search runs on a sorted copy.

    :param nums: List of integers (may contain duplicates)
    :return: List of unique permutations
    """
    # Sorted copy, so the in-place swaps below never reach the caller's list
    items = sorted(nums)
    size = len(items)
    results: List[List[int]] = []

    def backtrack(start: int) -> None:
        if start == size:
            results.append(items[:])  # Snapshot; items keeps being swapped
            return

        # Values already placed at position `start` in this call; placing the
        # same value there again would only reproduce earlier permutations
        seen = set()
        for i in range(start, size):
            if items[i] in seen:
                continue
            seen.add(items[i])
            items[start], items[i] = items[i], items[start]
            backtrack(start + 1)
            items[start], items[i] = items[i], items[start]  # Undo the swap

    backtrack(0)
    return results

# Example usage:
if __name__ == "__main__":
    input_list = [1, 1, 2]
    permutations = unique_permutations(input_list)

    # Print one permutation per line, indented with a trailing comma and
    # wrapped in brackets so the output reads like a list literal
    print("[", *(f"    {perm}," for perm in permutations), "]", sep="\n")


[
    [1, 1, 2],
    [1, 2, 1],
    [2, 1, 1],
]


# Question 5: Find All Dates in a Text

In [6]:
import re
def find_all_dates(text: str) -> List[str]:
    """
    Return the valid dates found in the text, in order of appearance.

    Recognised formats are 'dd-mm-yyyy', 'mm/dd/yyyy' and 'yyyy.mm.dd'.
    A candidate that has the right shape but is not a real calendar date
    (for example '99-99-9999' or '2021.02.30') is skipped.

    :param text: A string containing the dates in various formats.
    :return: A list of valid dates in the formats specified.
    """
    # Local import so this function does not rely on a file-level import
    from datetime import datetime

    # Regular expression for the shape of the three date formats
    date_pattern = r"\b\d{2}-\d{2}-\d{4}\b|\b\d{2}/\d{2}/\d{4}\b|\b\d{4}\.\d{2}\.\d{2}\b"
    # The separator character identifies which format a candidate is in
    formats = {"-": "%d-%m-%Y", "/": "%m/%d/%Y", ".": "%Y.%m.%d"}

    dates = []
    for candidate in re.findall(date_pattern, text):
        # dd-mm-yyyy and mm/dd/yyyy have their separator at index 2;
        # yyyy.mm.dd has a digit there and its first separator at index 4
        separator = candidate[2] if candidate[2] in "-/" else candidate[4]
        try:
            # strptime rejects out-of-range days and months, including
            # month-specific limits and leap years
            datetime.strptime(candidate, formats[separator])
        except ValueError:
            continue
        dates.append(candidate)

    return dates

# Example usage:
if __name__ == "__main__":
    # The sample sentence holds one date in each of the three formats
    text = "I was born on 23-08-1994, my friend on 08/23/1994, and another one on 1994.08.23."
    result = find_all_dates(text=text)
    print(result)
   


['23-08-1994', '08/23/1994', '1994.08.23']


# Question 6: Decode Polyline, Convert to DataFrame with Distances

In [7]:
import polyline
import pandas as pd
from math import radians, sin, cos, sqrt, atan2
from typing import List, Tuple

def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Calculate the great-circle distance between two points on the Earth's surface.

    The input coordinates are in decimal degrees, and the result is in meters
    (using a spherical Earth of radius 6,371,000 m).

    Args:
        lat1 (float): Latitude of the first point, in degrees.
        lon1 (float): Longitude of the first point, in degrees.
        lat2 (float): Latitude of the second point, in degrees.
        lon2 (float): Longitude of the second point, in degrees.

    Returns:
        float: Distance between the two points in meters.
    """
    R = 6371000  # Radius of the Earth in meters
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Mathematically a lies in [0, 1]; clamp it so floating-point rounding
    # cannot hand sqrt() a negative argument
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c

def polyline_to_dataframe(polyline_str: str) -> pd.DataFrame:
    """
    Decode a polyline string into a table of points with step distances.

    Args:
        polyline_str (str): The encoded polyline string.

    Returns:
        pd.DataFrame: Columns 'latitude', 'longitude' and 'distance', where
        'distance' is the haversine distance from the previous row and is
        0.0 for the first row.
    """
    # Decoded points become the latitude/longitude columns
    points = polyline.decode(polyline_str)
    frame = pd.DataFrame(points, columns=['latitude', 'longitude'])

    # Every row starts at 0.0; rows after the first are overwritten below
    frame['distance'] = 0.0

    # Pull the coordinates out once, then fill each row's distance from the
    # point before it
    lats = frame['latitude'].tolist()
    lons = frame['longitude'].tolist()
    for row in range(1, len(frame)):
        frame.loc[row, 'distance'] = haversine(
            lats[row - 1], lons[row - 1], lats[row], lons[row]
        )

    return frame

# Example usage:
if __name__ == "__main__":
    # Decode a sample encoded polyline and show the resulting table
    polyline_str = "_p~iF~ps|U_ulLnnqC_mqNvxq`@"
    df = polyline_to_dataframe(polyline_str=polyline_str)
    print(df)


ModuleNotFoundError: No module named 'polyline'

# Question 7: Matrix Rotation and Transformation

In [None]:
from typing import List

def rotate_and_sum_matrix(matrix: List[List[int]]) -> List[List[int]]:
    """
    Rotate the given matrix by 90 degrees clockwise, then replace each element with
    the sum of all elements in the same row and column excluding itself.

    The input matrix is not modified. Rows are assumed to have equal length;
    the matrix does not have to be square.

    Args:
    - matrix (List[List[int]]): 2D list representing the matrix to be transformed.

    Returns:
    - List[List[int]]: A new 2D list representing the transformed matrix.
    """
    # Step 1: Rotate by 90 degrees clockwise. Reversing the row order and then
    # transposing turns the bottom row into the first column.
    rotated_matrix = [list(column) for column in zip(*matrix[::-1])]

    # Step 2: Compute every row and column total once, up front
    row_sums = [sum(row) for row in rotated_matrix]
    col_sums = [sum(column) for column in zip(*rotated_matrix)]

    # The element is part of both its row total and its column total, so it
    # has to be subtracted twice to be excluded from the result
    return [
        [row_sums[i] + col_sums[j] - 2 * value for j, value in enumerate(row)]
        for i, row in enumerate(rotated_matrix)
    ]
# Example usage:
# Transform a 3x3 sample and print the result one row per line
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

result = rotate_and_sum_matrix(matrix=matrix)
for row in result:
    print(row)

In [9]:
import pandas as pd

def time_check(df: pd.DataFrame) -> pd.Series:
    """
    Verifies the completeness of the time data for each unique (id, id_2) pair.

    A pair is reported as complete when its timestamps fall on all 7 days of
    the week and, on every one of those days, the earliest time is 00:00:00
    and the latest time is at least 23:59:59. Timestamps are parsed from the
    'startTime' column; 'endTime' must be present but is not otherwise read.
    The caller's DataFrame is not modified.

    Args:
        df (pd.DataFrame): A DataFrame containing columns 'id', 'id_2', 'startTime' and 'endTime'.

    Returns:
        pd.Series: A boolean series with a MultiIndex of (id, id_2), indicating if each pair has complete timestamps.

    Raises:
        ValueError: If 'startTime' or 'endTime' is missing from the DataFrame.
    """
    # Print column names to verify the existence of timestamp data
    print("Column names:", df.columns)

    if 'startTime' not in df.columns or 'endTime' not in df.columns:
        raise ValueError("Timestamp column or startTime/endTime not found.")

    # NOTE(review): the weekday below is derived from 'startTime' alone. If
    # that column holds bare clock times (the dataset also carries
    # 'startDay'/'endDay' columns), every row parses to the same calendar day
    # and no pair can ever pass the 7-day check - confirm the intended columns.
    timestamp = pd.to_datetime(df['startTime'])

    # Build the helper columns on a separate frame so the input stays untouched
    work = df[['id', 'id_2']].copy()
    work['day_of_week'] = timestamp.dt.dayofweek  # Monday=0, Sunday=6
    work['time_of_day'] = timestamp.dt.time

    # Bounds and the reference week are the same for every group
    day_start = pd.Timestamp("00:00:00").time()
    day_end = pd.Timestamp("23:59:59").time()
    full_week = set(range(7))  # Full week: Monday (0) to Sunday (6)

    def check_completeness(group) -> bool:
        # All seven weekdays must be present
        if set(group['day_of_week'].unique()) != full_week:
            return False

        # On each weekday the times must reach both ends of the day
        for _, day_group in group.groupby('day_of_week'):
            times = day_group['time_of_day']
            if times.min() > day_start or times.max() < day_end:
                return False

        return True

    # Apply the completeness check to each (id, id_2) group
    return work.groupby(['id', 'id_2']).apply(check_completeness)

# Example usage:
# Load the dataset and print the per-(id, id_2) result of the check
df = pd.read_csv(filepath_or_buffer='dataset-1.csv')
result = time_check(df=df)
print(result)


Column names: Index(['id', 'name', 'id_2', 'startDay', 'startTime', 'endDay', 'endTime',
       'able2Hov2', 'able2Hov3', 'able3Hov2', 'able3Hov3', 'able5Hov2',
       'able5Hov3', 'able4Hov2', 'able4Hov3'],
      dtype='object')


  df['timestamp'] = pd.to_datetime(df['startTime'])  # Example: use startTime as the main timestamp


id       id_2    
1014000  -1          False
1014002  -1          False
1014003  -1          False
1030000  -1          False
          1030002    False
                     ...  
1330016   1330006    False
          1330008    False
          1330010    False
          1330012    False
          1330014    False
Length: 9254, dtype: bool
