# Python Section 1

## Question 1: Reverse List by N Elements

Problem Statement:

Write a function that takes a list and an integer n, and returns the list with every group of n elements reversed. If there are fewer than n elements left at the end, reverse all of them.

Requirements:

1. You must not use any built-in slicing or reverse functions to directly reverse the sublists.
2. The result should reverse the elements in groups of size n.

In [1]:
from typing import List

def reverse_by_n_elements(lst: List[int], n: int) -> List[int]:
    """
    Reverses the input list by groups of n elements.
    If the number of elements at the end is less than n, reverse all of them.
    
    Args:
        lst (List[int]): The list to be reversed in groups.
        n (int): The size of each group to reverse.
    
    Returns:
        List[int]: The list with elements reversed in groups of n.
    """
    result = []
    for i in range(0, len(lst), n):
        group = lst[i:i + n]  # Take a slice of n elements
        # Reverse manually without using built-in reverse functions
        reversed_group = []
        for j in range(len(group) - 1, -1, -1):
            reversed_group.append(group[j])
        result.extend(reversed_group)
    
    return result

# Get input from the user
lst_input = input("Enter a list of integers separated by spaces: ")
n = int(input("Enter the group size (n): "))

# Convert the input string into a list of integers
lst = list(map(int, lst_input.split()))

# Reverse by groups of n elements
result = reverse_by_n_elements(lst, n)

# Output the result
print(f"List reversed by groups of {n}: {result}")


Enter a list of integers separated by spaces:  1 2 3 4 5 6 7 8 
Enter the group size (n):  3


List reversed by groups of 3: [3, 2, 1, 6, 5, 4, 8, 7]


## Question 2: Lists & Dictionaries

Problem Statement:

Write a function that takes a list of strings and groups them by their length. The result should be a dictionary where:

1. The keys are the string lengths.
2. The values are lists of strings that have the same length as the key.

Requirements:

1. Each string should appear in the list corresponding to its length.
2. The result should be sorted by the lengths (keys) in ascending order.

In [2]:
from typing import List, Dict

def group_by_length(lst: List[str]) -> Dict[int, List[str]]:
    """
    Groups the strings by their length and returns a dictionary.
    The keys are the lengths, and the values are lists of strings of that length.
    
    Args:
        lst (List[str]): A list of strings.
    
    Returns:
        Dict[int, List[str]]: A dictionary where keys are string lengths and values are lists of strings of that length.
    """
    result = {}
    for word in lst:
        length = len(word)
        if length not in result:
            result[length] = []
        result[length].append(word)
    
    # Sorting the dictionary by keys (string lengths)
    return dict(sorted(result.items()))

# Prompting the user for input
user_input = input("Enter a list of words separated by commas: ").split(',')

# Stripping extra spaces around words
user_input = [word.strip() for word in user_input]

# Call the function
output = group_by_length(user_input)

# Display the result
print("The strings grouped by their lengths are:")
for length, words in output.items():
    print(f"Length {length}: {words}")


Enter a list of words separated by commas:  apple , bat , car , elephant , dog , bear 


The strings grouped by their lengths are:
Length 3: ['bat', 'car', 'dog']
Length 4: ['bear']
Length 5: ['apple']
Length 8: ['elephant']


## Question 3: Flatten a Nested Dictionary


You are given a nested dictionary that contains various details (including lists and sub-dictionaries). Your task is to write a Python function that flattens the dictionary such that:

1. Nested keys are concatenated into a single key with levels separated by a dot (.).
2. List elements should be referenced by their index, enclosed in square brackets (e.g., sections[0]).

For example, if a key points to a list, the index of the list element should be appended to the key string, followed by a dot to handle further nested dictionaries.

Requirements:

1. Nested Dictionary: Flatten nested dictionaries into a single level, concatenating keys.
2. Handling Lists: Flatten lists by using the index as part of the key.
3. Key Separator: Use a dot (.) as a separator between nested key levels.
4. Empty Input: The function should handle empty dictionaries gracefully.
5. Nested Depth: You can assume the dictionary has a maximum of 4 levels of nesting.

In [3]:
from typing import Dict, Any, Union, List

def flatten_dict(nested_dict: Dict[str, Any], sep: str = '.') -> Dict[str, Any]:
    """
    Flattens a nested dictionary into a single-level dictionary with dot notation for keys.
    
    :param nested_dict: The dictionary object to flatten
    :param sep: The separator to use between parent and child keys (defaults to '.')
    :return: A flattened dictionary
    """
    def flatten(item: Union[Dict, List], parent_key: str = '') -> Dict[str, Any]:
        items = {}
        if isinstance(item, dict):
            for key, value in item.items():
                new_key = f"{parent_key}{sep}{key}" if parent_key else key
                items.update(flatten(value, new_key))
        elif isinstance(item, list):
            for index, value in enumerate(item):
                new_key = f"{parent_key}[{index}]"
                items.update(flatten(value, new_key))
        else:
            items[parent_key] = item
        return items

    return flatten(nested_dict)

# Example usage
nested_dict = {
    "road": {
        "name": "Highway 1",
        "length": 350,
        "sections": [
            {
                "id": 1,
                "condition": {
                    "pavement": "good",
                    "traffic": "moderate"
                }
            }
        ]
    }
}

flattened_dict = flatten_dict(nested_dict)
print(flattened_dict)

{'road.name': 'Highway 1', 'road.length': 350, 'road.sections[0].id': 1, 'road.sections[0].condition.pavement': 'good', 'road.sections[0].condition.traffic': 'moderate'}


## Question 4: Generate Unique Permutations

Problem Statement:

You are given a list of integers that may contain duplicates. Your task is to generate all unique permutations of the list. The output should not contain any duplicate permutations.

In [4]:
from typing import List

def unique_permutations(nums: List[int]) -> List[List[int]]:
    """
    Generate all unique permutations of a list that may contain duplicates.
    
    :param nums: List of integers (may contain duplicates)
    :return: List of unique permutations
    """
    def backtrack(start: int):
        if start == len(nums):
            result.append(nums[:])  # Append a copy of the current permutation
            return
        seen = set()  # To keep track of duplicates at this position
        for i in range(start, len(nums)):
            if nums[i] in seen:
                continue  # Skip duplicates
            seen.add(nums[i])  # Mark this element as seen
            nums[start], nums[i] = nums[i], nums[start]  # Swap to fix the current number
            backtrack(start + 1)  # Recur for the next index
            nums[start], nums[i] = nums[i], nums[start]  # Backtrack (swap back)

    nums.sort()  # Sort to handle duplicates
    result = []
    backtrack(0)
    return result

# User input section
input_str = input("Enter a list of integers separated by spaces (duplicates allowed): ")
input_nums = list(map(int, input_str.split()))
output_permutations = unique_permutations(input_nums)

print("Unique permutations:")
for perm in output_permutations:
    print(perm)


Enter a list of integers separated by spaces (duplicates allowed):  1 1 2


Unique permutations:
[1, 1, 2]
[1, 2, 1]
[2, 1, 1]


## Question 5: Find All Dates in a Text

Problem Statement:

You are given a string that contains dates in various formats (such as "dd-mm-yyyy", "mm/dd/yyyy", "yyyy.mm.dd", etc.). Your task is to identify and return all the valid dates present in the string.

You need to write a function find_all_dates that takes a string as input and returns a list of valid dates found in the text. The dates can be in any of the following formats:

1. dd-mm-yyyy
2. mm/dd/yyyy
3. yyyy.mm.dd

You are required to use regular expressions to identify these dates.

In [5]:
import re
from typing import List

def find_all_dates(text: str) -> List[str]:
    """
    This function takes a string as input and returns a list of valid dates
    in 'dd-mm-yyyy', 'mm/dd/yyyy', or 'yyyy.mm.dd' format found in the string.
    
    Parameters:
    text (str): A string containing the dates in various formats.

    Returns:
    List[str]: A list of valid dates in the formats specified.
    """
    # Regular expression patterns for the date formats
    patterns = [
        r'\b(\d{2}-\d{2}-\d{4})\b',  # dd-mm-yyyy
        r'\b(\d{2}/\d{2}/\d{4})\b',  # mm/dd/yyyy
        r'\b(\d{4}\.\d{2}\.\d{2})\b'   # yyyy.mm.dd
    ]
    
    # Combine all patterns into one regex
    combined_pattern = '|'.join(patterns)
    
    # Find all matches in the text
    matches = re.findall(combined_pattern, text)
    
    # Flatten the list of matches
    dates = [date for match in matches for date in match if date]

    return dates

# User input section
text = input("Enter a string containing dates in various formats: ")
output_dates = find_all_dates(text)

# Desired output format
print("Input:", text)
print("Output:", output_dates)


Enter a string containing dates in various formats:  I was born on 23-08-1994, my friend on 08/23/1994, and another one on 1994.08.23.


Input: I was born on 23-08-1994, my friend on 08/23/1994, and another one on 1994.08.23.
Output: ['23-08-1994', '08/23/1994', '1994.08.23']


## Question 6: Decode Polyline, Convert to DataFrame with Distances

You are given a polyline string, which encodes a series of latitude and longitude coordinates. Polyline encoding is a method to efficiently store latitude and longitude data using fewer bytes. The Python polyline module allows you to decode this string into a list of coordinates.

Write a function that performs the following operations:

1. Decode the polyline string using the polyline module into a list of (latitude, longitude) coordinates.
2. Convert these coordinates into a Pandas DataFrame with the following columns:
   (i). latitude: Latitude of the coordinate.
   (ii). longitude: Longitude of the coordinate.
   (iii). distance: The distance (in meters) between the current row's coordinate and the previous row's one. The first row               will have a distance of 0 since there is no previous point.
3. Calculate the distance using the Haversine formula for points in successive rows.

In [6]:
import polyline
import pandas as pd
import numpy as np

def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance in meters between two points
    on the Earth specified in decimal degrees using the Haversine formula.
    """
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    
    # Haversine formula
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    
    # Radius of Earth in meters (mean radius)
    r = 6371000  
    return c * r

def decode_polyline_to_df(polyline_str: str) -> pd.DataFrame:
    """
    Decode a polyline string into a DataFrame with distances between coordinates.
    
    :param polyline_str: The encoded polyline string.
    :return: A Pandas DataFrame with latitude, longitude, and distance columns.
    """
    # Decode the polyline string into a list of (latitude, longitude) tuples
    coordinates = polyline.decode(polyline_str)
    
    # Create a DataFrame from the coordinates
    df = pd.DataFrame(coordinates, columns=['latitude', 'longitude'])
    
    # Calculate distances using the Haversine formula
    distances = [0.0]  # The first distance is 0 since there's no previous point
    for i in range(1, len(df)):
        dist = haversine(df.latitude[i-1], df.longitude[i-1], 
                         df.latitude[i], df.longitude[i])
        distances.append(dist)
    
    df['distance'] = distances
    return df

# Example polyline string (for demonstration)
polyline_str = "_p~iF~ps|U_ulL~|sx~u@~|bq~o@qz~s@"

# Convert to DataFrame
df = decode_polyline_to_df(polyline_str)

# Display the resulting DataFrame
print(df)


     latitude   longitude      distance
0    38.50000  -120.20000  0.000000e+00
1    40.70000 -9346.62912  1.002670e+07
2 -8177.86224 -9337.94631  6.818307e+06


## Question 7: Matrix Rotation and Transformation

Write a function that performs the following operations on a square matrix (n x n):

1. Rotate the matrix by 90 degrees clockwise.
2. After rotation, for each element in the rotated matrix, replace it with the sum of all elements in the same row and column      (in the rotated matrix), excluding itself.

The function should return the transformed matrix.

In [7]:
def rotate_matrix(matrix):
    """
    Rotate the matrix by 90 degrees clockwise.
    
    :param matrix: The input square matrix (n x n).
    :return: The rotated matrix.
    """
    n = len(matrix)
    rotated = [[0] * n for _ in range(n)]
    
    for i in range(n):
        for j in range(n):
            rotated[j][n - 1 - i] = matrix[i][j]
    
    return rotated

def transform_matrix(matrix):
    """
    Transform the rotated matrix by replacing each element with the sum
    of all elements in the same row and column, excluding itself.
    
    :param matrix: The rotated square matrix (n x n).
    :return: The transformed matrix.
    """
    n = len(matrix)
    transformed = [[0] * n for _ in range(n)]
    
    for i in range(n):
        for j in range(n):
            row_sum = sum(matrix[i])  # Sum of the current row
            col_sum = sum(matrix[k][j] for k in range(n))  # Sum of the current column
            transformed[i][j] = row_sum + col_sum - matrix[i][j]  # Exclude the element itself
    
    return transformed

def rotate_and_transform(matrix):
    """
    Rotate the matrix by 90 degrees clockwise and transform it.
    
    :param matrix: The input square matrix (n x n).
    :return: The rotated and final transformed matrix.
    """
    rotated_matrix = rotate_matrix(matrix)
    final_matrix = transform_matrix(rotated_matrix)
    return rotated_matrix, final_matrix

def create_matrix_input():
    """
    Create a square matrix input from user.
    
    :return: A square matrix (list of lists).
    """
    n = int(input("Enter the size of the square matrix (n x n): "))
    matrix = []

    print(f"Enter the elements of the {n} x {n} matrix row by row:")
    for i in range(n):
        row = list(map(int, input(f"Row {i + 1}: ").split()))
        if len(row) != n:
            print(f"Please enter exactly {n} integers for row {i + 1}.")
            return create_matrix_input()  # Restart input if the row size is incorrect
        matrix.append(row)
    
    return matrix

# Example usage
user_matrix = create_matrix_input()  # Allow user to input the matrix
print("\nInput matrix:")
print(user_matrix)

rotated_matrix, final_result = rotate_and_transform(user_matrix)

# Display the rotated and final transformed matrices
print("\nRotated matrix (90 degrees clockwise):")
print(rotated_matrix)

print("\nFinal transformed matrix:")
print(final_result)


Enter the size of the square matrix (n x n):  3


Enter the elements of the 3 x 3 matrix row by row:


Row 1:  1 2 3
Row 2:  4 5 6
Row 3:  7 8 9



Input matrix:
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

Rotated matrix (90 degrees clockwise):
[[7, 4, 1], [8, 5, 2], [9, 6, 3]]

Final transformed matrix:
[[29, 23, 17], [31, 25, 19], [33, 27, 21]]


## Question 8: Time Check

You are given a dataset, dataset-1.csv, containing columns id, id_2, and timestamp (startDay, startTime, endDay, endTime). The goal is to verify the completeness of the time data by checking whether the timestamps for each unique (id, id_2) pair cover a full 24-hour period (from 12:00:00 AM to 11:59:59 PM) and span all 7 days of the week (from Monday to Sunday).

Create a function that accepts dataset-1.csv as a DataFrame and returns a boolean series that indicates if each (id, id_2) pair has incorrect timestamps. The boolean series must have multi-index (id, id_2).

In [8]:
import pandas as pd

# Load the dataset (replace 'dataset-1.csv' with your actual file path)
df = pd.read_csv('C:/Users/Nitesh Sharma/Submission/dataset-1.csv')

# Display the first few rows to inspect the date and time columns
print("Dataset Preview:")
print(df[['startDay', 'startTime', 'endDay', 'endTime']].head())

def time_check(df: pd.DataFrame) -> pd.Series:
    """
    Verify the completeness of the data by checking whether the timestamps for each unique (id, id_2) pair 
    cover a full 24-hour and 7-day period.
    
    Args:
        df (pandas.DataFrame): The input DataFrame containing timestamp data.

    Returns:
        pd.Series: A boolean series indicating whether each (id, id_2) pair has incorrect timestamps.
    """
    # Check for missing values
    if df[['startDay', 'startTime', 'endDay', 'endTime']].isnull().any().any():
        print("Missing values detected in date or time columns.")
        return pd.Series([True] * len(df))

    # Specify format explicitly and handle parsing errors
    df['start_datetime'] = pd.to_datetime(df['startDay'] + ' ' + df['startTime'], format='%Y-%m-%d %H:%M', errors='coerce')
    df['end_datetime'] = pd.to_datetime(df['endDay'] + ' ' + df['endTime'], format='%Y-%m-%d %H:%M', errors='coerce')

    # Handle potential parsing issues
    if df['start_datetime'].isnull().any() or df['end_datetime'].isnull().any():
        print("Invalid date or time format detected. Please check the data.")
        return pd.Series([True] * len(df))

    grouped = df.groupby(['id', 'id_2'])

    def check_group(group):
        # Generate a date range for the complete time period
        days = pd.date_range(start=group['start_datetime'].min().normalize(), 
                             end=group['end_datetime'].max().normalize(), 
                             freq='D')
        unique_days = group['start_datetime'].dt.date.unique()
        time_range = group['end_datetime'].max() - group['start_datetime'].min()

        # Check for 7 unique days and a time range of at least 24 hours
        return len(unique_days) < 7 or time_range < pd.Timedelta(hours=24)

    result = grouped.apply(check_group)

    return result

# Check the timestamps for completeness
incorrect_timestamps = time_check(df)

# Display the result
print("Incorrect Timestamps:")
print(incorrect_timestamps)


Dataset Preview:
   startDay startTime     endDay   endTime
0    Monday  05:00:00  Wednesday  10:00:00
1    Monday  10:00:00     Friday  15:00:00
2  Thursday  15:00:00     Friday  19:00:00
3    Monday  19:00:00     Friday  23:59:59
4  Saturday  00:00:00     Sunday  23:59:59
Invalid date or time format detected. Please check the data.
Incorrect Timestamps:
0        True
1        True
2        True
3        True
4        True
         ... 
39509    True
39510    True
39511    True
39512    True
39513    True
Length: 39514, dtype: bool
