In [None]:
"""
1. Question: SQL-to-JSON Transformation
You are given a list of rows from an SQL database represented as a list of dictionaries, where each dictionary corresponds to a row. Each row contains user_id, name, age, and city.

Write a Python function transform_data(rows: List[Dict[str, Union[str, int]]]) -> Dict[int, Dict[str, Union[str, int]]] that transforms the data into a dictionary format where the keys are the user_id values, and the corresponding value is a dictionary of the remaining fields (name, age, city).

Example:
rows = [
    {"user_id": 1, "name": "Alice", "age": 25, "city": "New York"},
    {"user_id": 2, "name": "Bob", "age": 30, "city": "San Francisco"}
]
Output:
{
    1: {"name": "Alice", "age": 25, "city": "New York"},
    2: {"name": "Bob", "age": 30, "city": "San Francisco"}
}
"""

In [5]:
from typing import List, Dict, Union

def transform_data(rows: List[Dict[str, Union[str, int]]]) -> Dict[int, Dict[str, Union[str, int]]]:
    """Index SQL-style rows by their ``user_id``.

    Args:
        rows: Row dictionaries, each holding the keys ``user_id``, ``name``,
            ``age`` and ``city``. Any other keys are ignored.

    Returns:
        A dictionary mapping each ``user_id`` to a new dictionary containing
        that row's ``name``, ``age`` and ``city``. When the same ``user_id``
        occurs more than once, the last row wins.

    Raises:
        KeyError: If a row lacks one of the four expected keys.
    """
    value_fields = ("name", "age", "city")
    users_by_id = {}

    for record in rows:
        # Read the key first so a missing "user_id" is reported before any
        # missing value field.
        key = record["user_id"]
        users_by_id[key] = {field: record[field] for field in value_fields}

    return users_by_id

# Example usage: the two sample rows from the problem statement.
rows = [
    {"user_id": 1, "name": "Alice", "age": 25, "city": "New York"},
    {"user_id": 2, "name": "Bob", "age": 30, "city": "San Francisco"}
]

# Each row is re-keyed by its user_id; the printed dict has keys 1 and 2.
output = transform_data(rows)
print(output)



{1: {'name': 'Alice', 'age': 25, 'city': 'New York'}, 2: {'name': 'Bob', 'age': 30, 'city': 'San Francisco'}}


In [None]:
"""
2. Question: Process Log Files
You are given a list of log file entries, where each entry is a string in the format "[timestamp] user_id action". The action can be either "start" or "stop". Your task is to write a Python function process_logs(logs: List[str]) -> Dict[int, int] that returns a dictionary mapping each user_id to the total duration (in seconds) they were active (i.e., between start and stop).

Assume:

All start actions are followed by a stop for the same user.
The timestamp is in the format "YYYY-MM-DD HH:MM:SS".
Example:
logs = [
    "[2024-01-01 10:00:00] 1 start",
    "[2024-01-01 10:05:00] 1 stop",
    "[2024-01-01 10:00:00] 2 start",
    "[2024-01-01 10:10:00] 2 stop"
]
Output:
{
    1: 300,  # 5 minutes
    2: 600   # 10 minutes
}
"""

In [7]:
from typing import List, Dict
from datetime import datetime

def process_logs(logs: List[str]) -> Dict[int, int]:
    """Total the active time per user from start/stop log entries.

    Each entry is expected in the form
    ``"[YYYY-MM-DD HH:MM:SS] user_id action"`` where ``action`` is ``"start"``
    or ``"stop"``. Entries are processed in the order given.

    Behaviour on irregular input:
        * Blank or whitespace-only entries (e.g. the trailing line of a log
          file) are skipped.
        * A ``stop`` with no pending ``start`` for that user is ignored.
        * A second ``start`` before a ``stop`` replaces the pending start.
        * Actions other than ``start``/``stop`` are ignored.

    Args:
        logs: Log entries in processing order.

    Returns:
        A dictionary mapping each ``user_id`` to the summed duration, in whole
        seconds, of that user's completed start/stop sessions. Users with no
        completed session do not appear.

    Raises:
        ValueError: If a timestamp or ``user_id`` cannot be parsed.
        IndexError: If a non-blank entry has fewer than four
            whitespace-separated fields.
    """
    user_sessions = {}
    user_start_times = {}

    for log in logs:
        # Drop the brackets around the timestamp, then split on whitespace:
        # [date, time, user_id, action].
        parts = log.replace('[', '').replace(']', '').split()
        if not parts:
            # Nothing to parse on a blank line; skip instead of crashing.
            continue

        timestamp_str = parts[0] + ' ' + parts[1]
        timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
        user_id = int(parts[2])
        action = parts[3]

        if action == "start":
            user_start_times[user_id] = timestamp
        elif action == "stop":
            # pop() so a session is counted once and a stray second "stop"
            # finds nothing to pair with.
            start_time = user_start_times.pop(user_id, None)
            if start_time is not None:
                duration = (timestamp - start_time).total_seconds()
                user_sessions[user_id] = user_sessions.get(user_id, 0) + int(duration)

    return user_sessions

# Example usage: one start/stop pair for each of users 1 and 2.
logs = [
    "[2024-01-01 10:00:00] 1 start",
    "[2024-01-01 10:05:00] 1 stop",
    "[2024-01-01 10:00:00] 2 start",
    "[2024-01-01 10:10:00] 2 stop"
]

# User 1 is active for 5 minutes (300 s), user 2 for 10 minutes (600 s).
output = process_logs(logs)
print(output)



{1: 300, 2: 600}


In [None]:
"""
3. Question: Data Deduplication
You are given a list of dictionaries representing records, where each dictionary contains id, name, and email. Write a Python function deduplicate(records: List[Dict[str, str]]) -> List[Dict[str, str]] that removes duplicate records based on the email field and returns a list of unique records. Keep the first occurrence of each email.

Example:
records = [
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"},
    {"id": "3", "name": "Alice", "email": "alice@example.com"}
]
Output:
[
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"}
]
"""

In [8]:
from typing import List, Dict

def deduplicate(records: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Drop records whose ``email`` has already been seen.

    Args:
        records: Record dictionaries, each with an ``email`` key.

    Returns:
        A new list holding the first record encountered for every distinct
        email, in their original relative order. The record objects themselves
        are reused, not copied.

    Raises:
        KeyError: If a record has no ``email`` key.
    """
    # Dicts keep insertion order, and setdefault() only stores a value when the
    # key is new, so this retains exactly the first record per email.
    first_by_email = {}
    for record in records:
        first_by_email.setdefault(record["email"], record)

    return list(first_by_email.values())

# Example usage: records 1 and 3 share an email, so only record 1 is kept.
records = [
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"},
    {"id": "3", "name": "Alice", "email": "alice@example.com"}
]

output = deduplicate(records)
print(output)



[{'id': '1', 'name': 'Alice', 'email': 'alice@example.com'}, {'id': '2', 'name': 'Bob', 'email': 'bob@example.com'}]


In [None]:
"""
4. Question: Sliding Window Aggregation
You are given a list of integers representing transaction amounts and a window size k. Write a Python function sliding_window_sum(transactions: List[int], k: int) -> List[int] that returns a list containing the sum of every sliding window of size k.

Example:
transactions = [10, 20, 30, 40, 50]
k = 3
Output:
[60, 90, 120]
Explanation:
Sum of the first 3 transactions: 10 + 20 + 30 = 60
Sum of the next 3 transactions: 20 + 30 + 40 = 90
Sum of the last 3 transactions: 30 + 40 + 50 = 120
"""

In [9]:
from typing import List

def sliding_window_sum(transactions: List[int], k: int) -> List[int]:
    """Return the sum of every contiguous window of ``k`` transactions.

    Args:
        transactions: Transaction amounts.
        k: Window size; must be a positive integer.

    Returns:
        A list of ``len(transactions) - k + 1`` window sums, ordered by window
        start position. The list is empty when ``k`` exceeds the number of
        transactions.

    Raises:
        ValueError: If ``k`` is not positive. A zero or negative window has no
            meaningful sum, so it is rejected rather than producing a
            misleading result.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    total = len(transactions)
    if k > total:
        return []

    # Sum the first window once, then slide: add the entering element and
    # subtract the leaving one, instead of re-summing k elements per step.
    window_sum = sum(transactions[:k])
    window_sums = [window_sum]
    for right in range(k, total):
        window_sum += transactions[right] - transactions[right - k]
        window_sums.append(window_sum)

    return window_sums

# Example usage: five amounts with a window of 3 give three window sums.
transactions = [10, 20, 30, 40, 50]
k = 3

# Windows: 10+20+30, 20+30+40, 30+40+50.
output = sliding_window_sum(transactions, k)
print(output)


[60, 90, 120]


In [None]:
"""
5. Question: Flatten Nested JSON
You are given a nested JSON-like dictionary structure. Write a Python function flatten_json(nested_dict: Dict[str, Any]) -> Dict[str, Any] that flattens the dictionary, where nested keys are represented as a concatenation of keys separated by a period (.).

Example:
nested_dict = {
    "user": {
        "id": 1,
        "details": {
            "name": "Alice",
            "address": {
                "city": "New York",
                "zipcode": 10001
            }
        }
    }
}
Output:
{
    "user.id": 1,
    "user.details.name": "Alice",
    "user.details.address.city": "New York",
    "user.details.address.zipcode": 10001
}
"""

In [10]:
from typing import Dict, Any

def flatten_json(nested_dict: Dict[str, Any], parent_key: str = '', sep: str = '.') -> Dict[str, Any]:
    """Flatten a nested dictionary into a single level with joined keys.

    Args:
        nested_dict: The dictionary to flatten.
        parent_key: Prefix placed in front of every produced key. When it is
            empty (or otherwise falsy) keys are emitted without a prefix.
        sep: Separator inserted between key segments.

    Returns:
        A new dictionary in which every non-dict value is stored under the
        ``sep``-joined path of keys leading to it. Only ``dict`` values are
        descended into; lists and other containers are kept as values. An
        empty nested dictionary contributes no entries.
    """
    flattened = {}

    # Depth-first walk with an explicit stack. Each frame holds the key prefix
    # for one dictionary and a live iterator over its items, so after finishing
    # a child we resume the parent exactly where we left off (keeps key order).
    pending = [(parent_key, iter(nested_dict.items()))]
    while pending:
        prefix, items = pending[-1]
        for key, value in items:
            path = f"{prefix}{sep}{key}" if prefix else key
            if isinstance(value, dict):
                # Descend now; the parent's iterator stays parked on the stack.
                pending.append((path, iter(value.items())))
                break
            flattened[path] = value
        else:
            # This dictionary is exhausted.
            pending.pop()

    return flattened

# Example usage: three levels of nesting under the single top-level key "user".
nested_dict = {
    "user": {
        "id": 1,
        "details": {
            "name": "Alice",
            "address": {
                "city": "New York",
                "zipcode": 10001
            }
        }
    }
}

# Default separator is "."; keys come out as e.g. "user.details.address.city".
output = flatten_json(nested_dict)
print(output)


{'user.id': 1, 'user.details.name': 'Alice', 'user.details.address.city': 'New York', 'user.details.address.zipcode': 10001}


In [None]:
"""
6. Question: Find Top N Frequent Words
You are given a list of words, and you need to find the top N most frequent words. Write a Python function top_n_frequent_words(words: List[str], n: int) -> List[str] that returns a list of the top N frequent words. The result should be sorted by frequency in descending order (most frequent first), and if two words have the same frequency, they should be sorted lexicographically in ascending order.

Example:
words = ["apple", "banana", "apple", "orange", "banana", "apple"]
n = 2
Output:
["apple", "banana"]
"""

In [11]:
from typing import List
from collections import Counter

def top_n_frequent_words(words: List[str], n: int) -> List[str]:
    """Return the ``n`` most frequent words.

    Args:
        words: The words to count.
        n: How many words to return; applied as a slice bound on the ranked
            list.

    Returns:
        Up to ``n`` distinct words ordered by descending frequency, with ties
        broken by ascending lexicographic order.
    """
    frequencies = Counter(words)

    # Negating the count makes an ascending sort put the most frequent words
    # first, while the word itself breaks ties alphabetically.
    ranked = sorted(frequencies.items(), key=lambda pair: (-pair[1], pair[0]))

    return [word for word, _count in ranked[:n]]

# Example usage: "apple" occurs 3 times, "banana" twice, "orange" once.
words = ["apple", "banana", "apple", "orange", "banana", "apple"]
n = 2

output = top_n_frequent_words(words, n)
print(output)


['apple', 'banana']


In [None]:
"""
7. Question: Merge DataFrames
You are given two lists of dictionaries, employees and departments, which represent two tables in a relational database. Each employee has a department_id that links them to a department. Write a Python function merge_data(employees: List[Dict[str, Union[str, int]]], departments: List[Dict[str, str]]) -> List[Dict[str, Union[str, int]]] that merges the two datasets into one, adding the department_name to each employee's data.

Example:
employees = [
    {"id": 1, "name": "Alice", "department_id": 2},
    {"id": 2, "name": "Bob", "department_id": 1}
]
departments = [
    {"id": 1, "department_name": "Engineering"},
    {"id": 2, "department_name": "Marketing"}
]
Output:
[
    {"id": 1, "name": "Alice", "department_id": 2, "department_name": "Marketing"},
    {"id": 2, "name": "Bob", "department_id": 1, "department_name": "Engineering"}
]"""


In [12]:
from typing import List, Dict, Union

def merge_data(employees: List[Dict[str, Union[str, int]]], departments: List[Dict[str, str]]) -> List[Dict[str, Union[str, int]]]:
    """Attach each employee's department name (a left join on department id).

    Args:
        employees: Employee records, each with a ``department_id`` key.
        departments: Department records, each with ``id`` and
            ``department_name`` keys.

    Returns:
        A new list with one new dictionary per employee, containing all of the
        employee's fields plus ``department_name``. The name is ``None`` when
        no department has a matching ``id``. The input records are not
        modified.

    Raises:
        KeyError: If a department lacks ``id``/``department_name`` or an
            employee lacks ``department_id``.
    """
    # Build the id -> name lookup once so each employee is resolved in O(1).
    name_by_department_id = {
        dept['id']: dept['department_name'] for dept in departments
    }

    return [
        {
            **employee,
            'department_name': name_by_department_id.get(employee['department_id']),
        }
        for employee in employees
    ]

# Example usage: each employee's department_id matches one department id below.
employees = [
    {"id": 1, "name": "Alice", "department_id": 2},
    {"id": 2, "name": "Bob", "department_id": 1}
]

departments = [
    {"id": 1, "department_name": "Engineering"},
    {"id": 2, "department_name": "Marketing"}
]

# Alice (department 2) gets "Marketing"; Bob (department 1) gets "Engineering".
output = merge_data(employees, departments)
print(output)


[{'id': 1, 'name': 'Alice', 'department_id': 2, 'department_name': 'Marketing'}, {'id': 2, 'name': 'Bob', 'department_id': 1, 'department_name': 'Engineering'}]


In [None]:
"""
8. Question: Moving Average of a Data Stream
Write a class MovingAverage that computes the moving average of the last k values from a data stream. The class should implement two methods:

__init__(self, k: int) initializes the moving average with the window size k.
next(self, val: int) -> float adds val to the stream and returns the moving average of the last k values (or of all values seen so far, if fewer than k have arrived).
Example:
ma = MovingAverage(3)
print(ma.next(10))  # returns 10.0
print(ma.next(20))  # returns 15.0
print(ma.next(30))  # returns 20.0
print(ma.next(40))  # returns 30.0
Explanation:
After the first value (10), the average is 10.
After the second value (20), the average of [10, 20] is 15.
After the third value (30), the average of [10, 20, 30] is 20.
After the fourth value (40), the window shifts to [20, 30, 40], and the average is 30.
"""

In [13]:
from collections import deque

class MovingAverage:
    """Moving average over the most recent ``k`` values of a data stream.

    A running sum is maintained alongside the window so each call to
    :meth:`next` does a constant amount of work.
    """

    def __init__(self, k: int):
        """
        Initialize the moving average with the window size k.

        Args:
            k: Maximum number of most-recent values to average over; must be
                a positive integer.

        Raises:
            ValueError: If ``k`` is not positive. Rejecting it here avoids a
                confusing failure on the first ``next()`` call (``k == 0``) or
                a window that never evicts (``k < 0``).
        """
        if k <= 0:
            raise ValueError(f"k must be a positive integer, got {k!r}")

        self.k = k
        self.window = deque()  # The (at most k) most recent values, oldest first
        self.window_sum = 0  # Sum of the values currently in the window

    def next(self, val: int) -> float:
        """
        Add a new value to the window and return the moving average of the last k values.

        Until k values have been seen, the average covers all values so far.

        Args:
            val: The next value from the stream.

        Returns:
            The mean of the values currently in the window, including ``val``.
        """
        # If the window already holds k values, evict the oldest one first so
        # the window never exceeds k elements after the append below.
        if len(self.window) == self.k:
            self.window_sum -= self.window.popleft()

        # Add the new value to the window
        self.window.append(val)
        self.window_sum += val

        # The window holds at least one element here, so the division is safe.
        return self.window_sum / len(self.window)

# Example usage: window size 3, so the fourth call evicts the first value (10).
ma = MovingAverage(3)
print(ma.next(10))  # returns 10.0 (window: [10])
print(ma.next(20))  # returns 15.0 (window: [10, 20])
print(ma.next(30))  # returns 20.0 (window: [10, 20, 30])
print(ma.next(40))  # returns 30.0 (window: [20, 30, 40])


10.0
15.0
20.0
30.0


In [None]:
"""
9. Question: JSON Field Renaming
You are given a list of dictionaries representing JSON objects, where each dictionary has fields name, age, and location. Your task is to write a Python function rename_fields(data: List[Dict[str, Union[str, int]]]) -> List[Dict[str, Union[str, int]]] that renames the field location to city for each dictionary in the list.

Example:
data = [
    {"name": "Alice", "age": 25, "location": "New York"},
    {"name": "Bob", "age": 30, "location": "San Francisco"}
]
Output:
[
    {"name": "Alice", "age": 25, "city": "New York"},
    {"name": "Bob", "age": 30, "city": "San Francisco"}
]
"""

In [15]:
from typing import List, Dict, Union

def rename_fields(data: List[Dict[str, Union[str, int]]]) -> List[Dict[str, Union[str, int]]]:
    """Rename the ``location`` key to ``city`` in every record, in place.

    Args:
        data: Record dictionaries. Records without a ``location`` key are left
            untouched.

    Returns:
        The same list object that was passed in; its dictionaries are mutated
        rather than copied. An existing ``city`` value is overwritten by the
        ``location`` value, and the renamed key moves to the end of the
        dictionary's key order.
    """
    # Lazily pick out only the records that still carry the old key.
    records_to_rename = (record for record in data if 'location' in record)

    for record in records_to_rename:
        record['city'] = record.pop('location')

    return data

# Example usage: both records carry a "location" key that becomes "city".
data = [
    {"name": "Alice", "age": 25, "location": "New York"},
    {"name": "Bob", "age": 30, "location": "San Francisco"}
]

# rename_fields mutates the dictionaries in place and returns the same list,
# so `output` and `data` refer to the same object after this call.
output = rename_fields(data)
print(output)


[{'name': 'Alice', 'age': 25, 'city': 'New York'}, {'name': 'Bob', 'age': 30, 'city': 'San Francisco'}]


In [None]:
"""
10. Question: Reformat Date Strings
You are given a list of date strings in the format "DD-MM-YYYY". Write a Python function reformat_dates(dates: List[str]) -> List[str] that reformats each date into the format "YYYY-MM-DD".

Example:
dates = ["31-12-2024", "01-01-2024"]
Output:
["2024-12-31", "2024-01-01"]
"""

In [16]:
from typing import List

def reformat_dates(dates: List[str]) -> List[str]:
    """Convert ``DD-MM-YYYY`` date strings to ``YYYY-MM-DD``.

    The conversion is purely textual: the three ``-``-separated parts are
    reordered as-is, with no calendar validation or zero-padding.

    Args:
        dates: Date strings with exactly three ``-``-separated parts.

    Returns:
        A new list with each date's parts reordered to year-month-day.

    Raises:
        ValueError: If a string does not split into exactly three parts.
    """
    def _to_year_first(date: str) -> str:
        # Unpacking enforces the three-part shape before reordering.
        day, month, year = date.split('-')
        return '-'.join((year, month, day))

    return [_to_year_first(date) for date in dates]

# Example usage: two DD-MM-YYYY strings are reordered to YYYY-MM-DD.
dates = ["31-12-2024", "01-01-2024"]
output = reformat_dates(dates)
print(output)


['2024-12-31', '2024-01-01']
