In [1]:
#1) Question: SQL-to-JSON Transformation
'''You are given a list of rows from an SQL database represented as a list of dictionaries, 
where each dictionary corresponds to a row. Each row contains user_id, name, age, and city.
Write a Python function transform_data(rows: List[Dict[str, Union[str, int]]]) -> Dict[int, Dict[str, Union[str, int]]] 
that transforms the data into a dictionary format where the keys are the user_id values,
and the corresponding value is a dictionary of the remaining fields (name, age, city).'''

'''
Example:
rows = [
    {"user_id": 1, "name": "Alice", "age": 25, "city": "New York"},
    {"user_id": 2, "name": "Bob", "age": 30, "city": "San Francisco"}
]

Output:
{
    1: {"name": "Alice", "age": 25, "city": "New York"},
    2: {"name": "Bob", "age": 30, "city": "San Francisco"}
}'''

from typing import List, Dict, Union

def transform_data(rows: List[Dict[str, Union[str, int]]]) -> Dict[int, Dict[str, Union[str, int]]]:
    """Re-key a list of row dictionaries by their ``user_id``.

    Args:
        rows: Row dictionaries; each must contain the keys ``user_id``,
            ``name``, ``age`` and ``city``.

    Returns:
        A dictionary mapping each ``user_id`` to a new dictionary holding only
        that row's ``name``, ``age`` and ``city``. If a ``user_id`` occurs more
        than once, the last row with that id supplies the value.

    Raises:
        KeyError: If a row lacks any of the four required keys.
    """
    users_by_id = {}
    for record in rows:
        uid = record['user_id']
        details = {}
        # Copy only the three payload fields; any other keys are left out.
        for field in ('name', 'age', 'city'):
            details[field] = record[field]
        users_by_id[uid] = details
    return users_by_id
# Demo: run transform_data on the sample rows from the problem statement
# and print the resulting user_id-keyed dictionary.
rows = [
    {"user_id": 1, "name": "Alice", "age": 25, "city": "New York"},
    {"user_id": 2, "name": "Bob", "age": 30, "city": "San Francisco"}
]

output = transform_data(rows)
print(output)

{1: {'name': 'Alice', 'age': 25, 'city': 'New York'}, 2: {'name': 'Bob', 'age': 30, 'city': 'San Francisco'}}


In [2]:
#3)
''' Question: Data Deduplication
You are given a list of dictionaries representing records, 
where each dictionary contains id, name, and email. 
Write a Python function deduplicate(records: List[Dict[str, str]]) -> List[Dict[str, str]]
that removes duplicate records based on the email field and returns a list of unique records.
Keep the first occurrence of each email.

Example:
records = [
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"},
    {"id": "3", "name": "Alice", "email": "alice@example.com"}
]

Output:
[
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"}
]'''
from typing import List, Dict

def deduplicate(records: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Drop records whose ``email`` has already been seen.

    Args:
        records: Record dictionaries; each must contain an ``email`` key.

    Returns:
        The first record for every distinct email, in the order those emails
        first appear. The returned list holds the original record objects,
        not copies.

    Raises:
        KeyError: If a record has no ``email`` key.
    """
    seen_emails = set()
    kept = []
    for entry in records:
        address = entry["email"]
        if address in seen_emails:
            # A record with this email was already kept; skip the repeat.
            continue
        seen_emails.add(address)
        kept.append(entry)
    return kept
# Demo: the sample records contain two entries sharing alice@example.com;
# print what deduplicate returns for them.
records = [
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"},
    {"id": "3", "name": "Alice", "email": "alice@example.com"}
]

output = deduplicate(records)
print(output)

[{'id': '1', 'name': 'Alice', 'email': 'alice@example.com'}, {'id': '2', 'name': 'Bob', 'email': 'bob@example.com'}]


In [3]:
#4) Question: Sliding Window Aggregation
'''You are given a list of integers representing transaction amounts and a window size k.
Write a Python function sliding_window_sum(transactions: List[int], k: int) -> List[int] 
that returns a list containing the sum of every sliding window of size k.

Example:
transactions = [10, 20, 30, 40, 50]
k = 3
Output:
[60, 90, 120]
Explanation:
Sum of the first 3 transactions: 10 + 20 + 30 = 60
Sum of the next 3 transactions: 20 + 30 + 40 = 90
Sum of the last 3 transactions: 30 + 40 + 50 = 120 '''
from typing import List

def sliding_window_sum(transactions: List[int], k: int) -> List[int]:
    """Return the sum of every contiguous window of ``k`` transactions.

    Args:
        transactions: Transaction amounts.
        k: Window size.

    Returns:
        One sum per window, ordered by window start position. An empty list
        is returned when ``k`` is not positive or exceeds the number of
        transactions.
    """
    total = len(transactions)
    if k <= 0 or k > total:
        return []

    # prefix[i] holds the sum of the first i transactions (prefix[0] == 0).
    prefix = [0]
    for amount in transactions:
        prefix.append(prefix[-1] + amount)

    # The window ending just before index `end` sums to prefix[end] - prefix[end - k].
    return [prefix[end] - prefix[end - k] for end in range(k, total + 1)]
# Demo: the worked example from the problem statement (window size 3).
transactions = [10, 20, 30, 40, 50]
k = 3
output = sliding_window_sum(transactions, k)
print(output)  # prints [60, 90, 120]

[60, 90, 120]


In [6]:
#5) Question: Flatten Nested JSON
# You are given a nested JSON-like dictionary structure. Write a Python function flatten_json(nested_dict: Dict[str, Any]) -> Dict[str, Any] 
# that flattens the dictionary, where nested keys are represented as a concatenation of keys separated by a period (.).
from typing import Dict, Any

def flatten_dictionary(input_dict: Dict[str, Any], base_key: str = '', delimiter: str = '.') -> Dict[str, Any]:
    """Flatten a nested dictionary into a single level with joined keys.

    Args:
        input_dict: The (possibly nested) dictionary to flatten.
        base_key: Prefix placed in front of every key of ``input_dict``; left
            empty for the outermost call.
        delimiter: String inserted between a prefix and the key under it.

    Returns:
        A new dictionary whose keys are the paths to each non-dict value,
        with path components joined by ``delimiter``. A nested dictionary
        that is empty contributes no entries to the result.
    """
    flat = {}
    for name, value in input_dict.items():
        # Only prepend the prefix when there is one, so top-level keys stay bare.
        if base_key:
            path = base_key + delimiter + name
        else:
            path = name

        if not isinstance(value, dict):
            flat[path] = value
            continue

        # Recurse into the nested dictionary and merge its flattened entries.
        nested = flatten_dictionary(value, path, delimiter=delimiter)
        for nested_path, nested_value in nested.items():
            flat[nested_path] = nested_value
    return flat
# Demo: flatten a sample nested dictionary and print the dotted-key result.
nested_dict = {
    "user": {
        "id": 1,
        "details": {
            "name": "Alice",
            "address": {
                "city": "New York",
                "zipcode": 10001
            }
        }
    }
}

flattened_output = flatten_dictionary(nested_dict)
print(flattened_output)


{'user.id': 1, 'user.details.name': 'Alice', 'user.details.address.city': 'New York', 'user.details.address.zipcode': 10001}


In [7]:
#6) You are given a list of words, and you need to find the top N most frequent words. Write a Python function top_n_frequent_words(words: List[str], n: int) -> List[str] that returns a list of the top N frequent words.
# The result should be sorted by frequency, and if two words have the same frequency, they should be sorted lexicographically.
from collections import Counter
from typing import List

def top_n_frequent_words(words: List[str], n: int) -> List[str]:
    """Return the ``n`` most frequent words.

    Args:
        words: The words to count.
        n: How many words to return.

    Returns:
        Up to ``n`` distinct words ordered by descending frequency, with ties
        broken lexicographically. Fewer than ``n`` words are returned when
        ``words`` has fewer distinct entries; an empty list is returned when
        ``n`` is zero or negative.
    """
    # A non-positive n asks for no words. Without this guard a negative n
    # would reach the slice below and drop words from the *end* of the
    # ranking instead of returning nothing.
    if n <= 0:
        return []

    word_count = Counter(words)
    # Negating the count sorts by frequency descending while the word itself
    # stays ascending, which gives the lexicographic tie-break.
    ranked = sorted(word_count.items(), key=lambda item: (-item[1], item[0]))
    return [word for word, _ in ranked[:n]]
# Demo: print the two most frequent words in a small sample list.
words = ["apple", "banana", "apple", "orange", "banana", "apple"]
n = 2
print(top_n_frequent_words(words, n))

['apple', 'banana']


In [9]:
#7) 
'''You are given two lists of dictionaries, employees and departments, which represent two tables in a relational database.
Each employee has a department_id that links them to a department. 
Write a Python function merge_data(employees: List[Dict[str, Union[str, int]]], departments: List[Dict[str, str]]) -> List[Dict[str, Union[str, int]]] 
that merges the two datasets into one, adding the department_name to each employee's data.'''

from typing import List, Dict, Union

def merge_data(employees: List[Dict[str, Union[str, int]]], departments: List[Dict[str, str]]) -> List[Dict[str, Union[str, int]]]:
    """Join employees to departments, adding ``department_name`` to each employee.

    Args:
        employees: Employee records; each must contain ``department_id``.
        departments: Department records; each must contain ``id`` and
            ``department_name``.

    Returns:
        A new list with one new dictionary per employee, holding all of the
        employee's fields plus ``department_name``. Employees whose
        ``department_id`` matches no department get ``"Unknown"``. The input
        ``employees`` list and its dictionaries are left unmodified.

    Raises:
        KeyError: If an employee lacks ``department_id`` or a department lacks
            ``id`` or ``department_name``.
    """
    # Build the id -> name lookup once so each employee is resolved in O(1).
    department_map = {dept['id']: dept['department_name'] for dept in departments}
    # Build fresh dictionaries instead of writing into the caller's records,
    # so the inputs can be reused (or the call repeated) without side effects.
    return [
        {**emp, 'department_name': department_map.get(emp['department_id'], "Unknown")}
        for emp in employees
    ]
    
# Demo: two sample employees and two sample departments; print the employee
# records returned by merge_data.
employees = [
    {"id": 1, "name": "Alice", "department_id": 2},
    {"id": 2, "name": "Bob", "department_id": 1}
]
departments = [
    {"id": 1, "department_name": "Engineering"},
    {"id": 2, "department_name": "Marketing"}
]

merged_data = merge_data(employees, departments)
print(merged_data)

[{'id': 1, 'name': 'Alice', 'department_id': 2, 'department_name': 'Marketing'}, {'id': 2, 'name': 'Bob', 'department_id': 1, 'department_name': 'Engineering'}]


In [10]:
'''8) Write a class MovingAverage that computes the moving average of the last k values from a data stream. 
 The class should implement two methods:
 __init__(self, k: int) initializes the moving average with the window size k.
 next(self, val: int) -> float returns the moving average of the last k values'''

class MovingAverage:
    """Moving average over the most recent ``k`` values of a stream.

    Attributes are plain instance fields:
        k: the window size.
        window: list of the values currently inside the window, oldest first.
    """

    def __init__(self, k: int):
        """Create an empty window of size ``k``.

        Args:
            k: Maximum number of most-recent values to average over.

        Raises:
            ValueError: If ``k`` is not positive. A zero or negative size
                would make the trimming slice in ``next`` keep the wrong
                elements (``window[-0:]`` is the whole list), so it is
                rejected up front.
        """
        if k <= 0:
            raise ValueError("k must be a positive integer")
        self.k = k
        self.window = []

    def next(self, val: int) -> float:
        """Add ``val`` to the stream and return the current moving average.

        Args:
            val: The newest value in the stream.

        Returns:
            The mean of the last ``k`` values seen, or of all values seen so
            far when fewer than ``k`` have arrived.
        """
        self.window.append(val)
        if len(self.window) > self.k:
            # Keep only the newest k values.
            self.window = self.window[-self.k:]
        return sum(self.window) / len(self.window)

# Demo: feed four values through a window of size 3 and print each average.
ma = MovingAverage(3)
print(ma.next(10))  # prints 10.0
print(ma.next(20))  # prints 15.0
print(ma.next(30))  # prints 20.0
print(ma.next(40))  # prints 30.0

10.0
15.0
20.0
30.0


In [11]:
#9  JSON Field Renaming
#You are given a list of dictionaries representing JSON objects, where each dictionary has fields name, age, and location. 
# Your task is to write a Python function rename_fields(data: List[Dict[str, Union[str, int]]]) -> List[Dict[str, Union[str, int]]] 
#that renames the field location to city for each dictionary in the list.

from typing import List, Dict, Union
def rename_fields(data: List[Dict[str, Union[str, int]]]) -> List[Dict[str, Union[str, int]]]:
    """Rename the ``location`` field to ``city`` in every record.

    Args:
        data: Record dictionaries, each optionally containing ``location``.

    Returns:
        A new list of new dictionaries in which ``location`` has been replaced
        by ``city`` (same value). Records without ``location`` are copied
        unchanged. If a record has both keys, the ``location`` value
        overwrites the existing ``city``. The input list and its dictionaries
        are left unmodified.
    """
    renamed = []
    for item in data:
        # Work on a shallow copy so the caller's record keeps its `location`.
        record = dict(item)
        if 'location' in record:
            record['city'] = record.pop('location')
        renamed.append(record)
    return renamed
# Demo: two sample records that use `location`; print what rename_fields returns.
data = [
    {"name": "Alice", "age": 25, "location": "New York"},
    {"name": "Bob", "age": 30, "location": "San Francisco"}
]
print(rename_fields(data))

[{'name': 'Alice', 'age': 25, 'city': 'New York'}, {'name': 'Bob', 'age': 30, 'city': 'San Francisco'}]


In [12]:
#10. Question: Reformat Date Strings
# You are given a list of date strings in the format "DD-MM-YYYY". Write a Python function reformat_dates(dates: List[str]) -> List[str] 
# that reformats each date into the format "YYYY-MM-DD".
from typing import List

def reformat_dates(dates: List[str]) -> List[str]:
    """Convert ``DD-MM-YYYY`` date strings to ``YYYY-MM-DD``.

    Each string is split on ``-`` and its components are re-joined in reverse
    order; the components themselves are not validated.

    Args:
        dates: Date strings whose parts are separated by ``-``.

    Returns:
        A new list with the components of every string reversed.
    """
    converted = []
    for original in dates:
        parts = original.split('-')
        parts.reverse()
        converted.append('-'.join(parts))
    return converted

# Demo: reformat two sample DD-MM-YYYY strings and print the result.
dates = ["31-12-2024", "01-01-2024"]
print(reformat_dates(dates))

['2024-12-31', '2024-01-01']
