In [16]:
"""1.
You are given a list of rows from an SQL database represented as a list of dictionaries, 
where each dictionary corresponds to a row. Each row contains user_id, name, age, and city.

Write a Python function transform_data(rows: List[Dict[str, Union[str, int]]]) -> Dict[int, Dict[str, Union[str, int]]] 
that transforms the data into a dictionary format where the keys are the user_id values, 
and the corresponding value is a dictionary of the remaining fields (name, age, city).

Example:
rows = [
    {"user_id": 1, "name": "Alice", "age": 25, "city": "New York"},
    {"user_id": 2, "name": "Bob", "age": 30, "city": "San Francisco"}
]
Output:
{
    1: {"name": "Alice", "age": 25, "city": "New York"},
    2: {"name": "Bob", "age": 30, "city": "San Francisco"}
}
"""
from typing import List, Dict, Union

def transform_data(rows: List[Dict[str, Union[str, int]]]) -> Dict[int, Dict[str, Union[str, int]]]:
    transformed = {}
    
    for row in rows:
        user_id = row['user_id']
        # Create a new dictionary excluding user_id
        user_info = {key: row[key] for key in row if key != 'user_id'}
        transformed[user_id] = user_info
        
    return transformed

rows = [
    {"user_id": 1, "name": "Alice", "age": 25, "city": "New York"},
    {"user_id": 2, "name": "Bob", "age": 30, "city": "San Francisco"}
]

output = transform_data(rows)
print(output)


{1: {'name': 'Alice', 'age': 25, 'city': 'New York'}, 2: {'name': 'Bob', 'age': 30, 'city': 'San Francisco'}}


In [17]:
"""2.
You are given a list of log file entries, where each entry is a string in the format "[timestamp] user_id action".
The action can be either "start" or "stop".
Your task is to write a Python function process_logs(logs: List[str]) -> Dict[int, int] 
that returns a dictionary mapping each user_id to the total duration (in seconds) they were active 
(i.e., between start and stop).

Assume:

All start actions are followed by a stop for the same user.
The timestamp is in the format "YYYY-MM-DD HH:MM:SS".
Example:
logs = [
    "[2024-01-01 10:00:00] 1 start",
    "[2024-01-01 10:05:00] 1 stop",
    "[2024-01-01 10:00:00] 2 start",
    "[2024-01-01 10:10:00] 2 stop"
]
Output:
{
    1: 300,  # 5 minutes
    2: 600   # 10 minutes
}

"""
from typing import List, Dict
from datetime import datetime

def process_logs(logs: List[str]) -> Dict[int, int]:
    active_durations = {}
    start_times = {}

    for log in logs:
       
        timestamp_str = log.split(']')[0]  
        user_id_str, action = log.split(']')[1].strip().split(' ', 1)  

        user_id = int(user_id_str.strip())  

        
        timestamp = datetime.strptime(timestamp_str[1:], "%Y-%m-%d %H:%M:%S") 

        if action == 'start':
            
            start_times[user_id] = timestamp
        elif action == 'stop':
            
            if user_id in start_times:
                duration = (timestamp - start_times[user_id]).total_seconds()
                if user_id in active_durations:
                    active_durations[user_id] += duration
                else:
                    active_durations[user_id] = duration
                
                del start_times[user_id]

    return active_durations

logs = [
    "[2024-01-01 10:00:00] 1 start",
    "[2024-01-01 10:05:00] 1 stop",
    "[2024-01-01 10:00:00] 2 start",
    "[2024-01-01 10:10:00] 2 stop"
]

output = process_logs(logs)
print(output)


{1: 300.0, 2: 600.0}


In [18]:
"""3.
You are given a list of dictionaries representing records, where each dictionary contains id, name, and email.
Write a Python function deduplicate(records: List[Dict[str, str]]) -> List[Dict[str, str]] 
that removes duplicate records based on the email field and returns a list of unique records. 
Keep the first occurrence of each email.

Example:
records = [
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"},
    {"id": "3", "name": "Alice", "email": "alice@example.com"}
]
Output:
[
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"}
]
"""
from typing import List, Dict

def deduplicate(records: List[Dict[str, str]]) -> List[Dict[str, str]]:
    seen_emails = set()  
    unique_records = []   

    for record in records:
        email = record['email']
        if email not in seen_emails:
            seen_emails.add(email)  
            unique_records.append(record) 

    return unique_records

records = [
    {"id": "1", "name": "Alice", "email": "alice@example.com"},
    {"id": "2", "name": "Bob", "email": "bob@example.com"},
    {"id": "3", "name": "Alice", "email": "alice@example.com"}
]

output = deduplicate(records)
print(output)


[{'id': '1', 'name': 'Alice', 'email': 'alice@example.com'}, {'id': '2', 'name': 'Bob', 'email': 'bob@example.com'}]


In [19]:
"""
4. Question: Sliding Window Aggregation
You are given a list of integers representing transaction amounts and a window size k.
Write a Python function sliding_window_sum(transactions: List[int], k: int) -> List[int]
that returns a list containing the sum of every sliding window of size k.

Example:
transactions = [10, 20, 30, 40, 50]
k = 3
Output:
[60, 90, 120]
Explanation:
Sum of the first 3 transactions: 10 + 20 + 30 = 60
Sum of the next 3 transactions: 20 + 30 + 40 = 90
Sum of the last 3 transactions: 30 + 40 + 50 = 120
"""
from typing import List

def sliding_window_sum(transactions: List[int], k: int) -> List[int]:
    if not transactions or k <= 0 or k > len(transactions):
        return []

    window_sums = []
    current_sum = sum(transactions[:k])  

    window_sums.append(current_sum)

    for i in range(1, len(transactions) - k + 1):
        
        current_sum += transactions[i + k - 1] - transactions[i - 1]
        window_sums.append(current_sum)

    return window_sums


transactions = [10, 20, 30, 40, 50]
k = 3
output = sliding_window_sum(transactions, k)
print(output)


[60, 90, 120]


In [20]:
"""
5. Question: Flatten Nested JSON
You are given a nested JSON-like dictionary structure. 
Write a Python function flatten_json(nested_dict: Dict[str, Any]) -> Dict[str, Any] that flattens the dictionary, 
where nested keys are represented as a concatenation of keys separated by a period (.).

Example:
nested_dict = {
    "user": {
        "id": 1,
        "details": {
            "name": "Alice",
            "address": {
                "city": "New York",
                "zipcode": 10001
            }
        }
    }
}
Output:
{
    "user.id": 1,
    "user.details.name": "Alice",
    "user.details.address.city": "New York",
    "user.details.address.zipcode": 10001
}

"""
from typing import Dict, Any

def flatten_json(nested_dict: Dict[str, Any], parent_key: str = '', sep: str = '.') -> Dict[str, Any]:
    items = {}

    for key, value in nested_dict.items():
       
        new_key = f"{parent_key}{sep}{key}" if parent_key else key
        
        if isinstance(value, dict): 
            items.update(flatten_json(value, new_key, sep=sep))
        else:  
            items[new_key] = value

    return items

nested_dict = {
    "user": {
        "id": 1,
        "details": {
            "name": "Alice",
            "address": {
                "city": "New York",
                "zipcode": 10001
            }
        }
    }
}

output = flatten_json(nested_dict)
print(output)


{'user.id': 1, 'user.details.name': 'Alice', 'user.details.address.city': 'New York', 'user.details.address.zipcode': 10001}


In [21]:
"""
6. Question: Find Top N Frequent Words
You are given a list of words, and you need to find the top N most frequent words.
Write a Python function top_n_frequent_words(words: List[str], n: int) -> List[str] 
that returns a list of the top N frequent words. The result should be sorted by frequency, 
and if two words have the same frequency, they should be sorted lexicographically.

Example:
words = ["apple", "banana", "apple", "orange", "banana", "apple"]
n = 2
Output:
["apple", "banana"]
"""
from typing import List
from collections import Counter

def top_n_frequent_words(words: List[str], n: int) -> List[str]:
    frequency = Counter(words)
    sorted_words = sorted(frequency.keys(), key=lambda x: (-frequency[x], x))
    return sorted_words[:n]

words = ["apple", "banana", "apple", "orange", "banana", "apple"]
n = 2
output = top_n_frequent_words(words, n)
print(output)


['apple', 'banana']


In [22]:
"""
7. Question: Merge DataFrames
You are given two lists of dictionaries, employees and departments, which represent two tables in a relational database.
Each employee has a department_id that links them to a department. 
Write a Python function merge_data(employees: List[Dict[str, Union[str, int]]], 
departments: List[Dict[str, str]]) -> List[Dict[str, Union[str, int]]] that merges the two datasets into one,
adding the department_name to each employee's data.

Example:
employees = [
    {"id": 1, "name": "Alice", "department_id": 2},
    {"id": 2, "name": "Bob", "department_id": 1}
]
departments = [
    {"id": 1, "department_name": "Engineering"},
    {"id": 2, "department_name": "Marketing"}
]
Output:
[
    {"id": 1, "name": "Alice", "department_id": 2, "department_name": "Marketing"},
    {"id": 2, "name": "Bob", "department_id": 1, "department_name": "Engineering"}
]
"""
from typing import List, Dict, Union

def merge_data(employees: List[Dict[str, Union[str, int]]], departments: List[Dict[str, str]]) -> List[Dict[str, Union[str, int]]]:
    # Create a mapping of department_id to department_name
    department_map = {dept['id']: dept['department_name'] for dept in departments}
    
    # Merge the employees with their department names
    merged_data = []
    for emp in employees:
        department_name = department_map.get(emp['department_id'], None)  # Get department name or None if not found
        emp['department_name'] = department_name  # Add department name to employee record
        merged_data.append(emp)  # Append the enriched employee record to the list

    return merged_data

employees = [
    {"id": 1, "name": "Alice", "department_id": 2},
    {"id": 2, "name": "Bob", "department_id": 1}
]
departments = [
    {"id": 1, "department_name": "Engineering"},
    {"id": 2, "department_name": "Marketing"}
]

output = merge_data(employees, departments)
print(output)


[{'id': 1, 'name': 'Alice', 'department_id': 2, 'department_name': 'Marketing'}, {'id': 2, 'name': 'Bob', 'department_id': 1, 'department_name': 'Engineering'}]


In [23]:
"""
8. Question: Moving Average of a Data Stream
Write a class MovingAverage that computes the moving average of the last k values from a data stream.
The class should implement two methods:

__init__(self, k: int) initializes the moving average with the window size k.
next(self, val: int) -> float returns the moving average of the last k values.
Example:
ma = MovingAverage(3)
print(ma.next(10))  # returns 10.0
print(ma.next(20))  # returns 15.0
print(ma.next(30))  # returns 20.0
print(ma.next(40))  # returns 30.0
Explanation:
After the first value (10), the average is 10.
After the second value (20), the average of [10, 20] is 15.
After the third value (30), the average of [10, 20, 30] is 20.
After the fourth value (40), the window shifts to [20, 30, 40], and the average is 30.
"""
from collections import deque

class MovingAverage:
    def __init__(self, k: int):
        self.k = k                  
        self.window = deque()       
        self.current_sum = 0.0      

    def next(self, val: int) -> float:
       
        self.window.append(val)
        self.current_sum += val
        
        
        if len(self.window) > self.k:
            oldest_value = self.window.popleft()
            self.current_sum -= oldest_value  
       
        return self.current_sum / len(self.window)

ma = MovingAverage(3)
print(ma.next(10))  
print(ma.next(20))  
print(ma.next(30))  
print(ma.next(40))  

10.0
15.0
20.0
30.0


In [24]:
"""
9. Question: JSON Field Renaming
You are given a list of dictionaries representing JSON objects, where each dictionary has fields name, age, and location.
Your task is to write a Python function rename_fields(data: List[Dict[str, Union[str, int]]]) -> List[Dict[str, Union[str, int]]] 
that renames the field location to city for each dictionary in the list.

Example:
data = [
    {"name": "Alice", "age": 25, "location": "New York"},
    {"name": "Bob", "age": 30, "location": "San Francisco"}
]
Output:
[
    {"name": "Alice", "age": 25, "city": "New York"},
    {"name": "Bob", "age": 30, "city": "San Francisco"}
]
"""
from typing import List, Dict, Union

def rename_fields(data: List[Dict[str, Union[str, int]]]) -> List[Dict[str, Union[str, int]]]:
    for record in data:
        if 'location' in record:
            record['city'] = record.pop('location')  
    return data


data = [
    {"name": "Alice", "age": 25, "location": "New York"},
    {"name": "Bob", "age": 30, "location": "San Francisco"}
]
output = rename_fields(data)
print(output)


[{'name': 'Alice', 'age': 25, 'city': 'New York'}, {'name': 'Bob', 'age': 30, 'city': 'San Francisco'}]


In [25]:
"""
10. Question: Reformat Date Strings
You are given a list of date strings in the format "DD-MM-YYYY". Write a Python function reformat_dates(dates: List[str]) -> List[str] that reformats each date into the format "YYYY-MM-DD".

Example:
dates = ["31-12-2024", "01-01-2024"]
Output:
["2024-12-31", "2024-01-01"]
"""
from typing import List

def reformat_dates(dates: List[str]) -> List[str]:
    reformatted_dates = []
    
    for date in dates:
        day, month, year = date.split('-')  
        reformatted_date = f"{year}-{month}-{day}"  
        reformatted_dates.append(reformatted_date)
    
    return reformatted_dates

dates = ["31-12-2024", "01-01-2024"]
output = reformat_dates(dates)
print(output)


['2024-12-31', '2024-01-01']
