Data generation

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

# Build a small synthetic data set (customers, funds, daily fund performance,
# transactions, regions) and save each table as a CSV file in the current
# working directory.

# A dedicated, seeded generator makes every run produce the same files and
# leaves NumPy's global random state untouched.
rng = np.random.default_rng(42)

region_ids = [1, 2, 3, 4, 5]
n_customers = 100
n_funds = 20
n_transactions = 10_000

# Fund performance and transactions both cover this window, first and last
# day included.
start_date = pd.Timestamp('2024-12-01')
end_date = pd.Timestamp('2025-05-31')

# Generate Customers (100 records)
customers = pd.DataFrame({
    'customer_id': range(1, n_customers + 1),
    'name': [f'Customer_{i}' for i in range(1, n_customers + 1)],
    'region_id': rng.choice(region_ids, n_customers),
})

# Generate Funds (20 sample funds)
funds = pd.DataFrame({
    'fund_id': range(1, n_funds + 1),
    'fund_name': [f'Fund_{i}' for i in range(1, n_funds + 1)],
    'region_id': rng.choice(region_ids, n_funds),
    'risk_category': rng.choice(['Low', 'Medium', 'High'], n_funds),
})

# Generate Fund Performance (6 months daily data)
# Each fund starts from a NAV of 1000 and compounds one normally distributed
# daily return per calendar day. The NAV is compounded from the unrounded
# returns; rounding is applied only to the stored values.
dates = pd.date_range(start=start_date, end=end_date)
date_labels = dates.strftime('%Y-%m-%d')
fund_performance = []
for fund_id in funds['fund_id']:
    daily_returns = rng.normal(0.0005, 0.015, size=len(dates))
    navs = 1000.0 * np.cumprod(1 + daily_returns)
    fund_performance.extend(
        {
            'fund_id': fund_id,
            'as_of_date': label,
            'daily_return': round(float(daily_return), 6),
            'net_asset_value': round(float(nav), 2),
        }
        for label, daily_return, nav in zip(date_labels, daily_returns, navs)
    )

# Generate Transactions (10,000 records)
# rng.integers excludes its upper bound, so the bound is the first second
# after end_date; otherwise no transaction could ever fall on the final day.
first_second = start_date.value // 10**9
end_second_exclusive = (end_date + pd.Timedelta(days=1)).value // 10**9
transactions = pd.DataFrame({
    'transaction_id': range(1, n_transactions + 1),
    'customer_id': rng.choice(customers['customer_id'].to_numpy(), n_transactions),
    'fund_id': rng.choice(funds['fund_id'].to_numpy(), n_transactions),
    'amount': np.round(rng.lognormal(mean=7, sigma=1.5, size=n_transactions), 2),
    'transaction_type': rng.choice(
        ['BUY', 'SELL', 'DIVIDEND'], n_transactions, p=[0.45, 0.45, 0.1]
    ),
    'executed_at': pd.to_datetime(
        rng.integers(first_second, end_second_exclusive, size=n_transactions),
        unit='s',
    ),
})

# Generate Regions
regions = pd.DataFrame({
    'region_id': region_ids,
    'region_name': ['North America', 'Europe', 'Asia', 'South America', 'Africa'],
    'currency_code': ['USD', 'EUR', 'JPY', 'BRL', 'ZAR'],
})

# Save to CSV files
customers.to_csv('customers.csv', index=False)
funds.to_csv('funds.csv', index=False)
pd.DataFrame(fund_performance).to_csv('fund_performance.csv', index=False)
transactions.to_csv('transactions.csv', index=False)
regions.to_csv('regions.csv', index=False)


 #### 1. Write a Python function to find the second largest element in a list without using built-in sorting functions.

In [36]:
import pandas as pd
def second_largest(values):
    """Return the second largest distinct value in ``values``.

    The input is scanned once and is never sorted. Repeated occurrences of
    the largest value do not count as the second largest.

    Args:
        values: An iterable of mutually comparable items.

    Returns:
        The largest item that is strictly smaller than the maximum.

    Raises:
        ValueError: If ``values`` holds fewer than two distinct values.
    """
    largest = runner_up = None
    for value in values:
        if largest is None or value > largest:
            # The previous maximum becomes the runner-up.
            runner_up = largest
            largest = value
        elif value != largest and (runner_up is None or value > runner_up):
            runner_up = value
    if runner_up is None:
        raise ValueError("need at least two distinct values")
    return runner_up


py_list = [12, 22, 35, 45, 8, 11]
print(second_largest(py_list))

 #### 2. Implement a program that takes a dictionary of funds (keys: fund names, values: returns) and returns the name of the fund with the highest return.

In [35]:
def highest_return_fund(fund_returns):
    """Return the name of the fund with the highest return.

    Args:
        fund_returns: Mapping of fund name to its return.

    Returns:
        The key whose value is largest. If several funds share the highest
        return, the first one in the mapping's iteration order is returned.

    Raises:
        ValueError: If ``fund_returns`` is empty.
    """
    if not fund_returns:
        raise ValueError("fund_returns must contain at least one fund")
    # max() over a dict iterates its keys; without key=... it would compare
    # the fund names alphabetically instead of comparing the returns.
    return max(fund_returns, key=fund_returns.get)


# NOTE: this rebinds `funds`, which the data-generation cell uses for a
# DataFrame.
funds = {
    "Fund A": 205,
    "Fund B": 207,
    "Fund C": 190,
    "Fund D": 250
}
fund_with_highest_return = highest_return_fund(funds)
print(fund_with_highest_return)

Fund D


 #### 3. Write a Python script to remove duplicates from a list of client IDs while maintaining the original order.

In [10]:
# Client IDs in arrival order, repeats included, held in a pandas Series so
# that the Series de-duplication method can be applied to them.
raw_client_ids = [101, 101, 102, 102, 103, 104, 105]
client_ids = pd.Series(raw_client_ids)
client_ids

0    101
1    101
2    102
3    102
4    103
5    104
6    105
dtype: int64

In [16]:
# Keep the first occurrence of each ID; the surviving rows stay in their
# original order and keep their original index labels.
client_ids.drop_duplicates(keep='first')

0    101
2    102
4    103
5    104
6    105
dtype: int64


 #### 4. Develop a function to merge two dictionaries containing client portfolios, summing the values of common keys (assets).

 #### 5. Explain the differences between Python’s collections.defaultdict and a standard dictionary. When would you use defaultdict in financial data processing?

 #### 6. Discuss how Python’s multiprocessing module can be used to improve the performance of a script analyzing high-frequency trading data.

 #### 7. How would you use Python’s itertools module to generate all possible portfolio combinations from a list of 10 assets? Discuss the computational challenges of such tasks.

 #### 8. Explain the use of Python’s decorators to log execution time and memory usage for functions processing large-scale financial data.