In [1]:
from io import StringIO

import pandas as pd

# Simulated CSV input: one row per table, giving its domain, its source
# system, and a semicolon-separated list of the tables it connects to
# (the last field is left empty when the table has no connections).
csv_data = """
domain,sourceSystem,table,connectsTo
Finance,AccountingDB,Invoices,Salaries;Campaigns
Finance,AccountingDB,Payments,DealsClosed
Finance,AccountingDB,Budgets,Assets
Finance,PayrollDB,Salaries,Employees
Finance,PayrollDB,Bonuses,
Finance,PayrollDB,Deductions,
HR,RecruitmentDB,Candidates,Interviews
HR,RecruitmentDB,Interviews,Offers;Feedbacks
HR,RecruitmentDB,Offers,
HR,EmployeeDB,Employees,Departments
HR,EmployeeDB,Departments,
HR,EmployeeDB,Positions,
IT,AssetManagementDB,Assets,Licenses
IT,AssetManagementDB,Licenses,
IT,AssetManagementDB,Vendors,
IT,SupportDB,Tickets,ServiceLevels
IT,SupportDB,ServiceLevels,Customers
IT,SupportDB,Customers,
Marketing,CampaignsDB,Campaigns,Leads
Marketing,CampaignsDB,Leads,Conversions
Marketing,CampaignsDB,Conversions,
Marketing,SocialMediaDB,Posts,Interactions
Marketing,SocialMediaDB,Interactions,Followers
Marketing,SocialMediaDB,Followers,
Sales,SalesDB,Opportunities,DealsClosed
Sales,SalesDB,DealsClosed,Contacts
Sales,SalesDB,Contacts,
Sales,CustomerFeedbackDB,Feedbacks,Ratings
Sales,CustomerFeedbackDB,Ratings,Improvements
Sales,CustomerFeedbackDB,Improvements,
"""

# Parse the sample into a DataFrame. Empty connectsTo cells come back as
# NaN, which is why the loop below tests pd.notna before splitting.
df = pd.read_csv(StringIO(csv_data))

# Group the rows in a single pass: domain -> source system -> list of table
# dicts. Plain dicts keep insertion order, so domains and source systems
# appear in the order they are first seen in the CSV. This avoids
# re-filtering the DataFrame for every domain/source system and avoids the
# per-row Series construction of iterrows().
tables_by_system = {}
for domain, source_system, table, links in zip(
    df['domain'], df['sourceSystem'], df['table'], df['connectsTo']
):
    table_dict = {
        "name": table,
        "connectsTo": links.split(';') if pd.notna(links) else [],
    }
    systems = tables_by_system.setdefault(domain, {})
    systems.setdefault(source_system, []).append(table_dict)

# Reshape the nested mapping into the JSON-like list of domain records:
# [{"domain": ..., "sourceSystems": [{"name": ..., "tables": [...]}, ...]}, ...]
result = [
    {
        "domain": domain_name,
        "sourceSystems": [
            {"name": system_name, "tables": tables}
            for system_name, tables in systems_map.items()
        ],
    }
    for domain_name, systems_map in tables_by_system.items()
]

# Bare expression so the notebook cell displays the structure.
result


[{'domain': 'Finance',
  'sourceSystems': [{'name': 'AccountingDB',
    'tables': [{'name': 'Invoices', 'connectsTo': ['Salaries', 'Campaigns']},
     {'name': 'Payments', 'connectsTo': ['DealsClosed']},
     {'name': 'Budgets', 'connectsTo': ['Assets']}]},
   {'name': 'PayrollDB',
    'tables': [{'name': 'Salaries', 'connectsTo': ['Employees']},
     {'name': 'Bonuses', 'connectsTo': []},
     {'name': 'Deductions', 'connectsTo': []}]}]},
 {'domain': 'HR',
  'sourceSystems': [{'name': 'RecruitmentDB',
    'tables': [{'name': 'Candidates', 'connectsTo': ['Interviews']},
     {'name': 'Interviews', 'connectsTo': ['Offers', 'Feedbacks']},
     {'name': 'Offers', 'connectsTo': []}]},
   {'name': 'EmployeeDB',
    'tables': [{'name': 'Employees', 'connectsTo': ['Departments']},
     {'name': 'Departments', 'connectsTo': []},
     {'name': 'Positions', 'connectsTo': []}]}]},
 {'domain': 'IT',
  'sourceSystems': [{'name': 'AssetManagementDB',
    'tables': [{'name': 'Assets', 'connectsTo': [