security table

In [11]:
import pandas as pd
import random
import string
from itertools import permutations
from datetime import datetime, timedelta, date


instrument_types = [
    'equity', 'fixed income', 'derivatives', 'commodity', 'currency',
    'cash equivalent', 'real estate', 'mutual fund', 'etf',
    'private equity', 'hedge fund', 'structured product'
]
custodian_names = ['State Street', 'BNY Mellon', 'Northern Trust', 'HDFC Custodian']

used_sec_ids = set()
security_data = []

def generate_unique_sec_id():
    while True:
        num = random.randint(1000, 9999)
        sec_id = f"SEC{num}"
        if sec_id not in used_sec_ids:
            used_sec_ids.add(sec_id)
            return sec_id

def generate_cusip():
    return ''.join(random.choices(string.digits, k=9))

def generate_isin(cusip):
    country_code = random.choice(['US', 'IN', 'GB', 'DE', 'JP'])
    check_letter = random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ')  # single letter
    return country_code + cusip + check_letter


for _ in range(1000):
    security_id = generate_unique_sec_id()
    cusip = generate_cusip()
    isin = generate_isin(cusip)
    instrument = random.choice(instrument_types)
    custodian_name = random.choice(custodian_names)
    security_data.append([security_id, cusip, isin, instrument, custodian_name])

security_df = pd.DataFrame(security_data, columns=['security_id', 'cusip', 'isin', 'instrument', 'custodian_name'])



In [12]:
security_df.head(10)

Unnamed: 0,security_id,cusip,isin,instrument,custodian_name
0,SEC4102,219591803,US219591803A,derivatives,HDFC Custodian
1,SEC3933,333244684,DE333244684D,equity,State Street
2,SEC2976,46265237,US046265237Z,fixed income,BNY Mellon
3,SEC6786,332238329,US332238329H,real estate,Northern Trust
4,SEC6339,802237548,GB802237548U,currency,BNY Mellon
5,SEC9678,797438679,US797438679Q,real estate,HDFC Custodian
6,SEC2894,803407225,JP803407225J,equity,BNY Mellon
7,SEC3444,159369198,JP159369198Z,cash equivalent,Northern Trust
8,SEC9307,780189494,DE780189494S,private equity,State Street
9,SEC7155,154229758,DE154229758C,fixed income,State Street


In [13]:
security_df.to_csv("security_table.csv", index=False)

In [14]:
security_df = pd.read_csv("security_table.csv")
duplicates = security_df['security_id'].duplicated().sum()
print(f"🔁 Duplicate security_id entries: {duplicates}")


🔁 Duplicate security_id entries: 0


Settlement table


In [16]:
company_df = pd.read_csv("company_details.csv")  # Should have 'company_id'
company_ids = company_df['company_id'].tolist()

In [19]:
company_df = pd.read_csv("company_details.csv")  # must have 'company_id'
security_df = pd.read_csv("security_table.csv")  # must have 'security_id'

company_ids = company_df['company_id'].tolist()
security_ids = security_df['security_id'].tolist()

# Generate all unique sender ≠ receiver pairs
all_possible_pairs = list(permutations(company_ids, 2))
random.shuffle(all_possible_pairs)

print(f"Total unique sender ≠ receiver pairs available: {len(all_possible_pairs)}")

# Prepare for settlement ID uniqueness
used_sett_ids = set()
settlement_data = []

def generate_unique_sett_id():
    while True:
        sid = f"SETT{random.randint(1000, 9999)}"
        if sid not in used_sett_ids:
            used_sett_ids.add(sid)
            return sid

# Adjust max rows safely
max_settlements = min(5000, len(all_possible_pairs))

for i in range(max_settlements):
    sender_id, receiver_id = all_possible_pairs[i]
    settlement_id = generate_unique_sett_id()
    security_id = random.choice(security_ids)
    settlement_date = (date.today() + timedelta(days=random.randint(1, 60))).isoformat()
    settlement_data.append([settlement_id, sender_id, receiver_id, security_id, settlement_date])

# Create DataFrame and save
settlement_df = pd.DataFrame(settlement_data, columns=[
    'settlement_id', 'sender_id', 'receiver_id', 'security_id', 'settlement_date'
])



Total unique sender ≠ receiver pairs available: 20


In [20]:
settlement_df.to_csv("settlement_instructions_extended.csv", index=False)


In [21]:
settlement_df = pd.read_csv("settlement_instructions_extended.csv")
duplicates = settlement_df['settlement_id'].duplicated().sum()
print(f"🔁 Duplicate settlement_id entries: {duplicates}")


🔁 Duplicate settlement_id entries: 0
