In [1]:
import pandas as pd
import numpy as np
import sqlite3
from fuzzywuzzy import process

### Download the data from NIC
- www.ffiec.govnpw/FinancialReport/ReturnRelationshipsZipFile
- www.ffiec.govnpw/FinancialReport/ReturnAttributesActiveZipFileCSV
- www.ffiec.govnpw/FinancialReport/ReturnAttributesClosedZipFileCSV

In [2]:
rel_data = pd.read_csv('CSV_RELATIONSHIPS.CSV') # maps the holding companies and their offsprings

att_data_active = pd.read_csv('CSV_ATTRIBUTES_ACTIVE.CSV') # includes names and attributes of companies (active)
att_data_closed = pd.read_csv('CSV_ATTRIBUTES_CLOSED.CSV') # includes names and attributes of companies (closed)

  att_data_active = pd.read_csv('CSV_ATTRIBUTES_ACTIVE.CSV') # includes names and attributes of companies (active)
  att_data_closed = pd.read_csv('CSV_ATTRIBUTES_CLOSED.CSV') # includes names and attributes of companies (closed)


### Combine all the banks and institutions (active and closed)
    for which the Fed has a supervisory, regulatory, or research interest

In [3]:
att_data = pd.concat([att_data_active, att_data_closed])
att_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 218085 entries, 0 to 156438
Data columns (total 74 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   #ID_RSSD            218085 non-null  int64  
 1   D_DT_START          218085 non-null  object 
 2   D_DT_END            218085 non-null  object 
 3   BHC_IND             218085 non-null  int64  
 4   BROAD_REG_CD        218085 non-null  int64  
 5   CHTR_AUTH_CD        218085 non-null  int64  
 6   CHTR_TYPE_CD        218085 non-null  int64  
 7   FBO_4C9_IND         218085 non-null  int64  
 8   FHC_IND             218085 non-null  int64  
 9   FUNC_REG            218085 non-null  int64  
 10  INSUR_PRI_CD        218085 non-null  int64  
 11  MBR_FHLBS_IND       218085 non-null  int64  
 12  MBR_FRS_IND         218085 non-null  int64  
 13  SEC_RPTG_STATUS     218085 non-null  int64  
 14  EST_TYPE_CD         218085 non-null  int64  
 15  BANK_CNT            14793 non-null   fl

In [4]:
selected_columns_descriptions = {
    '#ID_RSSD': 'RSSD ID',
    'BHC_IND': 'Bank Holding Company Indicator',
    'CHTR_TYPE_CD': 'Entity Type Code',
    'FHC_IND': 'Financial Holding Company Indicator',
    'D_DT_EXIST_CMNC': 'Date of Commencement of Existence',
    'D_DT_EXIST_TERM': 'Date of Termination of Existence',
    'NM_LGL': 'Legal Name',
    'NM_SHORT': 'Short Name',
    'DOMESTIC_IND': 'Domestic Indicator',
    'CNTRY_NM': 'Country Name',
    'ID_CUSIP': 'CUSIP ID',
}

In [5]:
selected_columns = list(selected_columns_descriptions.keys())

att_selected = att_data[selected_columns]
att_selected.info()

<class 'pandas.core.frame.DataFrame'>
Index: 218085 entries, 0 to 156438
Data columns (total 11 columns):
 #   Column           Non-Null Count   Dtype 
---  ------           --------------   ----- 
 0   #ID_RSSD         218085 non-null  int64 
 1   BHC_IND          218085 non-null  int64 
 2   CHTR_TYPE_CD     218085 non-null  int64 
 3   FHC_IND          218085 non-null  int64 
 4   D_DT_EXIST_CMNC  40501 non-null   object
 5   D_DT_EXIST_TERM  218085 non-null  object
 6   NM_LGL           218085 non-null  object
 7   NM_SHORT         218085 non-null  object
 8   DOMESTIC_IND     218085 non-null  object
 9   CNTRY_NM         218085 non-null  object
 10  ID_CUSIP         218085 non-null  object
dtypes: int64(4), object(7)
memory usage: 20.0+ MB


In [6]:
selected_columns_descriptions = {
    '#ID_RSSD_PARENT': 'RSSD ID of Parent',
    'ID_RSSD_OFFSPRING': 'RSSD ID of Offspring',
    'RELN_LVL': 'Relationship Level',
    'PCT_EQUITY_BRACKET': 'Percent Equity Bracket',
    'D_DT_RELN_EST': 'Date Relationship Was Established',
}

In [7]:
selected_columns = list(selected_columns_descriptions.keys())

rel_selected = rel_data[selected_columns]
rel_selected.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277752 entries, 0 to 277751
Data columns (total 5 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   #ID_RSSD_PARENT     277752 non-null  int64 
 1   ID_RSSD_OFFSPRING   277752 non-null  int64 
 2   RELN_LVL            277752 non-null  int64 
 3   PCT_EQUITY_BRACKET  277752 non-null  object
 4   D_DT_RELN_EST       277752 non-null  object
dtypes: int64(3), object(2)
memory usage: 10.6+ MB


### merge

In [8]:
rel_with_parent = pd.merge(rel_selected, att_selected, left_on='#ID_RSSD_PARENT', right_on='#ID_RSSD', how='left', suffixes=('', '_PARENT'))
final_df = pd.merge(rel_with_parent, att_selected, left_on='ID_RSSD_OFFSPRING', right_on='#ID_RSSD', how='left', suffixes=('', '_OFFSPRING'))

In [9]:
cols = ['#ID_RSSD_PARENT','NM_LGL','NM_SHORT','ID_RSSD_OFFSPRING','NM_LGL_OFFSPRING','NM_SHORT_OFFSPRING']
rest_cols = [col for col in final_df.columns if col not in cols]
final_df = final_df[cols + rest_cols]
final_df

Unnamed: 0,#ID_RSSD_PARENT,NM_LGL,NM_SHORT,ID_RSSD_OFFSPRING,NM_LGL_OFFSPRING,NM_SHORT_OFFSPRING,RELN_LVL,PCT_EQUITY_BRACKET,D_DT_RELN_EST,#ID_RSSD,...,ID_CUSIP,#ID_RSSD_OFFSPRING,BHC_IND_OFFSPRING,CHTR_TYPE_CD_OFFSPRING,FHC_IND_OFFSPRING,D_DT_EXIST_CMNC_OFFSPRING,D_DT_EXIST_TERM_OFFSPRING,DOMESTIC_IND_OFFSPRING,CNTRY_NM_OFFSPRING,ID_CUSIP_OFFSPRING
0,130,PARK BANK OF FLORIDA ...,PARK BK OF FL,1081305,"PARK REAL PROPERTY, INC. ...",PARK REAL PROP,1,80-100,06/27/1984 00:00:00,130.0,...,0,1081305.0,0.0,720.0,0.0,,02/14/1986 00:00:00,Y,UNITED STATES,0
1,279,"BROADSTREET BANK, SSB ...",BROADSTREET BK SSB,3923155,MINEOLA FINANCIAL SERVICE CORPORATION ...,MINEOLA FNCL SVC CORP,1,100,01/23/2008 00:00:00,279.0,...,0,3923155.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0
2,505,BALLSTON SPA NATIONAL BANK ...,BALLSTON SPA NB,2913168,"BSNB REAL ESTATE COMPANY, INC. ...",BSNB RE CO,1,80-100,05/27/1999 00:00:00,505.0,...,0,2913168.0,0.0,720.0,0.0,05/27/1999 00:00:00,12/31/9999 00:00:00,Y,UNITED STATES,0
3,505,BALLSTON SPA NATIONAL BANK ...,BALLSTON SPA NB,2913168,"BSNB REAL ESTATE COMPANY, INC. ...",BSNB RE CO,1,100,05/27/1999 00:00:00,505.0,...,0,2913168.0,0.0,720.0,0.0,05/27/1999 00:00:00,12/31/9999 00:00:00,Y,UNITED STATES,0
4,505,BALLSTON SPA NATIONAL BANK ...,BALLSTON SPA NB,3079566,BSNB I. S. INC. ...,BSNB I S,1,80-100,06/01/2000 00:00:00,505.0,...,0,3079566.0,0.0,550.0,0.0,06/01/2000 00:00:00,12/31/9999 00:00:00,Y,UNITED STATES,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277772,5924316,US GCDC HOLDINGS 2 LLC ...,US GCDC HOLDS 2 LLC,5924343,US GCDC PHASE 2 HOLDINGS GP LLC ...,US GCDC PHASE 2 HOLDS GP LLC,1,<25,12/31/2023 00:00:00,5924316.0,...,0,5924343.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0
277773,5924316,US GCDC HOLDINGS 2 LLC ...,US GCDC HOLDS 2 LLC,5924352,US GCDC PHASE 2 HOLDINGS LP ...,US GCDC PHASE 2 HOLDS LP,1,0,12/31/2023 00:00:00,5924316.0,...,0,5924352.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0
277774,5924325,US GCDC PHASE 1 HOLDINGS LLC ...,US GCDC PHASE 1 HOLDS LLC,5924334,GCDC PURCHASER PHASE 1 LLC ...,GCDC PURCHASER PHASE 1 LLC,1,<25,12/31/2023 00:00:00,5924325.0,...,0,5924334.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0
277775,5924352,US GCDC PHASE 2 HOLDINGS LP ...,US GCDC PHASE 2 HOLDS LP,5924343,US GCDC PHASE 2 HOLDINGS GP LLC ...,US GCDC PHASE 2 HOLDS GP LLC,1,<25,12/31/2023 00:00:00,5924352.0,...,0,5924343.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0


In [10]:
output_file_path = 'rel_df.csv'
final_df.to_csv(output_file_path, index=False)

### combine with ticker data

In [11]:
ticks = pd.read_csv('ticks.csv', delimiter='|')
ticks.head()

Unnamed: 0,Primary Dealer,Holding Company,Ticker,Start Date,End Date,Permco,gvkey
0,ABN AMRO INCORPORATED,,ABN,9/29/1998,9/15/2006,31989.0,15504.0
1,"AUBREY G. LANSTON & CO., INC.",Citigroup Inc.,C (NYSE),5/19/1960,4/17/2000,20483.0,3243.0
2,BA Securities,Bank of America Corporation,BAC (NYSE),4/18/1994,9/30/1997,437.0,7647.0
3,Banc One,JPMorgan Chase & Co. (Acquired by JPMorgan Chase),JPM (NYSE),4/1/1999,8/1/2004,20436.0,2968.0
4,Bank of America,Bank of America Corporation,BAC (NYSE),11/17/1971,4/15/1994,437.0,7647.0


In [12]:
combined_names = pd.concat([final_df['NM_SHORT_OFFSPRING'].fillna(''), final_df['NM_LGL_OFFSPRING'].fillna('')])
combined_names = final_df['NM_SHORT_OFFSPRING'].fillna('')

In [19]:
combined_names_no_spaces = [name.replace(" ", "") for name in combined_names]
combined_names_no_spaces

['PARKREALPROP',
 'MINEOLAFNCLSVCCORP',
 'BSNBRECO',
 'BSNBRECO',
 'BSNBIS',
 'BARNETTMERCHANTSVCCORP',
 'VERMONTNBTC',
 'VERMONTSVCCORP',
 'EASTERNREALESTATECORP',
 'CONPACDEVCORP',
 'GUARANTYCORP',
 'IBERIABANKMTGCO',
 'LENDERSTITLECO',
 'PULASKIBLDG',
 'DIRECTORSPROPERTIES',
 'NORTHWESTTITLESERVICESINC',
 'PULASKISERVICESINC',
 'PULASKIINSAGY',
 'UNITEDBANKPLAZABUILDINGCOR',
 'VBCINVSTMTCORP-MENOMONIE',
 'ALLEGHENYVALLEYFSLLC',
 'COMMUNITYBKRSETTLCOLLC',
 'F&MINSAGY',
 'F&MINVCORPOFTOMAH',
 'F&MINVCORPOFTOMAH',
 'ORITANIINVCORP',
 'ORMONLLC',
 'ORITANIFNC',
 'ZORM2009LLC',
 '',
 'NORTHERNDATASVCS',
 'TRADESMENSBLDGCORP',
 'BNEDATASVCSCORP',
 'CBTLEASINGCORP',
 'CBTCREDITCORP',
 'BLACKROCKPROPERTIES',
 'CHRGENERAL',
 'NEWENGLANDASSC',
 'NEACORP',
 'CVCORP',
 'NEWENGLANDDATASVCSCORP',
 'DANIENTERPRISES',
 'DANIIENTERPRISES',
 'NCHOLDINGS',
 'MOONSCAPEENT',
 'FP',
 'NAPERSCTYCORP',
 'FIRSTARILRGNLOPRTNSCNTR',
 'SEASHOREFSLLC',
 'OCHBINVCO',
 'CLOVERLEAFINSAGY',
 'CLOVERLEAFINVCORP',
 '

### Match names using Levenshtein distance (fuzzywuzzy)

In [14]:
primary_dealer_matches = []
best_match_holding_companies = []

for dealer in ticks['Primary Dealer']:
    dealer_upper = dealer.upper().replace(" ", "")
    closest_match, score = process.extractOne(dealer_upper, combined_names_no_spaces)
    
    if closest_match and score >= 90:
        match_index = combined_names_no_spaces.index(closest_match)
        original_closest_match = combined_names[match_index]
        holding_company_name = final_df.iloc[match_index]['NM_SHORT'] 
    else:
        original_closest_match = None
        holding_company_name = None
        
    primary_dealer_matches.append(original_closest_match)
    best_match_holding_companies.append(holding_company_name)

In [15]:
ticks['best_match'] = primary_dealer_matches
ticks['best_match_holding_company'] = best_match_holding_companies
ticks.iloc[60:100,:]

Unnamed: 0,Primary Dealer,Holding Company,Ticker,Start Date,End Date,Permco,gvkey,best_match,best_match_holding_company
60,"KIDDER, PEABODY & CO., INCORPORATED",General Electric Company (historical),GE (NYSE),2/7/1979,12/30/1994,20792.0,5047.0,,
61,"KLEINWORT BENSON GOV'T SEC., INC.",Kleinwort Benson Group,,2/13/1980,12/27/1989,,,,
62,Lehman,Lehman Brothers Holdings Inc.,,2/22/1973,1/29/1974,21606.0,30128.0,LEHMAN BROS MRCH BKG PARTSHP L,BANK OF AMER CAP CORP
63,Lehman,Lehman Brothers Holdings Inc.,,11/25/1976,9/22/2008,21606.0,30128.0,LEHMAN BROS MRCH BKG PARTSHP L,BANK OF AMER CAP CORP
64,"L.F.ROTHSCHILD & CO., INC.",,,12/11/1986,1/17/1989,25825.0,12223.0,,
65,"LLOYDS GOV'T SECURITIES, INC.",LLOYDS BANKING GROUP PLC,LLOY (LSE),12/22/1987,4/28/1989,42417.0,15929.0,,
66,MALON S. ANDRUS INC.,,,5/19/1960,11/24/1965,,,,
67,MANUFACTURERS HANOVER SECURITIES COR,Manufacturers Hanover Corporation,MHC,8/31/1983,12/31/1991,21150.0,7003.0,,
68,MERRILL LYNCH GOVERNMENT SEC. INC.,"Merrill Lynch & Co., Inc",MER,5/19/1960,2/11/2009,21190.0,7267.0,,
69,MERRILL LYNCH GOVERNMENT SEC. INC.,Bank of America Corporation,BAC (NYSE),11/1/2010,Current,3151.0,7647.0,,


In [16]:
ticks.tail(10)

Unnamed: 0,Primary Dealer,Holding Company,Ticker,Start Date,End Date,Permco,gvkey,best_match,best_match_holding_company
102,TD SECURITIES (USA) LLC,The Toronto-Dominion Bank,"TD (NYSE, TSX)",2/11/2014,Current,29152.0,15706.0,T D SECURITIES,TORONTO-DOMINION BK
103,The Royal Bank of Scotland,The Royal Bank of Scotland Group plc,,4/1/2009,Current,,,LAND,
104,THOMSON MCKINNON SECURITIES INC.,Thomson McKinnon Group Inc.,,12/11/1986,7/7/1989,,,,
105,UBS SECURITIES INC.,UBS Group AG,UBSG (SIX Swiss Ex),12/7/1989,Current,37504.0,144496.0,I T,HANCOCK BK SECURITIES CORP
106,"WEEDEN & CO., INC.","Merrill Lynch & Co., Inc.",,6/17/1976,5/15/1978,21190.0,7267.0,,
107,"WERTHEIM SCHRODER & CO., INC.",Schroder Wertheim & Co. Inc.,,6/24/1988,11/8/1990,,,SCHRODER & CO,CITIGROUP FNCL PROD
108,WESTPAC POLLOCK GOV'T SECURITIES INC,Westpac Banking Corporation,WBK (NYSE),2/4/1987,6/27/1990,22027.0,15362.0,WESTPAC POLLOCK,WESTPAC USA
109,"WHITE, WELD & CO INC.","Merrill Lynch & Co., Inc.",,2/26/1976,4/18/1978,21190.0,7267.0,TE,
110,"YAMAICHI INT'L (AMERICA), INC.",Yamaichi Securities Company Limited,,9/29/1988,12/4/1997,,,,
111,Zions,Zions Bancorporation,ZION (NASDAQ),8/11/1993,3/31/2002,21305.0,8007.0,ZIONS CR CORP,ZIONS BC NA
