In [1]:
import pandas as pd
import numpy as np
import sqlite3
from fuzzywuzzy import process

### Download the data from NIC
- www.ffiec.govnpw/FinancialReport/ReturnRelationshipsZipFile
- www.ffiec.govnpw/FinancialReport/ReturnAttributesActiveZipFileCSV
- www.ffiec.govnpw/FinancialReport/ReturnAttributesClosedZipFileCSV

In [2]:
rel_data = pd.read_csv('../data/CSV_RELATIONSHIPS.CSV') # maps the holding companies and their offsprings

att_data_active = pd.read_csv('../data/CSV_ATTRIBUTES_ACTIVE.CSV') # includes names and attributes of companies (active)
att_data_closed = pd.read_csv('../data/CSV_ATTRIBUTES_CLOSED.CSV') # includes names and attributes of companies (closed)

  att_data_active = pd.read_csv('../data/CSV_ATTRIBUTES_ACTIVE.CSV') # includes names and attributes of companies (active)
  att_data_closed = pd.read_csv('../data/CSV_ATTRIBUTES_CLOSED.CSV') # includes names and attributes of companies (closed)


### Combine all the banks and institutions (active and closed)
    for which the Fed has a supervisory, regulatory, or research interest

In [3]:
att_data = pd.concat([att_data_active, att_data_closed])
att_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 218085 entries, 0 to 156438
Data columns (total 74 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   #ID_RSSD            218085 non-null  int64  
 1   D_DT_START          218085 non-null  object 
 2   D_DT_END            218085 non-null  object 
 3   BHC_IND             218085 non-null  int64  
 4   BROAD_REG_CD        218085 non-null  int64  
 5   CHTR_AUTH_CD        218085 non-null  int64  
 6   CHTR_TYPE_CD        218085 non-null  int64  
 7   FBO_4C9_IND         218085 non-null  int64  
 8   FHC_IND             218085 non-null  int64  
 9   FUNC_REG            218085 non-null  int64  
 10  INSUR_PRI_CD        218085 non-null  int64  
 11  MBR_FHLBS_IND       218085 non-null  int64  
 12  MBR_FRS_IND         218085 non-null  int64  
 13  SEC_RPTG_STATUS     218085 non-null  int64  
 14  EST_TYPE_CD         218085 non-null  int64  
 15  BANK_CNT            14793 non-null   fl

In [4]:
selected_columns_descriptions = {
    '#ID_RSSD': 'RSSD ID',
    'BHC_IND': 'Bank Holding Company Indicator',
    'CHTR_TYPE_CD': 'Entity Type Code',
    'FHC_IND': 'Financial Holding Company Indicator',
    'D_DT_EXIST_CMNC': 'Date of Commencement of Existence',
    'D_DT_EXIST_TERM': 'Date of Termination of Existence',
    'NM_LGL': 'Legal Name',
    'NM_SHORT': 'Short Name',
    'DOMESTIC_IND': 'Domestic Indicator',
    'CNTRY_NM': 'Country Name',
    'ID_CUSIP': 'CUSIP ID',
}

In [5]:
selected_columns = list(selected_columns_descriptions.keys())

att_selected = att_data[selected_columns]
att_selected.info()

<class 'pandas.core.frame.DataFrame'>
Index: 218085 entries, 0 to 156438
Data columns (total 11 columns):
 #   Column           Non-Null Count   Dtype 
---  ------           --------------   ----- 
 0   #ID_RSSD         218085 non-null  int64 
 1   BHC_IND          218085 non-null  int64 
 2   CHTR_TYPE_CD     218085 non-null  int64 
 3   FHC_IND          218085 non-null  int64 
 4   D_DT_EXIST_CMNC  40501 non-null   object
 5   D_DT_EXIST_TERM  218085 non-null  object
 6   NM_LGL           218085 non-null  object
 7   NM_SHORT         218085 non-null  object
 8   DOMESTIC_IND     218085 non-null  object
 9   CNTRY_NM         218085 non-null  object
 10  ID_CUSIP         218085 non-null  object
dtypes: int64(4), object(7)
memory usage: 20.0+ MB


In [6]:
selected_columns_descriptions = {
    '#ID_RSSD_PARENT': 'RSSD ID of Parent',
    'ID_RSSD_OFFSPRING': 'RSSD ID of Offspring',
    'RELN_LVL': 'Relationship Level',
    'PCT_EQUITY_BRACKET': 'Percent Equity Bracket',
    'D_DT_RELN_EST': 'Date Relationship Was Established',
}

In [7]:
selected_columns = list(selected_columns_descriptions.keys())

rel_selected = rel_data[selected_columns]
rel_selected.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277752 entries, 0 to 277751
Data columns (total 5 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   #ID_RSSD_PARENT     277752 non-null  int64 
 1   ID_RSSD_OFFSPRING   277752 non-null  int64 
 2   RELN_LVL            277752 non-null  int64 
 3   PCT_EQUITY_BRACKET  277752 non-null  object
 4   D_DT_RELN_EST       277752 non-null  object
dtypes: int64(3), object(2)
memory usage: 10.6+ MB


### merge

In [27]:
id_permco = pd.read_csv('../data/crsp_20140331.csv', encoding='latin1')
id_permco = id_permco.drop([0, 1])
id_permco.head()

Unnamed: 0,notice,name,inst_type,entity,permco,dt_start,dt_end
2,,Amsouth Bancorporation,Bank Holding Company,1078604.0,25.0,19900101.0,20061104.0
3,,American Express Company,Bank Holding Company,1275216.0,90.0,19900101.0,20140331.0
4,,"Affiliated Bankshares Of Colorado, Inc.",Bank Holding Company,1049734.0,94.0,19900101.0,19921030.0
5,,American International Group,Thrift holding company,1562176.0,137.0,19900101.0,20140331.0
6,,American Bancorporation,Bank Holding Company,1068623.0,196.0,19900101.0,20020301.0


In [8]:
rel_with_parent = pd.merge(rel_selected, att_selected, left_on='#ID_RSSD_PARENT', right_on='#ID_RSSD', how='left', suffixes=('', '_PARENT'))
final_df = pd.merge(rel_with_parent, att_selected, left_on='ID_RSSD_OFFSPRING', right_on='#ID_RSSD', how='left', suffixes=('', '_OFFSPRING'))

In [36]:
entity_to_permco_mapping = id_permco.set_index('entity')['permco'].to_dict()

# Map 'entity' to 'permco' and add it as a new column in final_df
final_df['permco'] = final_df['#ID_RSSD_PARENT'].map(entity_to_permco_mapping)

# If no mapping is found, set the value to 0
final_df['permco'] = final_df['permco'].fillna(0).astype(int)

In [37]:

cols = ['#ID_RSSD_PARENT','permco','NM_LGL','NM_SHORT','ID_RSSD_OFFSPRING','NM_LGL_OFFSPRING','NM_SHORT_OFFSPRING']
rest_cols = [col for col in final_df.columns if col not in cols]
final_df = final_df[cols + rest_cols]
final_df

Unnamed: 0,#ID_RSSD_PARENT,permco,NM_LGL,NM_SHORT,ID_RSSD_OFFSPRING,NM_LGL_OFFSPRING,NM_SHORT_OFFSPRING,RELN_LVL,PCT_EQUITY_BRACKET,D_DT_RELN_EST,...,ID_CUSIP,#ID_RSSD_OFFSPRING,BHC_IND_OFFSPRING,CHTR_TYPE_CD_OFFSPRING,FHC_IND_OFFSPRING,D_DT_EXIST_CMNC_OFFSPRING,D_DT_EXIST_TERM_OFFSPRING,DOMESTIC_IND_OFFSPRING,CNTRY_NM_OFFSPRING,ID_CUSIP_OFFSPRING
0,130,0,PARK BANK OF FLORIDA ...,PARK BK OF FL,1081305,"PARK REAL PROPERTY, INC. ...",PARK REAL PROP,1,80-100,06/27/1984 00:00:00,...,0,1081305.0,0.0,720.0,0.0,,02/14/1986 00:00:00,Y,UNITED STATES,0
1,279,0,"BROADSTREET BANK, SSB ...",BROADSTREET BK SSB,3923155,MINEOLA FINANCIAL SERVICE CORPORATION ...,MINEOLA FNCL SVC CORP,1,100,01/23/2008 00:00:00,...,0,3923155.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0
2,505,0,BALLSTON SPA NATIONAL BANK ...,BALLSTON SPA NB,2913168,"BSNB REAL ESTATE COMPANY, INC. ...",BSNB RE CO,1,80-100,05/27/1999 00:00:00,...,0,2913168.0,0.0,720.0,0.0,05/27/1999 00:00:00,12/31/9999 00:00:00,Y,UNITED STATES,0
3,505,0,BALLSTON SPA NATIONAL BANK ...,BALLSTON SPA NB,2913168,"BSNB REAL ESTATE COMPANY, INC. ...",BSNB RE CO,1,100,05/27/1999 00:00:00,...,0,2913168.0,0.0,720.0,0.0,05/27/1999 00:00:00,12/31/9999 00:00:00,Y,UNITED STATES,0
4,505,0,BALLSTON SPA NATIONAL BANK ...,BALLSTON SPA NB,3079566,BSNB I. S. INC. ...,BSNB I S,1,80-100,06/01/2000 00:00:00,...,0,3079566.0,0.0,550.0,0.0,06/01/2000 00:00:00,12/31/9999 00:00:00,Y,UNITED STATES,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277772,5924316,0,US GCDC HOLDINGS 2 LLC ...,US GCDC HOLDS 2 LLC,5924343,US GCDC PHASE 2 HOLDINGS GP LLC ...,US GCDC PHASE 2 HOLDS GP LLC,1,<25,12/31/2023 00:00:00,...,0,5924343.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0
277773,5924316,0,US GCDC HOLDINGS 2 LLC ...,US GCDC HOLDS 2 LLC,5924352,US GCDC PHASE 2 HOLDINGS LP ...,US GCDC PHASE 2 HOLDS LP,1,0,12/31/2023 00:00:00,...,0,5924352.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0
277774,5924325,0,US GCDC PHASE 1 HOLDINGS LLC ...,US GCDC PHASE 1 HOLDS LLC,5924334,GCDC PURCHASER PHASE 1 LLC ...,GCDC PURCHASER PHASE 1 LLC,1,<25,12/31/2023 00:00:00,...,0,5924334.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0
277775,5924352,0,US GCDC PHASE 2 HOLDINGS LP ...,US GCDC PHASE 2 HOLDS LP,5924343,US GCDC PHASE 2 HOLDINGS GP LLC ...,US GCDC PHASE 2 HOLDS GP LLC,1,<25,12/31/2023 00:00:00,...,0,5924343.0,0.0,720.0,0.0,,12/31/9999 00:00:00,Y,UNITED STATES,0


In [13]:
output_file_path = '../data/rel_df.csv'
final_df.to_csv(output_file_path, index=False)

In [2]:
final_df = pd.read_csv('../data/rel_df.csv')

  final_df = pd.read_csv('../data/rel_df.csv')


### combine with ticker data

In [3]:
ticks = pd.read_csv('../data/manual/ticks_v2.csv', delimiter=',')
ticks.head()

Unnamed: 0,Primary Dealer,Holding Company,Ticker,Start Date,End Date,Permco,gvkey
0,"ABN AMRO BANK, N.V., NY BR",BAC NORTH AMERICA HOLDING COMPANY,,9/29/98,9/15/06,31989.0,15504.0
1,AUBREY G. LANSTON & CO.,"INDUSTRIAL BANK OF JAPAN, LIMITED",8302 (Japan),5/19/60,4/17/00,,15685.0
2,"BA SECURITIES, INC.",Bank of America Corporation,BAC (NYSE),4/18/94,9/30/97,437.0,7647.0
3,BANC OF AMERICA SECURITIES LLC,Bank of America Corporation,BAC (NYSE),5/17/99,11/1/10,3151.0,7647.0
4,"BANC ONE CAPITAL MARKETS, INC",JPMorgan Chase & Co. (Acquired by JPMorgan Chase),JPM (NYSE),4/1/99,8/1/04,20436.0,2968.0


In [4]:
combined_names = pd.concat([final_df['NM_SHORT_OFFSPRING'].fillna(''), final_df['NM_LGL_OFFSPRING'].fillna('')])
combined_names = final_df['NM_SHORT_OFFSPRING'].fillna('')

In [5]:
combined_names_no_spaces = [name.replace(" ", "") for name in combined_names]
combined_names_no_spaces

['584',
 '1370',
 '1774',
 '1775',
 '1777',
 '1778',
 '1779',
 '1780',
 '1781',
 '1782',
 '1783',
 '1784',
 '1785',
 '1786',
 '2402',
 '3300',
 '5511',
 '7575',
 '7575',
 '8517',
 '36000',
 '2771211',
 '@VENTURESEXPANSIONFUNDLP',
 '0JLOVELLHOLDING',
 '0168STATTR',
 '0179STATTR',
 '0866101BC',
 '0866102BC',
 '0890859BCUNLIMITEDLIABILITY',
 '0891145BCUNLIMITEDLIABILITY',
 '0891623BCUNLIMITEDLIABILITY',
 '1AMEREAGLEPLAZA',
 '1NORTHDEARBORN',
 '1NORTHDEARBORNLLC',
 '1NORTHDEARBORNLLC',
 '1NORTHDEARBORNTR',
 '1NORTHEQTHEALTHCARELONG/SH',
 '1NORTHLASALLE',
 '1NORTHLASALLELLC',
 '1NORTHLASALLELLC',
 '1NORTHLASALLETR',
 '1PARKHOLDCORP',
 '1RIVERWALKLLC',
 '1-7WASHINGTONSTREETPARTNERS',
 '1-7WASHINGTONSTREETPARTNERS',
 '1-8HEDWIGCOURTBNLLC',
 '1-800-EAST-WESTMTGCO',
 '1-800-EAST-WESTMTGCO',
 '1-800-EAST-WESTMTGLLC',
 '10HILTONDRIVE',
 '10INDEPENDENCEASSCLP',
 '10INDEPENDENCEASSCLP',
 '10MINERVAPLLP',
 '10SYLVANASSCLP',
 '10SYLVANASSCLP',
 '10WNATIONWIDELLC',
 '100BLOCKASSCLIMITED',
 '100BLOCKAS

### Match names using Levenshtein distance (fuzzywuzzy)

In [6]:
primary_dealer_matches = []
best_match_holding_companies = []

def adjust_score_for_starting_char(dealer, choice, original_score, bonus=10):
    """
    Increase the score if the first character matches, indicating a better match.
    """
    if dealer[0] == choice[0]:  # Compare the starting character
        return original_score + bonus
    return original_score

for dealer in ticks['Primary Dealer']:
    dealer_upper = dealer.upper().replace(" ", "")
    closest_match, score = process.extractOne(dealer_upper, combined_names_no_spaces)
    adjusted_score = adjust_score_for_starting_char(dealer_upper, closest_match.upper().replace(" ", ""), score)
    
    if closest_match and adjusted_score >= 90:
        match_index = combined_names_no_spaces.index(closest_match)
        original_closest_match = combined_names[match_index]
        holding_company_name = final_df.iloc[match_index]['NM_LGL'] 
    else:
        original_closest_match = None
        holding_company_name = None
        
    primary_dealer_matches.append(original_closest_match)
    best_match_holding_companies.append(holding_company_name)

In [7]:
ticks['best_match'] = primary_dealer_matches
ticks['best_match_holding_company'] = best_match_holding_companies
ticks.iloc[:40,:]

Unnamed: 0,Primary Dealer,Holding Company,Ticker,Start Date,End Date,Permco,gvkey,best_match,best_match_holding_company
0,"ABN AMRO BANK, N.V., NY BR",BAC NORTH AMERICA HOLDING COMPANY,,9/29/98,9/15/06,31989.0,15504.0,ABN AMRO,BAC NORTH AMERICA HOLDING COMPANY ...
1,AUBREY G. LANSTON & CO.,"INDUSTRIAL BANK OF JAPAN, LIMITED",8302 (Japan),5/19/60,4/17/00,,15685.0,AUBREY G LANSTON & CO,"INDUSTRIAL BANK OF JAPAN, LIMITED, THE ..."
2,"BA SECURITIES, INC.",Bank of America Corporation,BAC (NYSE),4/18/94,9/30/97,437.0,7647.0,BA SECURITIES,CHINA CONSTRUCTION BANK (ASIA) CORPORATION LIM...
3,BANC OF AMERICA SECURITIES LLC,Bank of America Corporation,BAC (NYSE),5/17/99,11/1/10,3151.0,7647.0,BANCO,
4,"BANC ONE CAPITAL MARKETS, INC",JPMorgan Chase & Co. (Acquired by JPMorgan Chase),JPM (NYSE),4/1/99,8/1/04,20436.0,2968.0,BANC ONE CAPITAL MARKETS,BANC ONE CAPITAL HOLDINGS CORPORATION ...
5,BANCAMERICA ROBERTSON STEPHEN,BANKBOSTON CORP,BKB.2,10/1/97,9/30/98,20264.0,2014.0,BANCAMERICA ROBERTSON STEPHENS,ROBERTSON STEPHENS INVESTMENT MANAGEMENT CO. ...
6,BANK OF AMERICA NT & SA,Bank of America Corporation,BAC (NYSE),11/17/71,4/15/94,437.0,7647.0,BANK,"BANCSHARES, INC., THE ..."
7,"BANK OF NOVA SCOTIA, NEW YORK AGENCY",BANK OF NOVA SCOTIA,BNS (Toronto),10/4/11,Current,43264.0,15582.0,AGENCY,"LAKIN BANCSHARES, INC. ..."
8,BANKERS TRUST,Bankers Trust New York Corporation,,5/19/60,7/7/89,20266.0,2029.0,BANK,"BANCSHARES, INC., THE ..."
9,BARCLAYS CAPITAL INC.,Barclays PLC,BARC (LSE),4/1/98,Current,20269.0,12673.0,BARCLAYS CAPITAL,"BARCLAYS DE ZOETE WEDD U.S. HOLDINGS, INC. ..."


In [8]:
ticks.iloc[41:80,:]

Unnamed: 0,Primary Dealer,Holding Company,Ticker,Start Date,End Date,Permco,gvkey,best_match,best_match_holding_company
41,DISCOUNT CORPORATION OF NEW YORK,D C N Y CORP,DCY,5/19/60,8/10/93,1269.0,,DISCOUNT CORP,JPMORGAN CHASE & CO. ...
42,DLJ SECURITIES CORPORATION,Credit Suisse Group AG,CSGN (SIX Swiss Ex),3/6/74,12/31/00,42125.0,28838.0,,
43,DLJ SECURITIES CORPORATION,Credit Suisse Group AG,CSGN (SIX Swiss Ex),10/25/95,12/31/00,42125.0,28838.0,,
44,DRESDNER KLEINWORT SECURITIES LLC,Dresdner Bank AG (historical),DRSDY,5/8/97,6/26/09,1386.0,15577.0,DRESDNER KLEINWORT,COMMERZBANK AKTIENGESELLSCHAFT ...
45,DREXEL BURNHAM LAMBERT,"DREXEL BURNHAM LAMBERT GROUP, INC",,5/19/60,3/28/90,,,,
46,EASTBRIDGE CAPITAL INC.,EASTBRIDGE HOLDINGS INC.,,6/18/92,5/29/98,,,BRIDGE,
47,F.I. DUPONT & CO (DuPont Walston),,,12/12/68,7/18/73,,,,
48,FIRST CHICAGO,JPMorgan Chase & Co.,JPM (NYSE),5/19/60,3/31/99,20436.0,2968.0,FIRST BK CHICAGO,FIRST BANK CHICAGO CORP. ...
49,FIRST INTERSTATE,First Interstate Bancorp,I.1,7/31/64,6/17/88,20720.0,4710.0,FIRST INTERSTATE BK,FIRST INTERSTATE BANCORP ...
50,FIRST N/B OF BOSTON,BANKBOSTON CORP,BKB.2,3/21/83,11/17/85,20264.0,2014.0,FIRST,BASSANO PARTICIPATIONS ...


In [9]:
ticks.iloc[81:,:]

Unnamed: 0,Primary Dealer,Holding Company,Ticker,Start Date,End Date,Permco,gvkey,best_match,best_match_holding_company
81,NUVEEN GOV'T SEC. INC.,Nuveen Corporation,6051B,11/18/71,8/27/80,29493.0,25296.0,,
82,PAINE WEBBER INCORPORATED,PaineWebber Group Inc.,PWJ,6/22/72,6/27/73,21359.0,8299.0,PAINEWEBBER,UBS AMERICAS INC. ...
83,"PAINE, WEBBER, JACKSON & CURTIS INC.",PaineWebber Group Inc.,PWJ,11/25/76,12/4/00,21359.0,8299.0,JACKSON,
84,PARIBAS CORPORATION,BNP Paribas Group,BNP (Euronext Paris),5/1/97,9/14/00,,15532.0,ASCO,"PARKWAY DEVELOPMENT, INC. ..."
85,PRUDENTIAL SECURITIES INCORPO,Prudential Financial Inc.,PRU (NYSE),10/29/75,12/1/00,42524.0,143356.0,,
86,RBC CAPITAL MARKETS,Royal Bankof Canada,RY,7/8/09,Current,29151.0,15633.0,BBC CAPITAL MARKET,ONEUNITED BANK ...
87,RBS SECURITIES INC.,The Royal Bank of Scotland Group PLC,RBS,4/1/09,Current,28711.0,15634.0,EC,"GE CAPITAL US HOLDINGS, INC. ..."
88,REFCO PARTNERS,Refco Group Ltd.,RFX,11/19/80,5/7/87,47194.0,163770.0,FCOP,"BANK OF AMERICA, NATIONAL ASSOCIATION ..."
89,S.G. WARBURG & CO.,BNP Paribas Group,BNP (Euronext Paris),6/24/88,7/26/95,42524.0,143356.0,SG WARBURG & CO,PARIBAS INTERNATIONAL ...
90,SANWA SCTY USA CO LP,Sanwa Holdings Corp,,6/20/88,7/20/98,,102895.0,SANWA SCTY USA CO LP,SANWA SECURITIES (DELAWARE) INC. ...
