# SEC Form 4 Data Collation
For the full README, see: https://www.sec.gov/files/insider_transactions_readme.pdf

In [1]:
import pandas as pd

In [2]:
# Folder containing the raw CSV tables that the loading cell reads
DATA_FOLDER = 'FINAL_RAW_DATA'
# CSV file read into `data_litigations` in the litigation section
LITIGATIONS_DATA_PATH = 'infected.csv'

In [3]:
# NOTE: every table is read with low_memory=False so pandas infers each column's dtype from the
# whole file instead of chunk by chunk. With the default, the four transaction/holding reads were
# echoed back in this cell's output, which looks like pandas' DtypeWarning for mixed-type columns.

## Form Submission Main data
# ACCESSION_NUMBER is the primary key
submission_data = pd.read_csv(f"{DATA_FOLDER}/SUBMISSION.csv", low_memory=False)

## Transaction info for each submission (buy and sell), ACCESSION_NUMBER and (NON)DERIV_TRANS_SK are the primary keys
# One form (i.e. ACCESSION_NUMBER) can have multiple transactions (i.e. *_SK), transactions can be across multiple years, max 30 each
# Duplicate *_SK keys are for different transactions, and there are max 2 of each duplicate _SK keys
nonderiv_trans_data = pd.read_csv(f"{DATA_FOLDER}/NONDERIV_TRANS.csv", low_memory=False)
deriv_trans_data = pd.read_csv(f"{DATA_FOLDER}/DERIV_TRANS.csv", low_memory=False)

## Holding info for each submission (what they hold - after each transaction? TODO confirm against the README)
nonderiv_holding_data = pd.read_csv(f"{DATA_FOLDER}/NONDERIV_HOLDING.csv", low_memory=False)
deriv_holding_data = pd.read_csv(f"{DATA_FOLDER}/DERIV_HOLDING.csv", low_memory=False)

## Name info
reporting_owner_data = pd.read_csv(f"{DATA_FOLDER}/REPORTINGOWNER.csv", low_memory=False)
names_data = pd.read_csv(f"{DATA_FOLDER}/OWNER_SIGNATURE.csv", low_memory=False)

## Additional info, to match with '*_FN' columns in all other datasets based on matching ACCESSION_NUMBER
footnotes_data = pd.read_csv(f"{DATA_FOLDER}/FOOTNOTES.csv", low_memory=False)

  nonderiv_trans_data = pd.read_csv(f"{DATA_FOLDER}/NONDERIV_TRANS.csv")
  deriv_trans_data = pd.read_csv(f"{DATA_FOLDER}/DERIV_TRANS.csv")
  nonderiv_holding_data = pd.read_csv(f"{DATA_FOLDER}/NONDERIV_HOLDING.csv")
  deriv_holding_data = pd.read_csv(f"{DATA_FOLDER}/DERIV_HOLDING.csv")


In [4]:
# Overview of the data: drop exact duplicate rows (in place), then report the columns that are
# entirely null and the columns that hold exactly one distinct non-null value.
dataframes = dict(
    submission_data=submission_data,
    nonderiv_trans_data=nonderiv_trans_data,
    deriv_trans_data=deriv_trans_data,
    nonderiv_holding_data=nonderiv_holding_data,
    deriv_holding_data=deriv_holding_data,
    reporting_owner_data=reporting_owner_data,
    names_data=names_data,
    footnotes_data=footnotes_data,
)

for name, df in dataframes.items():
    shape = df.shape  # shape before de-duplication
    print(f"{name}, {shape}")

    # In place, so the module-level frames themselves lose their duplicate rows
    df.drop_duplicates(inplace=True)
    removed = shape[0] - df.shape[0]
    print(f"Duplicate rows removed: {removed}" if removed else "No duplicate rows")

    null_columns = df.columns[df.isna().all()]
    print(f"Columns with all null values: {null_columns.tolist()}")

    # nunique() skips NaN, so a column with one value plus missing entries is also listed here
    distinct_counts = df.nunique()
    constant_columns = distinct_counts.index[distinct_counts == 1].tolist()
    print(f"Columns with constant values: {constant_columns}")
    print()

submission_data, (2917488, 13)
No duplicate rows
Columns with all null values: []
Columns with constant values: []

nonderiv_trans_data, (4343860, 28)
No duplicate rows
Columns with all null values: []
Columns with constant values: []

deriv_trans_data, (1763084, 42)
No duplicate rows
Columns with all null values: ['TRANS_ACQUIRED_DISP_CD_FN']
Columns with constant values: []

nonderiv_holding_data, (1522788, 14)
No duplicate rows
Columns with all null values: ['TRANS_FORM_TYPE_FN']
Columns with constant values: ['TRANS_FORM_TYPE']

deriv_holding_data, (1000283, 26)
No duplicate rows
Columns with all null values: ['TRANS_FORM_TYPE_FN']
Columns with constant values: ['TRANS_FORM_TYPE']

reporting_owner_data, (3171123, 13)
No duplicate rows
Columns with all null values: []
Columns with constant values: []

names_data, (3119138, 3)
Duplicate rows removed: 8798
Columns with all null values: []
Columns with constant values: []

footnotes_data, (6453264, 3)
No duplicate rows
Columns with all

In [5]:
# Parse FILING_DATE to datetime; values that cannot be parsed become NaT (errors='coerce')
filing_dates = pd.to_datetime(submission_data['FILING_DATE'], errors='coerce')
submission_data['FILING_DATE'] = filing_dates
# Observed range: 2011-2024
submission_data.loc[:, ['FILING_DATE']].describe()

Unnamed: 0,FILING_DATE
count,2917488
mean,2017-10-29 01:33:20.134912
min,2011-01-03 00:00:00
25%,2014-04-08 00:00:00
50%,2017-10-11 00:00:00
75%,2021-05-04 00:00:00
max,2024-12-31 00:00:00


In [6]:
# Parse TRANS_DATE to datetime; values that cannot be parsed become NaT (errors='coerce')
nonderiv_trans_dates = pd.to_datetime(nonderiv_trans_data['TRANS_DATE'], errors='coerce')
nonderiv_trans_data['TRANS_DATE'] = nonderiv_trans_dates
# Observed range: 1987 - 2047 (the maximum is in the future - presumably data-entry errors, TODO confirm)
nonderiv_trans_data.loc[:, ['TRANS_DATE']].describe()

Unnamed: 0,TRANS_DATE
count,4343807
mean,2017-10-27 23:59:20.915391488
min,1987-10-07 00:00:00
25%,2014-03-14 00:00:00
50%,2017-11-21 00:00:00
75%,2021-05-03 00:00:00
max,2047-05-24 00:00:00


In [7]:
# Parse TRANS_DATE to datetime; values that cannot be parsed become NaT (errors='coerce')
deriv_trans_dates = pd.to_datetime(deriv_trans_data['TRANS_DATE'], errors='coerce')
deriv_trans_data['TRANS_DATE'] = deriv_trans_dates
# Observed range: 1993 - 2047 (the maximum is in the future - presumably data-entry errors, TODO confirm)
deriv_trans_data.loc[:, ['TRANS_DATE']].describe()

Unnamed: 0,TRANS_DATE
count,1763069
mean,2017-08-19 18:05:22.615166720
min,1993-12-14 00:00:00
25%,2014-02-11 00:00:00
50%,2017-06-20 00:00:00
75%,2021-03-03 00:00:00
max,2047-06-07 00:00:00


## Exploring transactions and holdings data

In [8]:
# How many filings (ACCESSION_NUMBER) appear in both the non-derivative holdings and transactions tables?
nonderv_holding_n = nonderiv_holding_data['ACCESSION_NUMBER'].nunique()
nonderv_trans_n = nonderiv_trans_data['ACCESSION_NUMBER'].nunique()
print(f"Unique ACCESSION_NUMBER for holdings: {nonderv_holding_n}, for transactions: {nonderv_trans_n}")

nonderiv_holding_ids = set(nonderiv_holding_data['ACCESSION_NUMBER'])
nonderiv_trans_ids = set(nonderiv_trans_data['ACCESSION_NUMBER'])
nonderiv_overlap_values = nonderiv_holding_ids & nonderiv_trans_ids

if not nonderiv_overlap_values:
    print("No overlap found.")
else:
    nonderiv_overlap_n = len(nonderiv_overlap_values)
    # Share of the overlap relative to the smaller of the two unique counts
    nonderiv_overlap_share = round(nonderiv_overlap_n / min(nonderv_trans_n, nonderv_holding_n), 2)
    print("Overlapping ACCESSION_NUMBER values found:", nonderiv_overlap_n, nonderiv_overlap_share)

Unique ACCESSION_NUMBER for holdings: 805661, for transactions: 2200849
Overlapping ACCESSION_NUMBER values found: 603111 0.75


In [9]:
# How many filings (ACCESSION_NUMBER) appear in both the derivative holdings and transactions tables?
derv_holding_n = deriv_holding_data['ACCESSION_NUMBER'].nunique()
derv_trans_n = deriv_trans_data['ACCESSION_NUMBER'].nunique()
print(f"Unique ACCESSION_NUMBER for holdings: {derv_holding_n}, for transactions: {derv_trans_n}")

deriv_holding_ids = set(deriv_holding_data['ACCESSION_NUMBER'])
deriv_trans_ids = set(deriv_trans_data['ACCESSION_NUMBER'])
deriv_overlap_values = deriv_holding_ids & deriv_trans_ids

if not deriv_overlap_values:
    print("No overlap found.")
else:
    deriv_overlap_n = len(deriv_overlap_values)
    # Share of the overlap relative to the smaller of the two unique counts
    deriv_overlap_share = round(deriv_overlap_n / min(derv_trans_n, derv_holding_n), 2)
    print("Overlapping ACCESSION_NUMBER values found:", deriv_overlap_n, deriv_overlap_share)

Unique ACCESSION_NUMBER for holdings: 246962, for transactions: 1107281
Overlapping ACCESSION_NUMBER values found: 83827 0.34


### Case: Overlapping ACCESSION_NUMBER for deriv transactions and holdings
1. '0001062993-24-014375', 1 deriv transaction, 8 holdings, including varying values of 'SHRS_OWND_FOLWNG_TRANS' for different stocks, but no date of transaction
2. '0001654954-19-000382', 1 deriv transaction, 2 holdings, varying 'SHRS_OWND_FOLWNG_TRANS' for same stocks, also no date
3. '0001209191-11-009901', 1 deriv transaction, 5 holdings, varying 'SHRS_OWND_FOLWNG_TRANS' for 1 different and 4 same stocks, some have non-null EXERCISE_DATE
4. '0001437749-21-019709',
5. '0001181431-11-003532',
etc

* Note that nonderiv holdings have a different schema compared to deriv holdings

In [10]:
# Example filing inspected in the next two cells (case 3 in the list above)
ACCESSION_NUMBER_TEST = "0001209191-11-009901"

In [11]:
# Derivative transactions reported in the example filing
deriv_trans_data.loc[deriv_trans_data['ACCESSION_NUMBER'].eq(ACCESSION_NUMBER_TEST)]

Unnamed: 0,ACCESSION_NUMBER,DERIV_TRANS_SK,SECURITY_TITLE,SECURITY_TITLE_FN,CONV_EXERCISE_PRICE,CONV_EXERCISE_PRICE_FN,TRANS_DATE,TRANS_DATE_FN,DEEMED_EXECUTION_DATE,DEEMED_EXECUTION_DATE_FN,...,UNDLYNG_SEC_VALUE,UNDLYNG_SEC_VALUE_FN,SHRS_OWND_FOLWNG_TRANS,SHRS_OWND_FOLWNG_TRANS_FN,VALU_OWND_FOLWNG_TRANS,VALU_OWND_FOLWNG_TRANS_FN,DIRECT_INDIRECT_OWNERSHIP,DIRECT_INDIRECT_OWNERSHIP_FN,NATURE_OF_OWNERSHIP,NATURE_OF_OWNERSHIP_FN
1020924,0001209191-11-009901,1323168,Stock Option,,4.52,,2011-02-11,,,,...,,,0.0,,,,D,,,


In [12]:
# Derivative holdings reported in the example filing
deriv_holding_data.loc[deriv_holding_data['ACCESSION_NUMBER'].eq(ACCESSION_NUMBER_TEST)]

Unnamed: 0,ACCESSION_NUMBER,DERIV_HOLDING_SK,SECURITY_TITLE,SECURITY_TITLE_FN,CONV_EXERCISE_PRICE,CONV_EXERCISE_PRICE_FN,TRANS_FORM_TYPE,TRANS_FORM_TYPE_FN,EXERCISE_DATE,EXERCISE_DATE_FN,...,UNDLYNG_SEC_VALUE,UNDLYNG_SEC_VALUE_FN,SHRS_OWND_FOLWNG_TRANS,SHRS_OWND_FOLWNG_TRANS_FN,VALU_OWND_FOLWNG_TRANS,VALU_OWND_FOLWNG_TRANS_FN,DIRECT_INDIRECT_OWNERSHIP,DIRECT_INDIRECT_OWNERSHIP_FN,NATURE_OF_OWNERSHIP,NATURE_OF_OWNERSHIP_FN
567018,0001209191-11-009901,810244,Phantom Stock Units,,0.0,,,,,F4,...,,,1688.63,,,,D,,,
567019,0001209191-11-009901,810245,Stock Option,,3.7,,,,2002-08-11,,...,,,30000.0,,,,D,,,
567020,0001209191-11-009901,810248,Stock Option,,13.1,,,,,F6,...,,,15000.0,,,,D,,,
567021,0001209191-11-009901,810247,Stock Option,,11.4,,,,,F5,...,,,30000.0,,,,D,,,
567022,0001209191-11-009901,810246,Stock Option,,4.37,,,,2003-08-11,,...,,,36000.0,,,,D,,,


## Columns to drop

In [13]:
# Columns to drop.
# The drop is in place so the existing frame objects are modified; errors='ignore' lets this cell
# be re-run without a KeyError once the columns are already gone.
reporting_owner_data.drop(columns=['FILE_NUMBER'], inplace=True, errors='ignore')  # File number assigned by the Commission; sourced from EDGAR
names_data.drop(columns=['OWNERSIGNATUREDATE'], inplace=True, errors='ignore')  # Reporting owner signature date in (DD-MON-YYYY) format

## Exploring Name Info (1 submission with multiple reporting owners)
* 79206 have more than one reporting owner
* highest number of reporting owners is 10, because no more than 10 reporting persons can file any one Form 4 (see 2 cells below)


Why is the RPTOWNERNAME so weird? 

* Recommendation: maybe we want to compile the names into one tuple per accession number?

In [14]:
# Number of submissions (ACCESSION_NUMBER) that list more than one reporting owner
owners_per_submission = reporting_owner_data['ACCESSION_NUMBER'].value_counts()
owners_per_submission.gt(1).sum()

79206

In [15]:
# Reporting owners listed on one example submission
reporting_owner_data.loc[reporting_owner_data['ACCESSION_NUMBER'].eq('0001209191-21-007755')]

Unnamed: 0,ACCESSION_NUMBER,RPTOWNERCIK,RPTOWNERNAME,RPTOWNER_RELATIONSHIP,RPTOWNER_TITLE,RPTOWNER_TXT,RPTOWNER_STREET1,RPTOWNER_STREET2,RPTOWNER_CITY,RPTOWNER_STATE,RPTOWNER_ZIPCODE,RPTOWNER_STATE_DESC
2604188,0001209191-21-007755,1812579,"TA AP VII-B DO Subsidiary Partnership, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604189,0001209191-21-007755,1812606,"TA XI DO AIV II, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604190,0001209191-21-007755,1034569,"TA ASSOCIATES, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604191,0001209191-21-007755,1609539,"TA XI DO Feeder, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604192,0001209191-21-007755,1812605,"TA SDF III DO AIV II, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604193,0001209191-21-007755,1609553,"TA SDF III DO AIV, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604194,0001209191-21-007755,1609536,"TA XI DO AIV, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604195,0001209191-21-007755,1578035,"TA INVESTORS IV, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604196,0001209191-21-007755,1548681,TA Atlantic & Pacific VII-A L.P.,"Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,
2604197,0001209191-21-007755,1609557,"TA SDF III DO Feeder, L.P.","Director,TenPercentOwner",,,200 CLARENDON STREET,56TH FLOOR,BOSTON,MA,2116,


In [16]:
# The REMARKS column of this submission carries the disclaimer about reporting persons
submission_data.loc[submission_data['ACCESSION_NUMBER'].eq('0001209191-21-007755')]

Unnamed: 0,ACCESSION_NUMBER,FILING_DATE,PERIOD_OF_REPORT,DATE_OF_ORIG_SUB,NO_SECURITIES_OWNED,NOT_SUBJECT_SEC16,FORM3_HOLDINGS_REPORTED,FORM4_TRANS_REPORTED,DOCUMENT_TYPE,ISSUERCIK,ISSUERNAME,ISSUERTRADINGSYMBOL,REMARKS
2409946,0001209191-21-007755,2021-02-04,2021-02-02,,,0,,,4,1794515,ZoomInfo Technologies Inc.,ZI,Because no more than 10 reporting persons can ...


In [17]:
# OWNERSIGNATURENAME seems to need .split(',') to extract the alias used as the reporting owner name
names_data.loc[names_data['ACCESSION_NUMBER'].eq('0001209191-21-007755')]

Unnamed: 0,ACCESSION_NUMBER,OWNERSIGNATURENAME
2561362,0001209191-21-007755,"TA SDF III DO AIV, L.P., by TA Associates, SDF..."
2561363,0001209191-21-007755,"TA XI DO AIV, L.P., by TA Associates XI GP, L...."
2561364,0001209191-21-007755,"TA Associates, L.P., by Jeffrey C. Hadden, its..."
2561365,0001209191-21-007755,"TA Atlantic & Pacific VII-A L.P., by TA Associ..."
2561366,0001209191-21-007755,"TA Investors IV, L.P., by TA Associates, L.P.,..."
2561367,0001209191-21-007755,"TA SDF III DO AIV II, L.P., by TA Associates S..."
2561368,0001209191-21-007755,"TA XI DO AIV II, L.P., by TA Associates XI GP,..."
2561369,0001209191-21-007755,TA Associates AP VII-B DO Subsidiary Partnersh...
2561370,0001209191-21-007755,"TA SDF III DO Feeder, L.P., by TA Associates S..."
2561371,0001209191-21-007755,"TA XI DO Feeder, L.P., by TA Associates XI GP ..."


In [26]:
# The 50 most frequent reporting owner names (value_counts sorts by descending count)
owner_name_counts = reporting_owner_data['RPTOWNERNAME'].value_counts()
owner_name_counts.iloc[:50]

RPTOWNERNAME
Weinstein Boaz                                  2803
Saba Capital Management, L.P.                   2802
STAHL MURRAY                                    1855
FROST PHILLIP MD ET AL                          1525
Benioff Marc                                    1518
Frost Gamma Investments Trust                   1345
HORIZON KINETICS ASSET MANAGEMENT LLC           1284
BANK OF AMERICA CORP /DE/                       1163
GABELLI MARIO J                                 1074
GOLDSTEIN PHILLIP                                732
Eberwein Jeffrey E.                              731
BAKER FELIX                                      711
BAKER JULIAN                                     708
Zuckerberg Mark                                  705
ROTHBLATT MARTINE A                              599
OBUS NELSON                                      594
Dakos Andrew                                     584
STRYKER RONDA E                                  583
Harris Parker                    

# Testing Litigation Data

In [18]:
# Load the litigation releases table.
# NOTE(review): the file appears to carry a saved index column (shown as 'Unnamed: 0' in later output).
data_litigations = pd.read_csv(filepath_or_buffer=LITIGATIONS_DATA_PATH)

In [19]:
# Year coverage of the litigation releases: 1996 - 2017
data_litigations.loc[:, ['yr']].describe()

Unnamed: 0,yr
count,1222.0
mean,2006.533552
std,6.041633
min,1996.0
25%,2002.0
50%,2007.0
75%,2011.0
max,2017.0


In [20]:
# Show full cell contents so the release text is not truncated
pd.options.display.max_colwidth = None
# Last two releases in the table
data_litigations.iloc[-2:]

Unnamed: 0.1,Unnamed: 0,lt_no,yr,title,lt,class
1220,7977,24012,2017,Therapist Settles Charges of Insider Trading Ahead of Acquisition Announcement,"[A Seattle-based therapist has agreed to settle SEC charges that he traded in the stock of zulily, Inc Zulily based on information he learned from a Zulily employee during confidential counseling sessions , The SECs complaint alleges that, in July 2015, during counseling sessions, the Zulily employee told Kenneth Peer that Zulily was going to be acquired by Liberty Interactive, a media holding company On three occasions between July 21, 2015 and August 10, 2015, after counseling sessions with the Zulily employee, Peer purchased a total of over $28,000 of Zulily stock The complaint alleges that, before the market opened on August 17, 2015, Zulily announced that it had agreed to be acquired by Liberty Interactive in a tender offer By the end of trading that day, Zulilys stock allegedly had risen by 49%, with nearly 15 times the stocks average daily trading volume Shortly after the acquisition was announced, Peer allegedly sold all of his Zulily shares for illegal profits of approximately $10,000 , The SECs complaint charges Peer with violating Sections 10 b and 14 e of the Securities Exchange Act of 1934 and Rules 10b-5 and 14e-3 thereunder Without admitting or denying the SECs allegations, Peer agreed to disgorge $10,227 73 plus interest of $811 80 and pay a $10,227 73 penalty, for a total of $21,267 26 Peer also agreed to be enjoined from further violations of the charged provisions , The SECs investigation was conducted by Alice Liu Jensen and supervised by Steven D Buchholz, both of the Market Abuse Unit in the San Francisco Regional Office The SEC appreciates the assistance of FINRA in this matter , <img alt border0 height9 srcimagesarrowright_dkblue gif width10><a hreflitigationcomplaints2017comp24012 pdf>SEC Complaint<a>, , <i>https:www sec govlitigationlitreleases2017lr24012 htm<i><br>]",1
1221,7980,24015,2017,SEC Charges Former Employee and Friend with Insider Trading in Securities of International Rectifier Corporation,"[The today announced insider trading charges against a former employee of a semiconductor company and his friend for trading on nonpublic information that the company would be acquired , The SEC alleges that Lanny Brown learned that Infineon Technologies AG planned to acquire his then-employer, International Rectifier Corp IRC , before the deal was publicly announced According to the SECs complaint, Brown tipped his friend, Sean Fox, about the deal and both of them then acquired IRC call options The SEC further alleges that Brown and Fox concealed Browns involvement in the trading by depositing approximately $12,000 of their combined funds into Foxs brokerage account, and then used this account to purchase the call options for both of them The SEC also alleges that Fox closed out the option positions after the acquisition was publicly announced, and the two defendants made $369,720 in illicit profits To further hide Browns role in the trading, Fox allegedly funneled Browns share of the trading profits by paying several of Browns personal expenses and by writing checks to Browns children and stepchildren Brown and his wife then endorsed those checks and used the funds , The SECs complaint, filed in federal court in the District of Arizona, charges Brown and Fox with violating Section 10 b of the Securities Exchange Act of 1934 and Rule 10b-5 thereunder A criminal action is also pending against both Brown and Fox in the District of Arizona for the same underlying conduct In the SECs action, the defendants have consented to the entry of a final judgment that permanently enjoins them from future violations of the charged provisions of the federal securities laws The final judgment in the SECs action also orders them to pay, on a joint and several basis, disgorgement of $369,720 plus prejudgment interest of $43,147 79, with a credit for 
the monetary amount they have agreed to pay in the parallel criminal case against them , The settlements with the SEC are subject to court approval , The SECs investigation was conducted by Yolanda Ochoa and supervised by Finola H Manvelian of the Los Angeles office The SEC appreciates the assistance of the Financial Industry Regulatory Authority , <p classcenter>###, <img alt border0 height9 srcimagesarrowright_dkblue gif width10><a hreflitigationcomplaints2017comp24015 pdf>SEC Complaint<a>, , <i>https:www sec govlitigationlitreleases2017lr24015 htm<i><br>]",1


In [21]:
## Fields extracted by DeepSeek's AI from the release text.
## The numeric suffix is the row index in `data_litigations`, to avoid confusion.
result_1220 = {
    "NameOfTrader": "Kenneth Peer",
    "Profession": "Seattle-based therapist",
    "CompanyInvolved": "zulily, Inc (Zulily)",
    "AcquiringCompany": "Liberty Interactive",
    "SourceOfInformation": "Zulily employee (learned during confidential counseling sessions)",
    "DateOfAcquisitionAnnouncement": "17-08-2015",
    "DatesOfIllegalTransactions": ["21-07-2015", "10-08-2015"],
    "TotalAmountInvested": 28000,
    "IllegalProfits": 10000,
    "StockPriceIncrease": "49%",
    "TradingVolumeIncrease": "15 times the average daily trading volume",
    "LegalConsequences": {
        "Disgorgement": 10227.73,
        "Interest": 811.80,
        "Penalty": 10227.73,
        "TotalPayment": 21267.26,
        "Injunction": "Enjoined from further violations of the charged provisions",
    },
    "SECCharges": [
        "Violation of Section 10(b) of the Securities Exchange Act of 1934",
        "Violation of Section 14(e) of the Securities Exchange Act of 1934",
        "Violation of Rule 10b-5",
        "Violation of Rule 14e-3",
    ],
    "SECInvestigators": {
        "Investigator": "Alice Liu Jensen",
        "Supervisor": "Steven D. Buchholz",
        "Unit": "Market Abuse Unit",
        "RegionalOffice": "San Francisco Regional Office",
    },
    "AssistanceProvidedBy": "FINRA",
}

result_1221 = {
    "NamesOfTraders": ["Lanny Brown", "Sean Fox"],
    "RelationToInsider": "Lanny Brown (former employee of International Rectifier Corp)",
    "CompanyInvolved": "International Rectifier Corp (IRC)",
    "AcquiringCompany": "Infineon Technologies AG",
    # ISSUE: no announcement date could be extracted for this release
    "DateOfAcquisitionAnnouncement": "Not explicitly stated in the text",
    # ISSUE: no transaction dates could be extracted for this release
    "DateOfIllegalTransactions": "Not explicitly stated in the text",
    "IllegalActivity": "Purchased IRC call options using nonpublic information about the acquisition",
    "FundsDepositedForTrading": 12000,
    "IllegalProfits": 369720,
    "PrejudgmentInterest": 43147.79,
    "LegalConsequences": "Permanent injunctions, disgorgement of $369,720, and prejudgment interest",
    "CriminalAction": "Pending in the District of Arizona",
    "SECInvestigators": {
        "Investigator": "Yolanda Ochoa",
        "Supervisor": "Finola H. Manvelian",
    },
    "AssistanceProvidedBy": "Financial Industry Regulatory Authority (FINRA)",
}

In [22]:
# From the two known datasets with name info, build lower-cased name arrays in which to look
# for the traders named in the litigation releases (e.g. "Lanny Brown", "Sean Fox").
# Missing names are dropped from BOTH arrays: a NaN entry is a float, and the substring test
# `NAME.lower() in st` used by the search cells would raise TypeError on it.
owner_signature = names_data['OWNERSIGNATURENAME'].dropna().str.lower().values
reportin_owner = reporting_owner_data['RPTOWNERNAME'].dropna().str.lower().values

In [23]:
# Case-insensitive substring search for one trader in both name arrays.
# Other spellings tried: Peer, Kenneth Peer
NAME = 'Peer Kenneth'

needle = NAME.lower()
# Signature names are searched first, then reporting owner names
for candidates in (owner_signature, reportin_owner):
    for st in candidates:
        if needle in st:
            print(st)

In [24]:
# Case-insensitive substring search for one trader in both name arrays.
# Other spellings tried: 'Sean Fox', 'Brown Lanny', 'Lanny Brown'
NAME = 'Fox Sean'

needle = NAME.lower()
# Signature names are searched first, then reporting owner names
for candidates in (owner_signature, reportin_owner):
    for st in candidates:
        if needle in st:
            print(st)