In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read Data
# Name of the raw input file (presumably Q1 2020 SEC Form 4 filings, going by the name).
file_name = "2020-qtr1-form4-raw.json"
# NOTE: despite its name, data_directory is the relative path to the file itself,
# not to a directory.
data_directory = f"data/{file_name}"

# Parse the JSON file with pandas; later cells read and extend `data`.
data = pd.read_json(data_directory)

In [29]:
# Add Helpful Column
def addTxnDesc(df):
    '''
    Add a 'Transaction Description' column explaining each 'Transaction Code'.

    Codes and descriptions follow the SEC Form 4 instructions.
    Instructions: https://www.sec.gov/about/forms/form4data.pdf

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Transaction Code' column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``. Rows whose code is listed below get the
        matching description. Rows with any other code (including '') keep
        the description they already had if the column existed, and are
        left missing (NaN) otherwise.

    Raises
    ------
    KeyError
        If ``df`` has no 'Transaction Code' column.
    '''

    transDict = {
        # General Transaction Codes
        'P':'Open market or private purchase of non-derivative or derivative security',
        'S':'Open market or private sale of non-derivative or derivative security',
        'V':'Transaction Voluntarily reported earlier than required',
        # Rule 16b-3 Transaction Codes
        'A':'Grant, award or other acquisition pursuant to Rule 16b-3(d)',
        'D':'Disposition to the issuer of issuer equity securities pursuant to Rule 16b-3(e)',
        'F':'Payment of exercise price or tax liability by delivering or withholding securities incident to receipt, exercise or vesting of a security issued in accordance with Rule 16b-3',
        'I':'Discretionary transaction in accordance with Rule 16b-3(f) resulting in acquisition or disposition of issuer securities',
        'M':'Exercise or conversion of derivative security exempted pursuant to Rule 16b-3',
        # Derivative Securities Codes (Except for transactions exempted pursuant to Rule 16b-3)
        'C': 'Conversion of derivative security',
        'E': 'Expiration of short derivative position',
        'H': 'Expiration (or cancellation) of long derivative position with value received',
        'O': 'Exercise of out-of-the-money derivative security',
        'X': 'Exercise of in-the-money or at-the-money derivative security',
        # Other Section 16(b) Exempt Transaction and Small Acquisition Codes (except for Rule 16b-3 codes above)
        'G':'Bona fide gift',
        'L':'Small acquisition under Rule 16a-6',
        'W':'Acquisition or disposition by will or the laws of descent and distribution',
        'Z':'Deposit into or withdrawal from voting trust',
        # Other Transaction Codes
        'J':'Other acquisition or disposition (describe transaction)',
        'K':'Transaction in equity swap or instrument with similar characteristics',
        'U':'Disposition pursuant to a tender of shares in a change of control transaction'
    }

    # Single vectorised lookup; codes absent from transDict map to NaN.
    descriptions = df['Transaction Code'].map(transDict)

    if 'Transaction Description' in df.columns:
        # Only recognised codes overwrite: a row with an unrecognised code
        # keeps whatever description it already carried.
        descriptions = descriptions.where(descriptions.notna(), df['Transaction Description'])

    df['Transaction Description'] = descriptions

    return df

# addTxnDesc writes the 'Transaction Description' column onto the frame it is
# given and returns that same frame, so `data` itself gains the column here.
data = addTxnDesc(data)

In [30]:
# Investigate missing Transaction Descriptions


# Verify that the rows with code == '' are exactly the rows with a missing
# Transaction Description. If a row has a missing description but a non-empty
# code, addTxnDesc does not cover that transaction code.
# The comparison has to run in both directions: an uncovered code shows up as a
# row in set_of_nas that is absent from set_of_empties, which the one-sided
# difference set_of_empties - set_of_nas can never reveal.

set_of_empties = set(data[data['Transaction Code'] == ''].index)
set_of_nas = set(data[data['Transaction Description'].isna()].index)

# Symmetric difference: index labels found in only one of the two sets.
# An empty list means the two sets of rows are identical.
list(set_of_empties ^ set_of_nas)

[]