# Load different tables from WRDS

Large tables, such as the 50+ GB Holdings table, were downloaded from WRDS using an FTP client.

## Holdings company information

In [1]:
import wrds
import feather

# Open the WRDS session; `db` is the handle used to run SQL further down
wrds_user = 'amglex'
db = wrds.Connection(wrds_username=wrds_user)
print('Successfully connected')

  """)


Loading library list...
Done
Successfully connected


In [8]:
# ------------------------------------------------------------------
# Pull the distinct company identifiers out of the holdings table
# and store them locally as a feather file
# ------------------------------------------------------------------

print('Start downloading data ...')

# DISTINCT keeps one row per combination of the five selected columns
holdings_sql = '''
    SELECT distinct crsp_company_key, security_name, cusip, permno, permco
    FROM holdings;
    '''
data_raw_df = db.raw_sql(holdings_sql)

print('SQL successful')

# Quick look at the result: dimensions, column dtypes, first rows
print(data_raw_df.shape, data_raw_df.dtypes, data_raw_df.head(), sep='\n')

# Write the query result to disk in feather format
path = '../data/raw/holdings_co_info.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

Start downloading data ...
SQL successful
(1621421, 5)
crsp_company_key    float64
security_name        object
cusip                object
permno              float64
permco              float64
dtype: object
   crsp_company_key         security_name     cusip   permno   permco
0         3000001.0              AAON INC  00036020  76868.0  10817.0
1         3000002.0            A A R CORP  00036110  54594.0  20000.0
2         3000003.0             A B B LTD  00037520  88953.0  41444.0
3         3000004.0         A B C BANCORP  00040010  80498.0  13092.0
4         3000005.0  A B WATLEY GROUP INC  00078810  86828.0  16482.0
Successfully saved data


## Load fund information

In [None]:
# Open a WRDS session for this cell
db = wrds.Connection(wrds_username='amglex')
print('Successfully connected')


# ------------------------------------------------------------------
# Download fund header / style information together with a per-fund
# average of per_com, then store it locally as a feather file
# ------------------------------------------------------------------

print('Start downloading data ...')

# Summary-table query (marked by the author as the OLD WAY).
# TODO: does averaging per_com over all fund_summary rows of a fund
#       introduce look-ahead bias?
# Subquery `b` computes avrcs = sum(per_com) / count(per_com) for each
# crsp_fundno and is attached through style.crsp_fundno.
# NOTE(review): header rows without a fund_style match have a NULL
#       style.crsp_fundno after the FULL JOIN, so they presumably get no
#       avrcs -- confirm this is intended.
fund_info_sql = '''
    SELECT hdr.crsp_fundno, hdr.crsp_portno, hdr.fund_name,
        first_offer_dt, index_fund_flag, et_flag,
        begdt, enddt, lipper_class, avrcs
    FROM fund_hdr hdr
    FULL JOIN fund_style style
    ON hdr.crsp_fundno = style.crsp_fundno
    
    LEFT JOIN   
        (SELECT distinct 
            crsp_fundno, sum(per_com)/count(per_com) as avrcs
        FROM fund_summary 
        GROUP BY crsp_fundno) b
    ON style.crsp_fundno = b.crsp_fundno;
    '''
data_raw_df = db.raw_sql(fund_info_sql)

print('SQL successful')
print(data_raw_df.shape)

# NOTE(review): this cell writes below '../../data/raw/' while the holdings
#       cell writes below '../data/raw/' -- confirm which root is correct.
path = '../../data/raw/total_summary_new.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")