# Load different Tables from WRDS

Large tables, such as the 50+ GB Holdings table, were downloaded from WRDS using an FTP client.

## Holdings company information

In [None]:
import wrds
import feather
import matplotlib.pyplot as plt

# Open the WRDS session; the query cells below all go through `db`.
db = wrds.Connection(
    wrds_username='amglex',
)
print('Successfully connected')

## I. Download holdings data 
(only for tests / checks)

In [None]:
# ----------------------------------------------------------------
# Holdings company information: query WRDS, inspect, save to disk
# ----------------------------------------------------------------

print('Start downloading data ...')

# Distinct combinations of the company/security identifier columns
# found in the holdings table.
holdings_co_sql = '''
    SELECT distinct crsp_company_key, security_name, cusip, permno, permco
    FROM holdings;
    '''
data_raw_df = db.raw_sql(holdings_co_sql)

print('SQL successful')

# Sanity check: dimensions, column dtypes and the first rows.
print(data_raw_df.shape, data_raw_df.dtypes, data_raw_df.head(), sep='\n')

# Store the result as a feather file.
path = '../data/raw/holdings_co_info.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

## II. Download fund information data

In [None]:
# Open a new WRDS connection and rebind `db` to it.
db = wrds.Connection(
    wrds_username='amglex',
)
print('Successfully connected')

# ----------------------------------------------------------------
# Fund header history: query WRDS and save to disk
# ----------------------------------------------------------------

print('Start downloading data ...')

# NOTE: the original author labelled this query "Summary table //// OLD WAY".
# TODO Look ahead because of per_com?
fund_hdr_sql = '''
    SELECT crsp_fundno, crsp_portno, fund_name, chgdt, enddt, index_fund_flag, et_flag
    FROM fund_hdr_hist;
    '''
data_raw_df = db.raw_sql(fund_hdr_sql)

print('SQL successful')

print(data_raw_df.shape)

# NOTE(review): this path goes up two directories ('../../'), whereas the
# other cells in this notebook use '../data/raw/' -- confirm which is intended.
path = '../../data/raw/fund_hdr.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

## III. Download fund summary data

In [None]:
######################
# Query the data
######################

# Downloads selected columns of the fund summary table (rows with
# caldt after 2002-01-01), prints a short overview and stores the
# result as a feather file.

print('Start downloading data ...')

# SQL Query
data_raw_df = db.raw_sql(
    '''
    SELECT crsp_fundno, caldt, summary_period, nav_latest, nav_latest_dt, tna_latest, tna_latest_dt, per_com
    FROM fund_summary
    WHERE caldt > '2002-01-01';
    '''
)

print('SQL successful')

print(data_raw_df.shape)

print(data_raw_df.dtypes)

print(data_raw_df.head())

# This cell previously reused whatever `path` an earlier cell had left
# behind, so the fund summary data overwrote that cell's output file
# (fund_hdr.feather when run in notebook order). Use a dedicated file.
# NOTE(review): prefix follows the '../data/raw/' convention used by most
# cells here -- adjust if the data directory lives elsewhere.
path = '../data/raw/fund_summary.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

## IV. Download fund style data

In [None]:
# ----------------------------------------------------------------
# Fund style: query WRDS, inspect, save to disk
# ----------------------------------------------------------------

print('Start downloading data ...')

# Style classification columns per fund, with the begin/end dates
# stored alongside them in the fund_style table.
fund_style_sql = '''
    SELECT crsp_fundno, begdt, enddt, lipper_class, lipper_obj_cd
    FROM fund_style;
    '''
data_raw_df = db.raw_sql(fund_style_sql)

print('SQL successful')

# Sanity check: dimensions, column dtypes and the first rows.
print(data_raw_df.shape, data_raw_df.dtypes, data_raw_df.head(), sep='\n')

# Store the result as a feather file.
path = '../data/raw/fund_style.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

## Test holdings data

In [None]:
# ----------------------------------------------------------------
# Holdings check: number of distinct portfolios per report year
# ----------------------------------------------------------------

print('Start downloading data ...')

# Aggregation happens on the server: one row per year extracted from
# report_dt, with the count of distinct crsp_portno values in that year.
portno_per_year_sql = '''
    SELECT EXTRACT(YEAR FROM report_dt) as year, COUNT(DISTINCT crsp_portno) as count_portno
    FROM holdings
    GROUP BY year;
    '''
data_raw_df = db.raw_sql(portno_per_year_sql)

print('SQL successful')

# Sanity check: dimensions, column dtypes and the first rows.
print(data_raw_df.shape, data_raw_df.dtypes, data_raw_df.head(), sep='\n')

In [None]:
# Bar chart of the distinct-portfolio count (bar height) for each year.
plt.bar(
    x=data_raw_df['year'],
    height=data_raw_df['count_portno'],
)

In [None]:
# Old SQL, kept for reference only: the string below is a bare expression
# and is never passed to db.raw_sql in this cell.
# It full-joins fund_hdr with fund_style on crsp_fundno and left-joins a
# per-fund average of per_com (sum / count, aliased avrcs) from fund_summary.

'''
    SELECT hdr.crsp_fundno, hdr.crsp_portno, hdr.fund_name,
        first_offer_dt, index_fund_flag, et_flag,
        begdt, enddt, lipper_class, avrcs
    FROM fund_hdr hdr
    FULL JOIN fund_style style
    ON hdr.crsp_fundno = style.crsp_fundno
    
    LEFT JOIN   
        (SELECT distinct 
            crsp_fundno, sum(per_com)/count(per_com) as avrcs
        FROM fund_summary 
        GROUP BY crsp_fundno) b
    ON style.crsp_fundno = b.crsp_fundno;
    '''

