<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Holdings-company-information" data-toc-modified-id="Holdings-company-information-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Holdings company information</a></span></li><li><span><a href="#Download-holdings-data" data-toc-modified-id="Download-holdings-data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Download holdings data</a></span></li><li><span><a href="#Download-fund-information-data" data-toc-modified-id="Download-fund-information-data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Download fund information data</a></span></li><li><span><a href="#Download-fund-summary-data" data-toc-modified-id="Download-fund-summary-data-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Download fund summary data</a></span></li><li><span><a href="#Download-fund-style-data" data-toc-modified-id="Download-fund-style-data-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Download fund style data</a></span></li><li><span><a href="#Test-holdings-data" data-toc-modified-id="Test-holdings-data-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Test holdings data</a></span></li></ul></div>

# Load different Tables from WRDS

Large tables, such as the 50+ GB Holdings table, were downloaded from WRDS using an FTP client.

## Holdings company information

In [None]:
import wrds
import feather
import matplotlib.pyplot as plt

# Connect to DB
# Opens a WRDS session and binds it to `db`; the query cells in this
# notebook call `db.raw_sql(...)` on that object.
# NOTE(review): the WRDS username is hard-coded here — confirm this is intended.
db = wrds.Connection(wrds_username='amglex')
print('Successfully connected')

## Download holdings data 
(only for tests / checks)

In [None]:
# ----------------------------------------------------------------------
# Holdings: company / security identifiers
#
# Selects the distinct (crsp_company_key, security_name, cusip, permno,
# permco) combinations from the `holdings` table through the open `db`
# connection, prints a short summary of the result and writes it to a
# feather file.
# ----------------------------------------------------------------------

print('Start downloading data ...')

# Run the query; the result is bound to `data_raw_df`.
data_raw_df = db.raw_sql('''
    SELECT distinct crsp_company_key, security_name, cusip, permno, permco
    FROM holdings;
    ''')

print('SQL successful')

# Quick inspection: dimensions, column dtypes and the first rows, one per line.
print(data_raw_df.shape, data_raw_df.dtypes, data_raw_df.head(), sep='\n')

# Persist the raw result.
# NOTE(review): this cell writes below '../data/raw/' while the fund_hdr and
# fund_summary cells write below '../../data/raw/' — confirm which is intended.
path = '../data/raw/holdings_co_info.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

## Download fund information data

In [None]:
# ----------------------------------------------------------------------
# Fund information: full `fund_hdr` table
#
# Opens a WRDS connection, selects every column of `fund_hdr`, prints the
# shape of the result and writes it to a feather file.
# ----------------------------------------------------------------------

# Open the WRDS session used by the query below.
# NOTE(review): this rebinds `db` to a new connection; nothing in this cell
# closes a connection that may already be open.
db = wrds.Connection(wrds_username='amglex')
print('Successfully connected')

print('Start downloading data ...')

# TODO Look ahead because of per_com?
# NOTE(review): this query was previously labelled "Summary table / OLD WAY"
# although it reads fund_hdr — confirm whether this cell is still the
# intended way to obtain the fund information.
data_raw_df = db.raw_sql('''
    SELECT *
    FROM fund_hdr;
    ''')

print('SQL successful')

# Only the dimensions are reported for this table.
print(data_raw_df.shape)

# Persist the raw result.
path = '../../data/raw/fund_hdr_1.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

## Download fund summary data

In [None]:
# ----------------------------------------------------------------------
# Fund summary: NAV / TNA snapshot columns
#
# Selects a fixed set of columns from `fund_summary`, restricted to rows
# whose `caldt` is strictly later than 2002-01-01, prints a short summary
# of the result and writes it to a feather file.
# ----------------------------------------------------------------------

print('Start downloading data ...')

# Run the query; the result is bound to `data_raw_df`.
data_raw_df = db.raw_sql('''
    SELECT crsp_fundno, caldt, summary_period, nav_latest, nav_latest_dt, tna_latest, tna_latest_dt, per_com
    FROM fund_summary
    WHERE caldt > '2002-01-01';
    ''')

print('SQL successful')

# Quick inspection: dimensions, column dtypes and the first rows, one per line.
print(data_raw_df.shape, data_raw_df.dtypes, data_raw_df.head(), sep='\n')

# Persist the raw result.
path = '../../data/raw/fund_summary.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

## Download fund style data

In [None]:
# ----------------------------------------------------------------------
# Fund style: Lipper classification per fund and date range
#
# Selects crsp_fundno, begdt, enddt, lipper_class and lipper_obj_cd from
# `fund_style`, prints a short summary of the result and writes it to a
# feather file.
# ----------------------------------------------------------------------

print('Start downloading data ...')

# Run the query; the result is bound to `data_raw_df`.
data_raw_df = db.raw_sql('''
    SELECT crsp_fundno, begdt, enddt, lipper_class, lipper_obj_cd
    FROM fund_style;
    ''')

print('SQL successful')

# Quick inspection: dimensions, column dtypes and the first rows, one per line.
print(data_raw_df.shape, data_raw_df.dtypes, data_raw_df.head(), sep='\n')

# Persist the raw result.
# NOTE(review): this cell writes below '../data/raw/' while the fund_hdr and
# fund_summary cells write below '../../data/raw/' — confirm which is intended.
path = '../data/raw/fund_style.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")

## Test holdings data

In [None]:
######################
# Query the data
######################

# Sanity check on the `holdings` table: for every calendar year of
# `report_dt`, count the distinct `crsp_portno` values. Nothing is written
# to disk; the result is only printed for inspection.

print('Start downloading data ...')

# SQL Query
# ORDER BY is required for a reproducible preview: without it the database
# may return the grouped rows in any order, so `head()` below would show an
# arbitrary set of years that can differ from run to run.
data_raw_df = db.raw_sql(
    '''
    SELECT EXTRACT(YEAR FROM report_dt) as year, COUNT(DISTINCT crsp_portno) as count_portno
    FROM holdings
    GROUP BY year
    ORDER BY year;
    '''
)

print('SQL successful')

# Dimensions of the result (one row per year)
print(data_raw_df.shape)

# Column dtypes as returned by the driver
print(data_raw_df.dtypes)

# First rows, i.e. the earliest years thanks to the ORDER BY above
print(data_raw_df.head())