<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [None]:
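# Description
# Pulls a CRSP monthly stock sample (crsp.msf) and Thomson-Reuters s12 fund
# report/vintage dates from WRDS, and saves a raw holdings extract to feather.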

import wrds
import feather

import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

In [None]:
# ----------------------------------------------------------------
# WRDS session used by the CRSP and Thomson-Reuters queries below
# ----------------------------------------------------------------
wrds_login = {'wrds_username': 'amglex'}
conn = wrds.Connection(**wrds_login)

In [None]:
######################################
# Step 1                             #
# CRSP Block                         #
######################################

# Sample date range (both ends inclusive), interpolated into SQL `between` filters.
# Written as ISO 8601 (YYYY-MM-DD) so the literals cannot be misread as DD/MM/YYYY
# by the database; they denote the same days as the former '03/01/2008'-style values
# under month-first parsing.
begdate = '2008-03-01'
enddate = '2008-12-31'

In [None]:
# Monthly stock file pull (SQL similar to the crspmerge macro):
# build the statement first, then run it over the open WRDS connection.

crsp_query = f"""
                      select permno, date, cfacpr, cfacshr, ret, shrout, prc
                      from crsp.msf as a
                      where a.date between '{begdate}' and '{enddate}'
                      """
crsp_m = conn.raw_sql(crsp_query, date_cols=['date'])

In [None]:
# Keep an independent snapshot of the raw download for later checks.
# A plain `check = crsp_m` would only alias the same DataFrame, so every in-place
# column assignment on crsp_m would silently alter `check` as well.
check = crsp_m.copy()
crsp_m.head()

In [None]:
# Store the date column as pandas datetimes
crsp_m['date'] = pd.DatetimeIndex(crsp_m['date'])

In [None]:
# Change all dates to last day of respective month for later joining

import datetime

def last_day_of_month(any_day):
    """Return the last calendar day of the month that contains ``any_day``.

    ``any_day`` must support ``.replace(day=...)`` and timedelta arithmetic
    (e.g. ``datetime.date`` or ``datetime.datetime``). The result has the same
    type; any time-of-day component is carried over unchanged.
    """
    # Day 28 exists in every month, and 28 + 4 days always lands in the next month.
    spill_over = any_day.replace(day=28) + datetime.timedelta(days=4)
    # Walking back by the day-of-month reached gives the final day of the original month.
    days_into_next_month = datetime.timedelta(days=spill_over.day)
    return spill_over - days_into_next_month

# Snap every CRSP date to the last calendar day of its month
crsp_m['date'] = crsp_m['date'].apply(last_day_of_month)

# Test to check if all dates are month end -> correct if = 0.
# The count is printed explicitly: a bare expression that is not the last statement
# of a cell is never displayed, so the check would otherwise be invisible.
n_not_month_end = int((~pd.DatetimeIndex(crsp_m['date']).is_month_end).sum())
print(n_not_month_end)
assert n_not_month_end == 0, "crsp_m.date contains dates that are not month ends"

In [None]:
# Calculate adjusted SHROUT and Price

# "Adjusted total shares outstanding"
# NOTE(review): the factor 1000 presumably converts shrout from thousands to units — confirm.
crsp_m['TSO'] = crsp_m['shrout'] * crsp_m['cfacshr'] * 1000

# "Price at t period end, adjusted" (the sign of prc is discarded before adjusting)
crsp_m['P'] = crsp_m['prc'].abs() / crsp_m['cfacpr']

# "Price at t-1, adjusted".
# The lag must be taken within each permno in date order; a plain shift(1) over the
# whole frame would hand a security the price of whatever row happens to precede it
# (possibly another permno). The shifted Series is aligned back on the row index, so
# the row order of crsp_m is left unchanged. The first observation of each permno is NaN.
# NOTE(review): this lags by one observation, assuming no missing months per permno — confirm.
crsp_m['P_1'] = (
    crsp_m.sort_values(['permno', 'date'])
          .groupby('permno')['P']
          .shift(1)
)

In [None]:
# Remove the raw prc, cfacpr and shrout columns (cfacshr is kept)
crsp_m = crsp_m.drop(['prc', 'cfacpr', 'shrout'], axis=1)

In [None]:
# Reference only: SAS implementation of Step 1.3, kept verbatim as comments.
# Left as raw SAS in a Python code cell it raises a SyntaxError and stops
# "Restart Kernel -> Run All".
#
# /* Step 1.3. Get report and vintage dates from Thomson-Reuters Mutual Fund Holdings */
# /* Exclude Non-Equity Funds from Holdings data that is reported as of Fiscal Quarter End */
# /* First, Keep First Vintage with Holdings Data for Each RDATE-FUNDNO */
# proc sql;
# create table First_Vint
# as select distinct intnx("month",rdate,0,"E") as rdate format date9., fdate, fundno
# from tfn.s12type1
# where ("&begdate"d <= rdate <="&enddate"d and ioc not in (1,5,6,7))
# group by fundno, intnx("month",rdate,0,"E")
# having fdate=min(fdate) and max(rdate)=rdate
# order by fundno, rdate desc;
# quit;

In [None]:
# Step 1.3. Get report and vintage dates from Thomson-Reuters Mutual Fund Holdings
# Exclude Non-Equity Funds from Holdings data that is reported as of Fiscal Quarter End
# First, Keep First Vintage with Holdings Data for Each RDATE-FUNDNO

In [None]:
# Distinct (rdate, fdate, fundno) rows from the s12 type-1 table for the sample
# window, leaving out ioc codes 1, 5, 6 and 7.
# NOTE(review): unlike the SAS reference, no first-vintage (min fdate) filter is
# applied here — confirm whether that step is still to be added.
tr_query = f"""
                    SELECT distinct rdate, fdate, fundno
                    FROM tfn.s12type1
                    WHERE rdate between '{begdate}' and '{enddate}'
                    AND ioc not in (1,5,6,7)
                    """
TR = conn.raw_sql(tr_query, date_cols=['rdate', 'fdate'])

TR.dtypes

In [None]:
# Move every report date to the final calendar day of its month
TR['rdate'] = TR['rdate'].apply(last_day_of_month)

# Sanity check: number of report dates that are not a month end (0 means all good)
print((~pd.DatetimeIndex(TR['rdate']).is_month_end).sum())

In [None]:
# Number of non-null fdate entries for each (fundno, rdate) pair
TR.groupby(['fundno', 'rdate']).count()

In [None]:
# Open the WRDS session used for the holdings download
wrds_user = 'amglex'
db = wrds.Connection(wrds_username=wrds_user)
print('Successfully connected')

In [None]:
# SAS data directory paths
# NOTE(review): not referenced by any Python cell in view — confirm they are still needed.
tfn = "/wrds/tfn/sasdata/s12"
mfl = "/wrds/mfl/sasdata"
ff = "/wrds/ff/sasdata"

# Step 1.1. Specifying Options
begdate, enddate = '1980-03-01', '2008-12-31'

In [None]:
######################
# Query the data
######################

print('Start downloading data ...')

# SQL Query: two columns from the holdings table, capped at 10,000,000 rows
holdings_sql = '''
    SELECT crsp_company_key, security_name
    FROM holdings 
    LIMIT 10000000   
    '''
data_raw_df = db.raw_sql(holdings_sql)

print('SQL successful')

# Quick look at the result: dimensions, column types, first rows
for summary in (data_raw_df.shape, data_raw_df.dtypes, data_raw_df.head()):
    print(summary)

# Persist the raw extract as a feather file
path = '../../data/raw/holdings_co_info.feather'
feather.write_dataframe(data_raw_df, path)

print("Successfully saved data")