In [1]:
from pathlib import Path

import pandas as pd
import wrds

### Step 1: Connect to WRDS

In [2]:
# Open the WRDS database connection that the queries in this notebook run on.
conn = wrds.Connection()

WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


### Step 2: Get SP500 Index Membership from CRSP
- I use the monthly membership list here; if a more precise date range is needed, one can work with crsp.dsp500list (daily) instead.

In [19]:
# Sample window; both bounds are interpolated into the SQL date filter,
# which compares with >= and <=, so the window is inclusive on both ends.
begdate, enddate = '01/01/2015', '12/30/2022'

In [20]:
# Build one row per (constituent, date): each membership spell from
# crsp.msp500list is paired with the crsp.msf dates of the same PERMNO that
# fall inside both the spell [start, ending] and the sample window.
sp500_query = f"""
                        select a.*, b.date
                        from crsp.msp500list as a,
                        crsp.msf as b
                        where a.permno=b.permno
                        and b.date >= a.start and b.date<= a.ending
                        and b.date>='{begdate}'
                        and b.date<='{enddate}'
                        order by date;
                        """

# Parse the three date columns as datetimes on load.
sp500 = conn.raw_sql(sp500_query, date_cols=['start', 'ending', 'date'])

### Step 3: Add Other Company Identifiers from CRSP.MSENAMES
- You don't need this step if only PERMNO is required
- This step adds descriptive fields such as COMNAM and TICKER; other MSENAMES fields (e.g. SHRCD, EXCHCD) can be added to the query in the same way.

In [22]:
# Pull the name history from crsp.msenames: company name, ticker and the
# [namedt, nameendt] window over which each record applies, keyed by PERMNO.
mse_query = """
                        select comnam, namedt, nameendt, 
                        permno, ticker
                        from crsp.msenames
                        """
mse = conn.raw_sql(mse_query, date_cols=['namedt', 'nameendt'])

# Where nameendt is missing, substitute today's timestamp so the later
# date-range comparison against nameendt still has a value to compare with.
mse = mse.assign(nameendt=mse['nameendt'].fillna(pd.to_datetime('today')))

In [23]:
# Attach every name record of a PERMNO to each of its constituent rows.
sp500_full = sp500.merge(mse, on='permno', how='left')

# Keep only the name record whose [namedt, nameendt] window (inclusive on
# both ends) contains the observation date. Rows with no matching name
# record have missing bounds, compare as False, and are dropped here.
name_is_current = sp500_full['date'].between(sp500_full['namedt'],
                                             sp500_full['nameendt'])
sp500_full = sp500_full.loc[name_is_current]

In [24]:
# Preview the merged constituent panel (pandas truncates the display of long frames).
sp500_full

Unnamed: 0,permno,start,ending,date,comnam,namedt,nameendt,ticker
3,10104.0,1989-08-03,2022-12-30,2015-01-30,ORACLE CORP,2013-07-15,2022-12-30,ORCL
5,10107.0,1994-06-07,2022-12-30,2015-01-30,MICROSOFT CORP,2004-06-10,2022-12-30,MSFT
10,10138.0,1999-10-13,2022-12-30,2015-01-30,T ROWE PRICE GROUP INC,2015-01-29,2022-08-01,TROW
25,10145.0,1925-12-31,2022-12-30,2015-01-30,HONEYWELL INTERNATIONAL INC,2014-01-27,2017-01-29,HON
35,10147.0,1996-03-28,2016-09-07,2015-01-30,E M C CORP MA,2004-06-10,2016-09-06,EMC
...,...,...,...,...,...,...,...,...
345077,93096.0,2012-12-03,2022-12-30,2022-12-30,DOLLAR GENERAL CORP NEW,2020-02-11,2022-12-30,DG
345079,93132.0,2018-10-11,2022-12-30,2022-12-30,FORTINET INC,2019-08-02,2022-12-30,FTNT
345080,93246.0,2021-03-22,2022-12-30,2022-12-30,GENERAC HOLDINGS INC,2010-02-11,2022-12-30,GNRC
345084,93429.0,2017-03-01,2022-12-30,2022-12-30,C B O E GLOBAL MARKETS INC,2021-03-29,2022-12-30,CBOE


In [26]:
# Distinct (PERMNO, company name, ticker) combinations present in the panel.
id_cols = ['permno', 'comnam', 'ticker']
sp500_full.loc[:, id_cols].drop_duplicates()

Unnamed: 0,permno,comnam,ticker
3,10104.0,ORACLE CORP,ORCL
5,10107.0,MICROSOFT CORP,MSFT
10,10138.0,T ROWE PRICE GROUP INC,TROW
25,10145.0,HONEYWELL INTERNATIONAL INC,HON
35,10147.0,E M C CORP MA,EMC
...,...,...,...
334368,86288.0,COSTAR GROUP INC,CSGP
337857,12476.0,TARGA RESOURCES CORP,TRGP
339198,75607.0,GEN DIGITAL INC,GEN
339604,82276.0,ARCH CAPITAL GROUP LTD NEW,ACGL


In [28]:
# Persist the constituent panel as gzip-compressed Parquet.
# to_parquet does not create missing parent directories, so make sure the
# output folder exists first; otherwise a fresh checkout fails on this cell
# only after the WRDS queries above have already been run.
out_path = Path('data/sp500.parquet.gzip')
out_path.parent.mkdir(parents=True, exist_ok=True)
sp500_full.to_parquet(out_path, compression='gzip')

In [29]:
# Close the WRDS connection now that the queries are done and the result is saved.
conn.close()