In [77]:
import wrds
import pandas as pd
import pytz
import datetime as dt
import pandas_market_calendars as mcal
# Open the WRDS session that every query in this notebook runs through.
# NOTE(review): the username is hard-coded; confirm this is acceptable before sharing the notebook.
db = wrds.Connection(wrds_username="connorwz")

Loading library list...
Done


In [78]:
# Link CRSP permnos to RavenPack entity ids by matching CRSP's ncusip against the first
# 8 characters of RavenPack's cusip. The join is written as an explicit INNER JOIN (instead of a
# comma join filtered in WHERE) and only the columns that are actually used are referenced.
# DISTINCT is applied to (permno, comnam, rp_entity_id), so a permno can appear once per
# company name recorded for it.
mapping_file_query = """
    SELECT DISTINCT a.permno, a.comnam, b.rp_entity_id
    FROM crsp.dse AS a
    INNER JOIN rpna.wrds_rpa_company_mappings AS b
        ON a.ncusip = substr(b.cusip, 1, 8)
    WHERE a.ncusip IS NOT NULL
      AND b.cusip IS NOT NULL
"""
mapping_file = db.raw_sql(mapping_file_query)

In [79]:
# Preview the first five rows of the permno -> RavenPack entity mapping.
mapping_file.head(n=5)

Unnamed: 0,permno,comnam,rp_entity_id
0,10012,D P A C TECHNOLOGIES CORP,EEF6A8
1,10026,J & J SNACK FOODS CORP,B72BE9
2,10028,ENVELA CORP,B9BDE8
3,10031,A A IMPORTING INC,91E312
4,10032,PLEXUS CORP,A2CAF7


In [80]:
# Sanity check: list permnos linked to more than one RP entity id.
# The result is empty, i.e. every permno maps to a single rp_entity_id.
(
    mapping_file
    .groupby("permno")
    .filter(lambda grp: grp["rp_entity_id"].nunique(dropna=False) > 1)
    .sort_values(by="permno")
)

Unnamed: 0,permno,comnam,rp_entity_id


In [81]:
# The reverse does not hold: some RP entity ids are linked to more than one permno.
# Keep every row belonging to such an entity, ordered by entity id.
entity_id_mapped_morethan1 = (
    mapping_file
    .groupby("rp_entity_id")
    .filter(lambda grp: grp["permno"].nunique(dropna=False) > 1)
    .sort_values(by="rp_entity_id")
)
entity_id_mapped_morethan1.head(n=5)

Unnamed: 0,permno,comnam,rp_entity_id
494,13452,TRONOX HOLDINGS PLC,00D6B5
7941,91153,TRONOX INC,00D6B5
7969,91277,QURATE RETAIL INC Q V C GROUP,0C6861
7967,91277,QURATE RETAIL GROUP INC,0C6861
7968,91277,QURATE RETAIL INC,0C6861


In [82]:
# An RP entity can end up with several permnos for different reasons.
# For example, "0C6861" has 2 permnos because of its A and B share classes.
entity_id_mapped_morethan1.groupby("rp_entity_id").get_group("0C6861")

Unnamed: 0,permno,comnam,rp_entity_id
7969,91277,QURATE RETAIL INC Q V C GROUP,0C6861
7967,91277,QURATE RETAIL GROUP INC,0C6861
7968,91277,QURATE RETAIL INC,0C6861
7972,91278,QURATE RETAIL INC Q V C GROUP,0C6861
7971,91278,QURATE RETAIL INC,0C6861
7970,91278,QURATE RETAIL GROUP INC,0C6861


In [83]:
# "16AD58" has 2 permnos because the company moved its primary listing from the
# London Stock Exchange (LSE) to the New York Stock Exchange (NYSE) on September 25, 2023.
entity_id_mapped_morethan1.groupby("rp_entity_id").get_group("16AD58")

Unnamed: 0,permno,comnam,rp_entity_id
4143,24294,C R H PLC NEW,16AD58
5087,75650,C R H PLC,16AD58


In [84]:
# All permnos that were S&P 500 constituents at any point between 2000-01-01 and 2023-12-31.
# A membership spell [start, ending] overlaps that window when it ends on/after the window start
# and begins on/before the window end.
# The dates are written as ISO-8601 literals so they parse the same way regardless of the
# server's DateStyle setting, and the string is a plain literal because nothing is interpolated.
sp_500_20_cen_query = """SELECT DISTINCT permno FROM crsp.dsp500list
                        WHERE ending >= '2000-01-01' AND start <= '2023-12-31'
                """
sp_500_20_cen = db.raw_sql(sp_500_20_cen_query)

In [85]:
# Count the distinct permnos that (a) belong to an RP entity id linked to more than one permno
# and (b) were S&P 500 constituents between 2000 and 2023.
entity_id_mapped_morethan1["permno"].drop_duplicates().isin(sp_500_20_cen["permno"]).sum()

16

In [86]:
# Rows of multi-permno RP entities whose permno was an S&P 500 constituent.
# As the output shows, for some entities with more than one permno only one permno was ever an
# S&P 500 constituent, which is fine if we restrict the mapping file to S&P 500 constituents only.
entity_id_mapped_morethan1.loc[
    entity_id_mapped_morethan1["permno"].isin(sp_500_20_cen["permno"])
].sort_values(by="rp_entity_id")

Unnamed: 0,permno,comnam,rp_entity_id
6212,83443,BERKSHIRE HATHAWAY INC DEL,168A5D
5167,76226,PARAMOUNT GLOBAL,3C7F5F
5168,76226,VIACOMCBS INC,3C7F5F
7887,90979,UNDER ARMOUR INC,41EC04
1282,15980,UNDER ARMOUR INC,41EC04
7655,90319,ALPHABET INC,4A6F00
824,14542,ALPHABET INC,4A6F00
4712,59248,MOLSON COORS BREWING CO,78F9ED
4711,59248,MOLSON COORS BEVERAGE CO,78F9ED
2022,18421,FOX CORP,7BFF81


In [87]:
# Keep only the identifier columns; rows that differed only by company name (comnam)
# become exact duplicates once that column is gone.
mapping_file = mapping_file[["permno", "rp_entity_id"]]
# Restrict to S&P 500 permnos and de-duplicate in one chained expression.
# drop_duplicates() returns a new DataFrame, which avoids the SettingWithCopyWarning that
# calling it with inplace=True on a filtered slice produced.
SP500_mapping_file = (
    mapping_file.loc[mapping_file["permno"].isin(sp_500_20_cen["permno"])]
    .drop_duplicates()
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  SP500_mapping_file.drop_duplicates(inplace=True)


In [89]:
# RP entities for which more than one permno was selected as an S&P 500 constituent;
# all of these cases are due to multiple share classes.
(
    SP500_mapping_file
    .groupby("rp_entity_id")
    .filter(lambda grp: len(grp) > 1)
    .sort_values(by="rp_entity_id")
)

Unnamed: 0,permno,rp_entity_id
1282,15980,41EC04
7887,90979,41EC04
824,14542,4A6F00
7655,90319,4A6F00
2021,18420,7BFF81
2022,18421,7BFF81
642,13963,DD1BA1
643,13964,DD1BA1


In [90]:
# Write the S&P 500 permno -> rp_entity_id mapping to CSV without the DataFrame index.
SP500_mapping_file.to_csv(path_or_buf="SP500_Mapping_file.csv", index=False)