# Finance V: Research Topics in Finance, Risk- and Resource management 
## Replication of paper: Lowry, Michaely & Volkova (2017)

<blockquote>
    Author: Stefan Reimer <br>
    Date: 2019-12-28 <br>
    python version: 3.7 <br>
</blockquote>

In [1]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import re
import collections

import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# import defined functions
from src.functions.functions import (data_import_chunkwise, convert_NAs, get_duplicates, 
                                     find_char_in_colnames, convert_date, convert_price)

# set the settings for displayed dataFrames
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# 1 Load Data

In [2]:
#%%
# define path to files & load data chunkwise

initialFolderPath = "data/initial_data/"
cleanedFolderPath = "data/cleaned_data/"

filePath = initialFolderPath + "000_sdc_full.csv"

sdc_data = data_import_chunkwise(filePath=filePath)

INFO:root:loading started...
INFO:root:loading finished.


The loaded data frame has 43709 rows and 86 columns.


# first data exploration

In [3]:
# show first 5 rows
#print(sdc_data.head(5))

# show last 5 rows
#print(sdc_data.tail(5))

# get key statistics for data
#print(sdc_data.describe())

# show all columns names
#print(sdc_data.columns)

# Question: Does the sample size fit? 
# Are only North American IPOs given?
len_paper = 43816
len_sample = len(sdc_data)

print(f'length of neede sample (from paper): {len_paper}\n'
      f'length of given sample: {len_sample}\n'
     f'difference: {len_paper-len_sample}')

length of neede sample (from paper): 43816
length of given sample: 43709
difference: 107


### +++ Comment +++
the given SDC data sample is not exactly the same as in the inspected paper. 
One reason could be the extraction process of the countries
Lowry et al. picked by excluding, not by including the choosen countries

# 2 SDC data preparation

- Sort dataFrame
- choose only fulfilled IPOs
- convert date information
- convert price information
- create year variable



In [85]:
# sort dataFrame
sdc_data = sdc_data.sort_index()

# convert NAs in OrigIPO
sdc_data = convert_NAs(sdc_data, 'OrigIPO')

# print values of IPO and OrigIPO
print(collections.Counter(sdc_data['OrigIPO']))
print(collections.Counter(sdc_data['IPO']))

# keep only IPOs
sdc_data = sdc_data.loc[(sdc_data['IPO']=='Yes') &
                        (sdc_data['OrigIPO']!='No'), :]
print(len(sdc_data))
# 16,454 should be left.
# TODO: the difference is getting bigger; maybe made changes before the filtering?

# find all columns which contains some date information
date_cols = find_char_in_colnames(sdc_data, 'Date', print_bool= False)

# find all columns which contains some price information
price_cols = find_char_in_colnames(sdc_data, 'Price', print_bool= False)

# find all columns which contains some share information
share_cols = find_char_in_colnames(sdc_data, 'Share', print_bool= False)

# find all columns which contains some overallotment information
overall_cols = find_char_in_colnames(sdc_data, 'Overall', print_bool= False)

# find all columns which contains some round information
round_cols = find_char_in_colnames(sdc_data, 'Round', print_bool= False)

# convert and clean date columns
for date_col in date_cols:
    sdc_data = convert_date(sdc_data, date_col, format='%Y-%m-%d', errors='coerce', print_bool=False)

# convert and clean price columns
for price_col in price_cols:
    sdc_data = convert_price(data = sdc_data, column = price_col, errors='coerce', print_bool=False)

# create year variable
sdc_data['Year'] = sdc_data['IssueDate'].dt.year

0 NAs have been created. 6659 valid values are left. 

Counter({nan: 37050, 'No': 5110, 'Yes': 1549})
Counter({'No': 28400, 'Yes': 15309})
15268


In [88]:
# Question: Is the data horizont enough?
def min_max_print(df, column):
    """
    function to print the min and max value of a column
    @param df: dataFrame containing choosen column
    @param column: column as string, for which the min and max should be printed
    """
    min_value = df[column].min()
    max_value = df[column].max()
    print(f'[{column}] min: {min_value}, max: {max_value}')

# select date range
min_max_print(sdc_data, 'IssueDate')
sdc_data = sdc_data[(sdc_data['IssueDate'] > '1973-1-1') & (sdc_data['IssueDate'] <= '2018-12-31')]
min_max_print(sdc_data, 'IssueDate')

#Answer:
## No, data is needed from 01.01.1973 until 31.12.2016
## additional data needed for 1973 until 1975

[IssueDate] min: 1973-01-02 00:00:00, max: 2019-12-12 00:00:00
[IssueDate] min: 1973-01-02 00:00:00, max: 2018-12-26 00:00:00


In [90]:
# clean the types of securites
length_before = len(sdc_data)
ex_types= ["Units", "Ltd Prtnr Int", "MLP-Common Shs", "Shs Benficl Int",
             "Ltd Liab Int", "Stock Unit", "Trust Units", "Beneficial Ints"]
sdc_data = sdc_data[~sdc_data.Type.isin(ex_types)]
length_after = len(sdc_data)
print(f'before: {length_before} rows. After: {length_after} rows. \n'
     f'{length_before - length_after} are deleted.')

#### Should be "16,454 obs before ---> 15,107 obs after" ####

### drop REIT, Units, ADR, penny stocks and CEF ###
print(len(sdc_data))

# drop REIT - Real Estate Investment Trust
sdc_data = sdc_data[sdc_data['REIT'].isna()]

# drop Unit 
sdc_data = sdc_data[~(sdc_data['Unit'] == 'Yes')]

# drop Depositary (ADR)
### print(collections.Counter(sdc_data['Depositary']))
### TODO: get depositary & delete despositary!!! ####

# filter the offer prices (drop penny stocks)
sdc_data[sdc_data['OfferPrice'].notna()]
sdc_data = sdc_data[sdc_data['OfferPrice']>5]

# filter CEF
sdc_data = sdc_data[sdc_data['CEF'] == 'No']

print(len(sdc_data))

14304
10975


In [91]:
# explore CUSIP
cusip_cols = find_char_in_colnames(sdc_data, 'CUSIP')

### creating 8-digit CUSIP to match with CRSP
sdc_data['CUSIP8'] = sdc_data['CUSIP6'].astype(str) + '10'
CUSIP9_sliced = sdc_data['CUSIP9'].str.slice(0, 8)
sdc_data['CUSIP8'] = sdc_data['CUSIP8'].where(CUSIP9_sliced.isna(), CUSIP9_sliced)

columns containing <<CUSIP>> are:
['CUSIP6' 'CUSIP9']



In [93]:
sdc_data.to_csv(cleanedFolderPath + "sdc_data_cleaned.csv", index=False)

# Match CRSP and SDC Data

In [58]:
# CRSP path to files & load data chunkwise
dtype={'PERMNO': float, 'date': str, 'SHRCD': float, 
       'EXCHCD': float, 'NCUSIP': str, 'CUSIP':str,
       'PRC': float, 'RET':str, 'SHROUT':float, 
       'NCUSIP6':str}

folderPath = "data/initial_data/"
CRSPfilePath = folderPath + 'crsp_data_without_duplicates.csv'
crsp = pd.read_csv(CRSPfilePath, dtype=dtype)

In [59]:
# Quick overview
print(crsp.columns)
print(crsp.head(2))

#convert date
crsp['date'] = pd.to_datetime(crsp['date'])

# sort values for PERMNO and date
crsp = crsp.sort_values(['PERMNO', 'date'])

Index(['Unnamed: 0', 'PERMNO', 'date', 'SHRCD', 'EXCHCD', 'NCUSIP', 'CUSIP',
       'PRC', 'RET', 'SHROUT'],
      dtype='object')
   Unnamed: 0   PERMNO        date  SHRCD  EXCHCD    NCUSIP     CUSIP     PRC  \
0          59  10000.0  1986-01-04   10.0     3.0  68391610  68391610 -4.3125   
1         421  10001.0  1986-01-04   11.0     3.0  39040610  36720410 -6.3125   

         RET  SHROUT  
0  -0.028169  3680.0  
1   0.000000   985.0  


In [60]:
# keep only available prices
print(len(crsp))
crsp = crsp.loc[~crsp['PRC'].isna(), :]
print(len(crsp))

# create NCUSIP6
crsp['NCUSIP6'] = crsp['NCUSIP'].str.slice(0, 6)

# drop first column 'Unnamed: 0'
crsp = crsp.iloc[:, 1:]

#???FILTER FOR crsp <- crsp[SHRCD %in% 10:19 & EXCHCD %in% 1:3] only in PDF???

31574
29816


In [62]:
# create PERMNO_NUCSIP and FirstDate from sdc_data and CRSP
sdc_data = pd.merge(sdc_data,
         crsp[['NCUSIP', 'PERMNO', 'date']], 
         left_on='CUSIP8', right_on='NCUSIP',
                    how='left')
sdc_data = sdc_data.rename(columns={'date': 'First_CRSP_date_ncusip', 'PERMNO': 'Permno_ncusip'})

# create PERMNO_NUCSIP6 and FirstDate from sdc_data and CRSP
sdc_data = pd.merge(sdc_data,
         crsp[['NCUSIP6', 'PERMNO', 'date']], 
         left_on='CUSIP6', right_on='NCUSIP6',
                    how='left')
sdc_data = sdc_data.rename(columns={'date': 'First_CRSP_date_ncusip6', 'PERMNO': 'Permno_ncusip6'})

#print(permno_ncusip['NCUSIP'].isna().sum())

In [64]:
sdc_data['dif'] = sdc_data['First_CRSP_date_ncusip'] - sdc_data['IssueDate']
sdc_data['dif'] = sdc_data['dif'].dt.days

In [65]:
print(sdc_data['First_CRSP_date_ncusip'].isna().sum())
print(sdc_data['dif'].isna().sum())

1700
1700


In [68]:
# take PERMNO_CUSIP under condition
sdc_data['Permno'] = -999
sdc_data['dif'] = sdc_data['First_CRSP_date_ncusip'] - sdc_data['IssueDate']
sdc_data['dif'] = sdc_data['dif'].dt.days
condition = ((sdc_data['dif']>=-1) &
             (sdc_data['dif']<=7) & 
             (sdc_data['Permno'] == -999) &
             (sdc_data['dif'].notna()))
sdc_data['Permno'] = sdc_data['Permno'].where(~condition,
                         sdc_data['Permno_ncusip'])

In [70]:
# take PERMNO_CUSIP6 under condition
sdc_data['dif'] = sdc_data['First_CRSP_date_ncusip6'] - sdc_data['IssueDate']
sdc_data['dif'] = sdc_data['dif'].dt.days
condition = ((sdc_data['dif']>=-1) &
             (sdc_data['dif']<=7) & 
             (sdc_data['Permno'] == -999) &
             (sdc_data['dif'].notna()))
sdc_data['Permno'] = sdc_data['Permno'].where(~condition,
                         sdc_data['Permno_ncusip6'])

In [73]:
sdc_data[sdc_data['Permno'] == -999]

Unnamed: 0,DealNumber,CIK,CUSIP6,CUSIP9,Issuer,FilingDate,IssueDate,IPO,OrigIPO,Type,REIT,ADR,Unit,CEF,MainSICCode,Units,SpinOff,ForeignIssue,TrackingStockIssue,BestEftFirmCmtBghtDl,OfferPrice,SharesOutstandingAfterTheOffering,SharesOutstandingBeforeOffering,SharesOfferedSumOfAllMkts,SharesOfrdIncOverSoldSumOfAllMkts,PrimaryShsOfrdSumOfAllMkts,SecondaryShsOfrdSumOfAllMkts,OverallotAmtOptionSumOfAllMktsMil,OverallotAmtSoldSumOfAllMktsmil,TotGlobalOverallotmentSharesSold,SharesFiledSumOfAllMkts,PrimaryShsFiledSumOfAllMkts,SecondaryShsFiledSumOfAllMkts,AmendedShsFiledSumOfAllMkts,AmendedPrimaryShsFiledSumOfAllMkts,AmendedSecondaryShsFiledSumOfAllMkts,AmendMentDate,AmendHistShsFiledSumOfAllMkts,AmendHistSecShsFiledSumOfAllMkts,AmendHistOverallotShsOptionSumOfAllMkts,TickerAtIssue,TickerUltimateParents,TickerCurrent,TickerSpinOffParent,AllManagers_x,NonBookrunners,AllManagersParentsCode,AllMgrRoleCode,Managers_x,CoManagers,LeadManager,ManagerAgentsLawyersCode,DomesticSyndicateMemberCode,Bookrunners_x,BookrunnersParent,VentureBacked,FirmName,FundName,RoundNumberOfInvesTors,DisclosedRoundTotalmil,RoundDate,TotalKnownAmtInvestedInCompany000,AllManagers_y,Bookrunners_y,LeadManagersLongName,LeadManagers,Managers_y,CoManagersLongName,State,Nation,CurrentExchangeLongDescriptio,AllExchangesWhereIssueWillBeListed_1,AllExchangesWhereIssueWillBeListed_2,ExchangeWhereIssuWillBeLi,AllExchangesWhereIssuersStockTrades,FirstTwoExchangesWhereIssueWillBeListed,SpinParExch,PrimaryExchangeWhereIssuersStockTrades,AmendedHighFilingPrice,AmendedLowFilingPrice,AmendedMiddleOfFilingPrice,HighPriceOfFilingPriceRnge,LowPriceOfFilingPriceRnge,OriginalHighFilingPrice,OriginalLowFilingPrice,OriginalMiddleOfFilingPriceRange,Year,CUSIP8,NCUSIP,Permno_ncusip,First_CRSP_date_ncusip,NCUSIP6,Permno_ncusip6,First_CRSP_date_ncusip6,dif,Permno
1,62322002,,001032,,AES Technology Systems,NaT,1973-01-05,Yes,,Common Shares,,No,No,No,3579,No,N,No,No,,10.5,1.500000e+05,,260000.0,260000.0,150000.0,110000.0,,,,,,,,,,NaT,,,,AEST,AEST,,,RICHARD-ELLIS//PASCUMA,PASCUMA,CB-RICHARD-ELL,BM,RICHARD-ELLIS,PASCUMA,RICHARD-ELLIS,,,RICHARD-ELLIS,CB-RICHARD-ELL,No,,,,,NaT,,"Pascuma, Florsheim",CB Richard Ellis & Co,CB Richard Ellis & Co,RICHARD-ELLIS,"Pascuma, Florsheim & Co.","Pascuma, Florsheim & Co.",Illinois,United States,,O,O,OTC,OTC,OTC,,OTC,,,,,,,,,1973,00103210,00103210,10162.0,1973-01-02,001032,10162.0,1973-01-02,-3.0,-999
2,62324002,,893287,,Trans-National Leasing Inc,NaT,1973-01-05,Yes,,Common Shares,,No,No,No,7515,No,N,No,No,,5.5,2.000000e+05,,200000.0,200000.0,200000.0,0.0,,,,,,,,,,NaT,,,,TNLS,TNLS,,,BROWN-ALLEN,,BROWN-ALLEN,BM,BROWN-ALLEN,,BROWN-ALLEN,,,BROWN-ALLEN,BROWN-ALLEN,No,,,,,NaT,,"Brown, Allen & Co.","Brown, Allen & Co.","Brown, Allen & Co.",BROWN-ALLEN,"Brown, Allen & Co.",,Texas,United States,New York,O,O,OTC,OTC,OTC,,OTC,,,,,,,,,1973,89328710,89328710,76823.0,1973-01-02,893287,76823.0,1973-01-02,-3.0,-999
3,62306002,,913821,913821104,Universal Security Instruments,NaT,1973-01-16,Yes,,Common Shares,,No,No,No,3669,No,N,No,No,,13.0,8.437500e+04,,150000.0,150000.0,150000.0,0.0,,,,,,,,,,NaT,,,,UUU,UUU,UUU,,COENEN,,COENEN,BM,COENEN,,COENEN,,,COENEN,COENEN,No,,,,,NaT,,"Coenen & Co., Inc.","Coenen & Co., Inc.","Coenen & Co., Inc.",COENEN,"Coenen & Co., Inc.",,Maryland,United States,American,A,A,AMEX,American,AMEX,,American,,,,,,,,,1973,91382110,91382110,79573.0,1973-01-02,913821,79573.0,1973-01-02,-14.0,-999
4,62305002,,929236,,WD-40 Co,NaT,1973-01-16,Yes,,Common Shares,,No,No,No,2899,No,N,No,No,,16.5,0.000000e+00,,300000.0,300000.0,40000.0,260000.0,,,,,,,,,,NaT,,,,WDFC,WDFC,WDFC,,BATEMAN,,WF,BM,BATEMAN,,BATEMAN,,,BATEMAN,WF,No,,,,,NaT,,Bateman Eichler Hill Richards,Bateman Eichler Hill Richards Inc,Bateman Eichler Hill Richards Inc,BATEMAN,Bateman Eichler Hill Richards Inc,,California,United States,Nasdaq,NM,NM,NASDQ,Nasdaq,NASDQ,,Nasdaq,,,,,,,,,1973,92923610,92923610,81294.0,1973-01-02,929236,81294.0,1973-01-02,-14.0,-999
5,62292002,,705041,,Peavey Co,NaT,1973-01-18,Yes,,Common Shares,,No,No,No,2041,No,N,No,No,,24.0,5.000000e+04,,525000.0,525000.0,50000.0,475000.0,,,,,,,,,,NaT,,,,PEVY,PEVY,PEVCP,,GS,,GS,BM,GS,,GS,,,GS,GS,No,,,,,NaT,,Goldman Sachs & Co,Goldman Sachs & Co,Goldman Sachs & Co,GS,Goldman Sachs & Co,,Minnesota,United States,New York,NM,NM,NASDQ,Nasdaq,NASDQ,,Nasdaq,,,,,,,,,1973,70504110,70504110,62658.0,1973-01-02,705041,62658.0,1973-01-02,-16.0,-999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11192,3308195002,1743971.0,608012,,Mogu Inc,2018-11-09,2018-12-04,Yes,,ADS,,Yes,No,No,7389,No,N,Yes,No,,14.0,2.558401e+09,255.0,4750000.0,4750000.0,4750000.0,0.0,9.975,,,,,,4750000,4750000,,NaT,4750000,,712500,MOGU,MOGU,MOGU,,MS(JB)/CREDIT-SEC-USA(JB)/CRS-HK(JB),,MORGAN-STANLEY,JB,MS,,MS/CREDIT-SEC-USA/CRS-HK,,,MS/CREDIT-SEC-USA(JB),MORGAN-STANLEY,Yes,Shenzhen Tiantu Capital Management Center LP|P...,Tiantu Capital VI|Ping An Venture Capital Fund...,2|2|2|2|1|5|5|1|5|5|1,"200,000.0|200,000.0|-|-|-|200,000.0|200,000.0|...",NaT,400000.0,Credit Suisse Securities (USA)|China Renaissan...,Credit Suisse Securities (USA) LLC|China Renai...,Credit Suisse Securities (USA) LLC|China Renai...,CREDIT-SEC-USA|CRS-HK,Credit Suisse Securities (USA) LLC|China Renai...,,Foreign,China,New York,N,N,NYSE,New York,NYSE,,New York,16.0000,14.00000,15.00000,16.0,14.0,,,,2018,60801210,60801210,18292.0,2018-06-12,608012,18292.0,2018-06-12,-175.0,-999
11193,3309174002,1609727.0,3H9422,,Synthorx Inc,2018-11-13,2018-12-06,Yes,,Common Shares,,No,No,No,2836,No,N,No,No,,11.0,3.201723e+07,1.0,11912727.0,13699636.0,11912727.0,0.0,19.656,19.656,1786909,,,,11912727,11912727,,NaT,11912727,,1786909,THOR,THOR,THOR,,JEFFERIES-LLC(JB)/LEERINK-PARTNER(JB),H-C-WAINWRIGHT,JEFFERIES-LLC,JB,JEFFERIES-LLC,,JEFFERIES-LLC/LEERINK-PARTNER,,,JEFFERIES-LLC,JEFFERIES-LLC,Yes,Undisclosed Firm|Undisclosed Firm|Correlation ...,Undisclosed Fund|Undisclosed Fund|Correlation ...,5|5|8|8|8|8|8|8|8|8|5|5|5|6|6|6|3|3|3|5|5|5|5|...,"5,000.0|5,000.0|-|-|-|-|-|-|-|-|5,000.0|5,000....",NaT,80287.0,Leerink Partners LLC|Evercore Group|HC Wainwri...,Leerink Partners LLC|Evercore Group,Leerink Partners LLC|Evercore Group|HC Wainwri...,LEERINK-PARTNER|EVERCORE-GROUP|H-C-WAINWRIGHT,Leerink Partners LLC|Evercore Group|HC Wainwri...,,California,United States,Nasdaq,NM,NM,NASDQ,Nasdaq,NASDQ,,Nasdaq,12.0000,10.00000,11.00000,12.0,10.0,,,,2018,3H942210,,,NaT,,,NaT,,-999
11194,3287319002,1682852.0,60770K,,Moderna Inc,2018-09-13,2018-12-06,Yes,,Common Shares,,No,No,No,8731,No,N,No,No,,23.0,3.289481e+08,30.0,26275993.0,26275993.0,26275993.0,0.0,90.652,,,,,,26275993,26275993,,NaT,"21,739,131|21,739,131|26,275,993",-|-|-,"-|-|3,941,398",MRNA,MRNA,MRNA,,MS(JB)/GS(JB)/JPM-SEC-LLC(JB)/ML(JB),ODDO-BHF-SCA/OPPENHEIMER-CO/NEEDHAM/,MORGAN-STANLEY,JB,MS,ODDO-BHF-SCA/OPPENHEIMER-CO/NEEDHAM/,MS/GS/JPM-SEC-LLC/ML/PIPER-JAFFRAY,,,MS/GS(JB),MORGAN-STANLEY,Yes,Undisclosed Firm|Undisclosed Firm|Flagship Pio...,Undisclosed Fund|Undisclosed Fund|Flagship Pio...,2|3|3|3|6|6|6|6|6|6|1|1|1|1|10|10|10|10|10|10|...,"40,000.0|474,000.0|474,000.0|474,000.0|446,069...",NaT,1720068.8,Goldman Sachs & Co|JP Morgan Securities LLC|Me...,Goldman Sachs & Co|JP Morgan Securities LLC|Me...,Goldman Sachs & Co|JP Morgan Securities LLC|Me...,GS|JPM-SEC-LLC|ML|PIPER-JAFFRAY|BARCLAYS-CAP,Goldman Sachs & Co|JP Morgan Securities LLC|Me...,Oppenheimer & Co Inc|Needham & Co LLC|Chardan ...,Massachusetts,United States,Nasdaq,NM,NM,NASDQ,Nasdaq,NASDQ,,Nasdaq,17.9399,16.44491,17.19241,,,,,,2018,60770K10,60770K10,18312.0,2018-07-12,60770K,18312.0,2018-07-12,-147.0,-999
11198,3284645002,1734005.0,6H6317,,Aptorum Grp Ltd,2018-09-05,2018-12-17,Yes,,Class A Shares,,No,No,No,2834,No,N,Yes,No,,15.8,7.325115e+06,,1898734.0,1898734.0,1898734.0,0.0,,,,,,,632912,632912,,NaT,"632,912|632,912",-|-,-|-,APM,APM,,,BOUSTEAD-SEC(JB)/CRS-HK(JB),,BOUSTEAD-SEC,JB,BOUSTEAD-SEC,,BOUSTEAD-SEC/CRS-HK/AMTD-ASSET,,,BOUSTEAD-SEC/CRS-HK(JB),BOUSTEAD-SEC,No,,,,,NaT,,China Renaissance Sec(HK)Ltd|AMTD ASSET MANAGE...,China Renaissance Securities(Hong Kong)Ltd|AMT...,China Renaissance Securities(Hong Kong)Ltd|AMT...,CRS-HK|AMTD-ASSET,China Renaissance Securities(Hong Kong)Ltd|AMT...,,Foreign,Hong Kong,,NM,NM,NASDQ,Nasdaq,NASDQ,,Nasdaq,15.8000,15.80000,15.80000,,,,,,2018,6H631710,,,NaT,,,NaT,,-999


In [82]:
only_relevant_values = sdc_data[['CUSIP6', 'IssueDate', 'Permno_ncusip', 'First_CRSP_date_ncusip', 'NCUSIP6', 'Permno_ncusip6', 'First_CRSP_date_ncusip6', 'dif' ]]
only_relevant_values.head(100)

Unnamed: 0,CUSIP6,IssueDate,Permno_ncusip,First_CRSP_date_ncusip,NCUSIP6,Permno_ncusip6,First_CRSP_date_ncusip6,dif
0,655312,1973-01-02,58317.0,1973-01-02,655312,58317.0,1973-01-02,0.0
1,001032,1973-01-05,10162.0,1973-01-02,001032,10162.0,1973-01-02,-3.0
2,893287,1973-01-05,76823.0,1973-01-02,893287,76823.0,1973-01-02,-3.0
3,913821,1973-01-16,79573.0,1973-01-02,913821,79573.0,1973-01-02,-14.0
4,929236,1973-01-16,81294.0,1973-01-02,929236,81294.0,1973-01-02,-14.0
5,705041,1973-01-18,62658.0,1973-01-02,705041,62658.0,1973-01-02,-16.0
6,719207,1973-01-18,62755.0,1973-01-02,719207,62755.0,1973-01-02,-16.0
7,846246,1973-01-23,71926.0,1973-01-02,846246,71926.0,1973-01-02,-21.0
8,737516,1973-01-24,63730.0,1973-01-03,737516,63730.0,1973-01-03,-21.0
9,740814,1973-02-01,63934.0,1973-01-03,740814,63934.0,1973-01-03,-29.0


In [78]:
collections.Counter(sdc_data['dif'])

Counter({0.0: 137,
         -3.0: 4,
         -14.0: 3,
         -16.0: 13,
         -21.0: 12,
         -29.0: 77,
         -36.0: 43,
         -41.0: 18,
         -49.0: 5,
         -53.0: 27,
         10144.0: 1,
         -70.0: 23,
         -77.0: 27,
         -95.0: 16,
         -100.0: 18,
         1672.0: 1,
         -209.0: 41,
         -246.0: 10,
         -251.0: 25,
         -259.0: 56,
         -280.0: 47,
         -232.0: 6,
         -248.0: 14,
         5069.0: 1,
         -55.0: 13,
         -61.0: 36,
         -69.0: 34,
         -161.0: 50,
         -154.0: 40,
         -163.0: 48,
         -282.0: 43,
         -5.0: 5,
         -12.0: 9,
         -73.0: 18,
         -85.0: 20,
         -91.0: 22,
         5025.0: 1,
         -126.0: 40,
         nan: 1,
         -148.0: 36,
         -153.0: 24,
         -174.0: 58,
         -214.0: 44,
         -216.0: 30,
         -235.0: 10,
         -249.0: 14,
         -262.0: 49,
         -1436.0: 1,
         19.0: 1,
         -4

In [55]:
# exclude observations without CRSP record
test = sdc_data[sdc_data['Permno'] != -999]

In [56]:
sdc_data

Unnamed: 0,DealNumber,CIK,CUSIP6,CUSIP9,Issuer,FilingDate,IssueDate,IPO,OrigIPO,Type,REIT,ADR,Unit,CEF,MainSICCode,Units,SpinOff,ForeignIssue,TrackingStockIssue,BestEftFirmCmtBghtDl,OfferPrice,SharesOutstandingAfterTheOffering,SharesOutstandingBeforeOffering,SharesOfferedSumOfAllMkts,SharesOfrdIncOverSoldSumOfAllMkts,PrimaryShsOfrdSumOfAllMkts,SecondaryShsOfrdSumOfAllMkts,OverallotAmtOptionSumOfAllMktsMil,OverallotAmtSoldSumOfAllMktsmil,TotGlobalOverallotmentSharesSold,SharesFiledSumOfAllMkts,PrimaryShsFiledSumOfAllMkts,SecondaryShsFiledSumOfAllMkts,AmendedShsFiledSumOfAllMkts,AmendedPrimaryShsFiledSumOfAllMkts,AmendedSecondaryShsFiledSumOfAllMkts,AmendMentDate,AmendHistShsFiledSumOfAllMkts,AmendHistSecShsFiledSumOfAllMkts,AmendHistOverallotShsOptionSumOfAllMkts,TickerAtIssue,TickerUltimateParents,TickerCurrent,TickerSpinOffParent,AllManagers_x,NonBookrunners,AllManagersParentsCode,AllMgrRoleCode,Managers_x,CoManagers,LeadManager,ManagerAgentsLawyersCode,DomesticSyndicateMemberCode,Bookrunners_x,BookrunnersParent,VentureBacked,FirmName,FundName,RoundNumberOfInvesTors,DisclosedRoundTotalmil,RoundDate,TotalKnownAmtInvestedInCompany000,AllManagers_y,Bookrunners_y,LeadManagersLongName,LeadManagers,Managers_y,CoManagersLongName,State,Nation,CurrentExchangeLongDescriptio,AllExchangesWhereIssueWillBeListed_1,AllExchangesWhereIssueWillBeListed_2,ExchangeWhereIssuWillBeLi,AllExchangesWhereIssuersStockTrades,FirstTwoExchangesWhereIssueWillBeListed,SpinParExch,PrimaryExchangeWhereIssuersStockTrades,AmendedHighFilingPrice,AmendedLowFilingPrice,AmendedMiddleOfFilingPrice,HighPriceOfFilingPriceRnge,LowPriceOfFilingPriceRnge,OriginalHighFilingPrice,OriginalLowFilingPrice,OriginalMiddleOfFilingPriceRange,Year,CUSIP8,Permno_ncusip,First_CRSP_date_ncusip,Permno_ncusip6,First_CRSP_date_ncusip6,Permno,dif
0,62319002,,655312,,Nolex Corp,NaT,1973-01-02,Yes,,Common Shares,,No,No,No,5111,No,N,No,No,,7.0,500000.0,,500000.0,500000.0,500000.0,0.0,,,,,,,,,,NaT,,,,NLX,NLX,,,SHAPIRO,,SHAPIRO,BM,SHAPIRO,,SHAPIRO,,,SHAPIRO,SHAPIRO,No,,,,,NaT,,J. Shapiro,J. Shapiro Co.,J. Shapiro Co.,SHAPIRO,J. Shapiro Co.,,California,United States,New York,A,A,AMEX,American,AMEX,,American,,,,,,,,,1973,65531210,58317.0,1973-01-02,58317.0,1973-01-02,58317,0.0


In [71]:
# create initial returns
close_price = pd.merge(sdc_data, crsp_PERMNO[['PERMNO', 'PRC']], left_on='Permno', right_on='PERMNO')
sdc_data['close_price'] = np.absolute(close_price)
sdc_data['IR'] = (sdc_data['close_price']/sdc_data['OfferPrice']-1)

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [94]:
sdc_data.columns

Index(['DealNumber', 'CIK', 'CUSIP6', 'CUSIP9', 'Issuer', 'FilingDate',
       'IssueDate', 'IPO', 'OrigIPO', 'Type', 'REIT', 'ADR', 'Unit', 'CEF',
       'MainSICCode', 'Units', 'SpinOff', 'ForeignIssue', 'TrackingStockIssue',
       'BestEftFirmCmtBghtDl', 'OfferPrice',
       'SharesOutstandingAfterTheOffering', 'SharesOutstandingBeforeOffering',
       'SharesOfferedSumOfAllMkts', 'SharesOfrdIncOverSoldSumOfAllMkts',
       'PrimaryShsOfrdSumOfAllMkts', 'SecondaryShsOfrdSumOfAllMkts',
       'OverallotAmtOptionSumOfAllMktsMil', 'OverallotAmtSoldSumOfAllMktsmil',
       'TotGlobalOverallotmentSharesSold', 'SharesFiledSumOfAllMkts',
       'PrimaryShsFiledSumOfAllMkts', 'SecondaryShsFiledSumOfAllMkts',
       'AmendedShsFiledSumOfAllMkts', 'AmendedPrimaryShsFiledSumOfAllMkts',
       'AmendedSecondaryShsFiledSumOfAllMkts', 'AmendMentDate',
       'AmendHistShsFiledSumOfAllMkts', 'AmendHistSecShsFiledSumOfAllMkts',
       'AmendHistOverallotShsOptionSumOfAllMkts', 'TickerAtIssue',
   

### 11,103 obs ---> 8,995 obs

### matching CRSP infor
m <- match(ipo$Permno, crsp$PERMNO)
ipo[, `:=` (First_CRSP_date = ymd(crsp$date[m]), Close_price1 = abs(crsp$PRC[m]), Close_price2 = abs(crsp$PRC[m + 1]))]

### removing extra variables
ipo[,`:=`(First_CRSP_date_ncusip = NULL,First_CRSP_date_ncusip6 = NULL, Permno_ncusip = NULL, Permno_ncusip6 = NULL, dif = NULL)]
ipo[, `:=`(REIT = NULL, Unit = NULL, Depositary = NULL, CEF = NULL, CUSIP = NULL, CUSIP9 = NULL)]
ipo[, `:=`(IPO_Flag = NULL, Original_IPO_Flag = NULL)]


### Dropping wrong share clases and shares traded on other exchanges
### Loading CRSP Stock Header Information file
crsp.info <- fread(crsp.info.datafile, select = c("PERMNO", "HSHRCD", "HEXCD"))
m <- match(ipo$Permno, crsp.info$PERMNO)
ipo[, `:=` (exch = crsp.info$HEXCD[m], shrcd = crsp.info$HSHRCD[m])]

### removing extra variables
for drop_column in ['REIT', 'Unit', 'CEF', 'CUSIP6', 'CUSIP8', 'CUSIP9', 'IPO', 'OrigIPO']:
    # First_CRSP_date_ncusip, First_CRSP_date_ncusip6, Permno_ncusip, Permno_ncusip6, dif, Depositary
    sdc_data = sdc_data.drop(columns=drop_column)

sdc_data.to_csv(folderPath + "data_prepared.csv")