In [9]:

import pyreadstat
import pandas as pd
import numpy as np

# Define the path to your SAS file
funda = "/wrds/comp/sasdata/d_na/funda.sas7bdat"

In [10]:
columns_to_read = ['gvkey', 'datadate', 'at', 'pstkl', 'txditc', 'pstkrv', 'seq', 'pstk', 'indfmt', 'datafmt', 'popsrc', 'consol']

# Read the specified columns
df, meta = pyreadstat.read_file_multiprocessing(pyreadstat.read_sas7bdat, funda, num_processes=32)

df = df[columns_to_read]
print(meta.column_names)

# Convert 'datadate' from SAS date to datetime
# SAS epoch is January 1, 1960
sas_epoch = pd.to_datetime('1960-01-01')
df['datadate'] = sas_epoch + pd.to_timedelta(df['datadate'], unit='D')

# Apply filters
comp = df[
    (df['indfmt'] == 'INDL') &
    (df['datafmt'] == 'STD') &
    (df['popsrc'] == 'D') &
    (df['consol'] == 'C') &
    (df['datadate'] >= '1959-01-01') 
]

print(df.columns)

comp['year']=comp['datadate'].dt.year

# create preferrerd stock
comp['ps']=np.where(comp['pstkrv'].isnull(), comp['pstkl'], comp['pstkrv'])
comp['ps']=np.where(comp['ps'].isnull(),comp['pstk'], comp['ps'])
comp['ps']=np.where(comp['ps'].isnull(),0,comp['ps'])

comp['txditc']=comp['txditc'].fillna(0)

# create book equity
comp['be']=comp['seq']+comp['txditc']-comp['ps']
comp['be']=np.where(comp['be']>0, comp['be'], np.nan)

# number of years in Compustat
comp=comp.sort_values(by=['gvkey','datadate'])
comp['count']=comp.groupby(['gvkey']).cumcount()

comp=comp[['gvkey','datadate','year','be','count']]

print(comp)


['gvkey', 'datadate', 'fyear', 'indfmt', 'consol', 'popsrc', 'datafmt', 'tic', 'cusip', 'conm', 'acctchg', 'acctstd', 'acqmeth', 'adrr', 'ajex', 'ajp', 'bspr', 'compst', 'curcd', 'curncd', 'currtr', 'curuscn', 'final', 'fyr', 'ismod', 'ltcm', 'ogm', 'pddur', 'scf', 'src', 'stalt', 'udpl', 'upd', 'apdedate', 'fdate', 'pdate', 'acchg', 'acco', 'accrt', 'acdo', 'aco', 'acodo', 'acominc', 'acox', 'acoxar', 'acqao', 'acqcshi', 'acqgdwl', 'acqic', 'acqintan', 'acqinvt', 'acqlntal', 'acqniintc', 'acqppe', 'acqsc', 'act', 'adpac', 'aedi', 'afudcc', 'afudci', 'aldo', 'am', 'amc', 'amdc', 'amgw', 'ano', 'ao', 'aocidergl', 'aociother', 'aocipen', 'aocisecgl', 'aodo', 'aol2', 'aoloch', 'aox', 'ap', 'apalch', 'apb', 'apc', 'apofs', 'aqa', 'aqc', 'aqd', 'aqeps', 'aqi', 'aqp', 'aqpl1', 'aqs', 'arb', 'arc', 'arce', 'arced', 'arceeps', 'artfs', 'at', 'aul3', 'autxr', 'balr', 'banlr', 'bast', 'bastr', 'batr', 'bcef', 'bclr', 'bcltbl', 'bcnlr', 'bcrbl', 'bct', 'bctbl', 'bctr', 'billexce', 'bkvlps', 'bltb

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comp['year']=comp['datadate'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comp['ps']=np.where(comp['pstkrv'].isnull(), comp['pstkl'], comp['pstkrv'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comp['ps']=np.where(comp['ps'].isnull(),comp['pstk'], comp['ps'])
A value is trying to be s

         gvkey   datadate  year      be  count
0       001000 1961-12-31  1961     NaN      0
1       001000 1962-12-31  1962     NaN      1
2       001000 1963-12-31  1963   0.561      2
3       001000 1964-12-31  1964   0.627      3
4       001000 1965-12-31  1965   0.491      4
...        ...        ...   ...     ...    ...
901626  356289 2023-06-30  2023  16.393      2
901628  356289 2024-06-30  2024  17.437      3
901630  356687 2021-12-31  2021     NaN      0
901632  356687 2022-12-31  2022  41.683      1
901634  356687 2023-12-31  2023  87.766      2

[565812 rows x 5 columns]
