In [35]:
import pandas as pd
import numpy as np

In [45]:
# Load the merged panel from the local parquet file into `df`.
df = pd.read_parquet('data/merged_panel.parquet')

In [46]:
# List every column of the freshly loaded panel before any pruning.
df.columns

Index(['date', 'permno', 'datadate', 'effective_date', 'at', 'ceq', 'revt',
       'oiadpq', 'ibq', 'oancf', 'capx', 'dltt', 'lt', 'che', 'csho', 'prcc',
       'adj_close', 'shares_outstanding', 'book_to_price', 'earnings_to_price',
       'sales_to_price', 'cf_to_price', 'price_to_book', 'mkt_rf', 'rf', 'smb',
       'hml', 'umd', 'mkt_rf_znorm_w504', 'smb_znorm_w504', 'hml_znorm_w504',
       'umd_znorm_w504', 'ret', 'excess_ret', 'open', 'low', 'high', 'volume',
       'mkt_cap', 'ticker', 'log_ret_1d', 'alpha_cs', 'beta_mkt_rf',
       'beta_smb', 'beta_hml', 'beta_umd'],
      dtype='object')

In [47]:
# Remove the columns that are not carried into the weekly panel.
df = df.drop(labels=['permno', 'datadate', 'effective_date'], axis='columns')



In [48]:
# Columns removed from the panel before the weekly resample, grouped by role.
drop_factors = (
    # factor series
    ['mkt_rf', 'smb', 'hml', 'umd']
    # their `_znorm_w504` counterparts
    + ['mkt_rf_znorm_w504', 'smb_znorm_w504', 'hml_znorm_w504', 'umd_znorm_w504']
    # remaining rate / return helper columns
    + ['rf', 'excess_ret', 'log_ret_1d']
)

In [49]:
# Drop every column named in `drop_factors` from the panel.
df = df.drop(labels=drop_factors, axis='columns')

In [50]:
# Display the panel after the two column drops above.
df

Unnamed: 0,date,at,ceq,revt,oiadpq,ibq,oancf,capx,dltt,lt,...,low,high,volume,mkt_cap,ticker,alpha_cs,beta_mkt_rf,beta_smb,beta_hml,beta_umd
0,2010-01-04,53833.0,27531.0,5857.0,2302.0,1458.0,1097.25,25.0,13751.0,25918.0,...,24.66,25.19,26919420.0,124528817000.0,ORCL,,,,,
1,2010-01-05,53833.0,27531.0,5857.0,2302.0,1458.0,1097.25,25.0,13751.0,25918.0,...,24.35,24.85,28782092.0,124378480400.0,ORCL,,,,,
2,2010-01-06,53833.0,27531.0,5857.0,2302.0,1458.0,1097.25,25.0,13751.0,25918.0,...,24.38,24.915,24666924.0,122574441200.0,ORCL,,,,,
3,2010-01-07,53833.0,27531.0,5857.0,2302.0,1458.0,1097.25,25.0,13751.0,25918.0,...,24.08,24.61,30567536.0,122173543600.0,ORCL,,,,,
4,2010-01-08,53833.0,27531.0,5857.0,2302.0,1458.0,1097.25,25.0,13751.0,25918.0,...,24.25,24.75,23638452.0,123676909600.0,ORCL,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47994,2024-12-24,119852.0,69931.0,25182.0,2772.0,2173.0,2527.25,2140.5,9695.0,49142.0,...,435.14001,462.78,59351506.0,1483946536800.0,TSLA,0.001543,0.025927,-0.000616,-0.005964,-0.011056
47995,2024-12-26,119852.0,69931.0,25182.0,2772.0,2173.0,2527.25,2140.5,9695.0,49142.0,...,451.01999,465.3299,76392273.0,1457784547800.0,TSLA,0.001618,0.025673,-0.000538,-0.005928,-0.010863
47996,2024-12-27,119852.0,69931.0,25182.0,2772.0,2173.0,2527.25,2140.5,9695.0,49142.0,...,426.5,450.0,82370345.0,1385654499600.0,TSLA,0.001609,0.025809,-0.000543,-0.005887,-0.010749
47997,2024-12-30,119852.0,69931.0,25182.0,2772.0,2173.0,2527.25,2140.5,9695.0,49142.0,...,415.75,427.0,64705452.0,1339911144600.0,TSLA,0.001812,0.025703,-0.000549,-0.005459,-0.010142


In [51]:
# Confirm which columns remain after pruning.
df.columns

Index(['date', 'at', 'ceq', 'revt', 'oiadpq', 'ibq', 'oancf', 'capx', 'dltt',
       'lt', 'che', 'csho', 'prcc', 'adj_close', 'shares_outstanding',
       'book_to_price', 'earnings_to_price', 'sales_to_price', 'cf_to_price',
       'price_to_book', 'ret', 'open', 'low', 'high', 'volume', 'mkt_cap',
       'ticker', 'alpha_cs', 'beta_mkt_rf', 'beta_smb', 'beta_hml',
       'beta_umd'],
      dtype='object')

In [54]:

# Column groups of the panel. `FUNDAMENTALS` is the set that
# `resample_weekly` forward-fills per ticker before aggregating.
FUNDAMENTALS = [
    'at', 'ceq', 'revt', 'oiadpq', 'ibq', 'oancf',
    'capx', 'dltt', 'lt', 'che', 'csho',
]

# Price-scaled ratio columns.
RATIOS = [
    'book_to_price',
    'earnings_to_price',
    'sales_to_price',
    'cf_to_price',
    'price_to_book',
]

# Price-level columns.
PRICES = ['prcc', 'adj_close', 'open', 'low', 'high']

# Return columns.
RETURNS = ['ret']

# Identifier, size and alpha/beta columns.
META = [
    'ticker',
    'shares_outstanding',
    'mkt_cap',
    'alpha_cs',
    'beta_mkt_rf',
    'beta_smb',
    'beta_hml',
    'beta_umd',
]

def _ret_prod(x):
    x = x.dropna()
    return (x.add(1).prod() - 1) if len(x) else np.nan

def _sum_or_nan(x):
    """Sum the non-missing values of ``x``; return NaN when there are none."""
    return x.sum(min_count=1)


def resample_weekly(df):
    """Aggregate a per-ticker daily panel into weekly rows on ``W-FRI`` bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel with a ``date`` column (anything ``pd.to_datetime`` accepts),
        a ``ticker`` column and every column named in the aggregation map
        below. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per ticker and weekly bin, sorted by ticker and date.
        Columns are ``ticker``, ``date``, the aggregated columns in the order
        of the aggregation map, then ``mkt_cap_eow`` and ``ret_next``.

    Raises
    ------
    KeyError
        If ``date``, ``ticker`` or any aggregated column is missing.

    Notes
    -----
    Weekly bins that hold no rows for a ticker are kept in the output. All
    aggregated columns are NaN for such weeks, including ``volume``: a plain
    ``'sum'`` would report 0 there and make a gap look like a real
    zero-volume week.
    """
    agg_map = {
        # Prices
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'adj_close': 'last',
        'prcc': 'last',

        # Volume & caps
        'volume': _sum_or_nan,   # NaN (not 0) when the week has no volume data
        'shares_outstanding': 'last',
        'mkt_cap': 'last',

        # Returns
        'ret': _ret_prod,        # compound simple returns over the week
        # ('log_ret_1d' would aggregate with 'sum' -- log returns add)

        # Fundamentals (levels, as-of) -- same columns that are forward-filled
        **{col: 'last' for col in FUNDAMENTALS},

        # Ratios (as-of)
        **{col: 'last' for col in RATIOS},

        # Alpha / betas
        'alpha_cs': 'mean',      # or 'last' if you prefer
        'beta_mkt_rf': 'last',
        'beta_smb': 'last',
        'beta_hml': 'last',
        'beta_umd': 'last',
    }

    # Fail early with one message listing everything that is missing instead
    # of a KeyError for whichever column pandas happens to touch first.
    required = ['date', 'ticker', *agg_map]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"resample_weekly: missing column(s) {missing}")

    daily = df.copy()
    daily['date'] = pd.to_datetime(daily['date'])
    daily = daily.sort_values(['ticker', 'date'])

    # Forward-fill fundamentals within each ticker so "last" is as-of
    fund_cols = list(FUNDAMENTALS)
    daily[fund_cols] = daily.groupby('ticker', group_keys=False)[fund_cols].ffill()

    weekly = (
        daily.set_index('date')
             .groupby('ticker')
             .resample('W-FRI')
             .agg(agg_map)
             .reset_index()
             .sort_values(['ticker', 'date'])
    )

    # Optional: recompute end-of-week market cap from end-of-week price & shares
    weekly['mkt_cap_eow'] = weekly['adj_close'] * weekly['shares_outstanding']

    # Optional: next-week label (the following weekly row's return, per ticker)
    weekly['ret_next'] = weekly.groupby('ticker')['ret'].shift(-1)

    return weekly

In [55]:
# Replace the daily panel with its weekly aggregation. `df` is rebound here,
# so re-running this cell alone would feed the weekly frame back in.
df = resample_weekly(df)

In [56]:
# Check the weekly layout: `ticker`/`date` first, derived columns last.
df.columns

Index(['ticker', 'date', 'open', 'high', 'low', 'adj_close', 'prcc', 'volume',
       'shares_outstanding', 'mkt_cap', 'ret', 'at', 'ceq', 'revt', 'oiadpq',
       'ibq', 'oancf', 'capx', 'dltt', 'lt', 'che', 'csho', 'book_to_price',
       'earnings_to_price', 'sales_to_price', 'cf_to_price', 'price_to_book',
       'alpha_cs', 'beta_mkt_rf', 'beta_smb', 'beta_hml', 'beta_umd',
       'mkt_cap_eow', 'ret_next'],
      dtype='object')

In [57]:
# Persist the weekly panel so later sessions can load it directly.
df.to_parquet('data/processed_weekly_panel.parquet')

In [1]:
# Reload the saved weekly panel and preview it. The execution counter of this
# cell is 1, so it appears to have been run in a fresh kernel -- hence the
# repeated pandas import.
import pandas as pd
df = pd.read_parquet('data/processed_weekly_panel.parquet')
df

Unnamed: 0,ticker,date,open,high,low,adj_close,prcc,volume,shares_outstanding,mkt_cap,...,sales_to_price,cf_to_price,price_to_book,alpha_cs,beta_mkt_rf,beta_smb,beta_hml,beta_umd,mkt_cap_eow,ret_next
0,AAPL,2010-01-08,213.42999,215.59,209.05,211.98,167.440,94902214.0,906282000.0,192113658360.0,...,0.050327,0.008936,7.558326,,,,,,192113658360.0,0.0
1,AAPL,2010-01-15,212.8,213.0,204.10001,205.92999,167.440,98861153.0,906795000.0,186736285282.050018,...,0.050327,0.008936,7.558326,,,,,,186736285282.050018,0.0
2,AAPL,2010-01-22,208.33,215.55,197.16,197.75,167.440,103723540.0,906795000.0,179318711250.0,...,0.050327,0.008936,7.558326,,,,,,179318711250.0,-0.028759
3,AAPL,2010-01-29,202.50999,213.71001,190.25,192.063,210.732,260102970.0,906795000.0,174161768085.0,...,0.082117,0.007567,5.339483,,,,,,174161768085.0,0.017687
4,AAPL,2010-02-05,192.37,200.2,190.85001,195.46001,210.732,134766042.0,906795000.0,177242159767.950012,...,0.082117,0.007567,5.339483,,,,,,177242159767.950012,0.025171
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10132,UBER,2024-12-06,71.93,73.44,64.28,66.09,75.160,155331834.0,2105709000.0,139166307810.0,...,0.070743,0.008516,10.700272,0.001137,0.013682,0.003829,-0.002820,0.001225,139166307810.0,-0.093206
10133,UBER,2024-12-13,65.5,66.17,59.74,59.93,75.160,215363841.0,2105709000.0,126195140370.0,...,0.070743,0.008516,10.700272,0.001039,0.013401,0.003703,-0.002743,0.000655,126195140370.0,0.013349
10134,UBER,2024-12-20,60.31,63.94,59.33,60.73,75.160,174563671.0,2105709000.0,127879707570.0,...,0.070743,0.008516,10.700272,0.001079,0.012365,0.003619,-0.002753,0.000413,127879707570.0,0.006587
10135,UBER,2024-12-27,60.88,62.22,60.02,61.13,75.160,50143475.0,2105709000.0,128721991170.0,...,0.070743,0.008516,10.700272,0.001199,0.012194,0.003583,-0.002727,0.000350,128721991170.0,-0.01325


In [3]:
# Show the columns from `prcc` through `lt` (label slices include both ends).
# Selecting by name instead of position 6:20 keeps this view correct if
# columns are added or reordered upstream.
df.loc[:, 'prcc':'lt']


Unnamed: 0,prcc,volume,shares_outstanding,mkt_cap,ret,at,ceq,revt,oiadpq,ibq,oancf,capx,dltt,lt
0,167.440,94902214.0,906282000.0,192113658360.0,0.0,31709.0,19622.0,7464.0,1392.0,1072.0,1325.25,172.0,0.0,12087.0
1,167.440,98861153.0,906795000.0,186736285282.050018,0.0,31709.0,19622.0,7464.0,1392.0,1072.0,1325.25,172.0,0.0,12087.0
2,167.440,103723540.0,906795000.0,179318711250.0,0.0,31709.0,19622.0,7464.0,1392.0,1072.0,1325.25,172.0,0.0,12087.0
3,210.732,260102970.0,906795000.0,174161768085.0,-0.028759,53926.0,35768.0,15683.0,4725.0,3378.0,1445.25,94.0,0.0,18158.0
4,210.732,134766042.0,906795000.0,177242159767.950012,0.017687,53926.0,35768.0,15683.0,4725.0,3378.0,1445.25,94.0,0.0,18158.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10132,75.160,155331834.0,2105709000.0,139166307810.0,-0.081573,47117.0,14780.0,11188.0,1073.0,2612.0,1346.75,49.5,12482.0,30578.0
10133,75.160,215363841.0,2105709000.0,126195140370.0,-0.093206,47117.0,14780.0,11188.0,1073.0,2612.0,1346.75,49.5,12482.0,30578.0
10134,75.160,174563671.0,2105709000.0,127879707570.0,0.013349,47117.0,14780.0,11188.0,1073.0,2612.0,1346.75,49.5,12482.0,30578.0
10135,75.160,50143475.0,2105709000.0,128721991170.0,0.006587,47117.0,14780.0,11188.0,1073.0,2612.0,1346.75,49.5,12482.0,30578.0
