In [8]:
import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
# NOTE(review): with no category/module argument this suppresses every warning
# raised after this line, including deprecation warnings from the libraries
# imported above. Consider narrowing the filter so real problems stay visible.
warnings.filterwarnings('ignore')

In [9]:
# Sample window: first and last fiscal year kept when filtering on `fyear`
# (both bounds inclusive).
start_y, end_y = 1967, 2023

# Last fiscal year of the first sub-period; years >= split_y + 1 form the second.
split_y = 1995

In [10]:
# Load the master file, keep fiscal years inside [start_y, end_y] and drop
# every row that is missing any variable used in the yearly regressions.
data = (
    pd.read_csv('./data/master_data.csv')
    # `between` is inclusive on both ends by default.
    .loc[lambda frame: frame['fyear'].between(start_y, end_y)]
    .dropna(
        subset=[
            'rev', 'exp_lag', 'exp_lead',
            'cogs', 'sga', 'depr', 'tax', 'si', 'oth',
        ]
    )
)

In [11]:
# Yearly cross-sectional OLS coefficients.
#
# For each fiscal year in `data`, `rev` is regressed on a constant, `exp_lag`,
# the six current-period expense components and `exp_lead`. Only the slopes on
# the six components are kept. `results` ends up with one row per year and the
# columns `year`, `cogs`, `sga`, `depr`, `tax`, `si`, `oth`.
component_cols = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']
regressor_cols = ['exp_lag', *component_cols, 'exp_lead']

# Collect one plain dict per year and build the frame once at the end.
# Concatenating onto an empty frame inside the loop is quadratic and relies on
# deprecated pandas behaviour for empty entries in `concat`.
yearly_coefs = []
for year, group in data.groupby('fyear'):
    X = sm.add_constant(group[regressor_cols])
    y = group['rev']

    model = sm.OLS(y, X).fit()
    coef = model.params

    yearly_coefs.append({'year': year, **coef[component_cols].to_dict()})

# Explicit columns keep the schema stable even if `data` has no rows.
results = pd.DataFrame(yearly_coefs, columns=['year', *component_cols])

In [12]:
# Average the yearly coefficients within each sub-period and compare them.
#   row 0            : mean over years <= split_y
#   row 1            : mean over years >= split_y + 1
#   row 'difference' : row 1 minus row 0
component_cols = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

early_mean = results.loc[results['year'] <= split_y, component_cols].mean()
late_mean = results.loc[results['year'] >= split_y + 1, component_cols].mean()

# Build the table in one step instead of concatenating onto an empty frame,
# which depends on deprecated pandas behaviour and can yield object columns.
coefficients = pd.DataFrame(
    [early_mean, late_mean, late_mean - early_mean],
    index=[0, 1, 'difference'],
).astype(float)

coefficients

Unnamed: 0,cogs,sga,depr,tax,si,oth
0,0.337226,0.451699,0.522809,1.519892,0.191457,0.324713
1,0.55557,0.721819,0.819671,2.010124,0.222713,0.256884
difference,0.218344,0.27012,0.296862,0.490232,0.031257,-0.067829


In [13]:
# Yearly weight of each expense component in total expenses.
#
# For every fiscal year the raw weight of a component is
# cov(exp, component) / var(exp). The raw weights are then rescaled so that the
# six components sum to one within each year; `total` keeps the pre-scaling sum.
# NOTE(review): if `exp` is exactly the sum of the six components, `total`
# should be ~1 before rescaling — TODO confirm against the data definition.
component_cols = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

weight_rows = []
for year, group in data.groupby('fyear'):
    # Use the default ddof=1 so the variance matches the sample covariance
    # below; mixing ddof=0 with it inflates every raw ratio by n / (n - 1).
    exp_var = group['exp'].var()
    row = {'fyear': year}
    for component in component_cols:
        cov = group['exp'].cov(group[component])
        # A year with zero expense variance has no defined weight.
        row[component] = cov / exp_var if exp_var != 0 else np.nan
    weight_rows.append(row)

# Explicit columns keep the schema stable even if `data` has no rows.
weight = pd.DataFrame(weight_rows, columns=['fyear', *component_cols])
weight['total'] = weight[component_cols].sum(axis=1)
weight[component_cols] = weight[component_cols].div(weight['total'], axis=0)

In [14]:
# Average the yearly normalised weights within each sub-period and compare them.
#   row 0            : mean over fiscal years <= split_y
#   row 1            : mean over fiscal years >= split_y + 1
#   row 'difference' : row 1 minus row 0
component_cols = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

early_mean = weight.loc[weight['fyear'] <= split_y, component_cols].mean()
late_mean = weight.loc[weight['fyear'] >= split_y + 1, component_cols].mean()

# Build the table in one step instead of concatenating onto an empty frame,
# which depends on deprecated pandas behaviour and can yield object columns.
weights = pd.DataFrame(
    [early_mean, late_mean, late_mean - early_mean],
    index=[0, 1, 'difference'],
).astype(float)

weights

Unnamed: 0,cogs,sga,depr,tax,si,oth
0,0.854125,0.136216,0.000701,0.005341,0.002854,0.000764
1,0.842178,0.128876,0.010711,0.009926,0.007074,0.001235
difference,-0.011947,-0.00734,0.01001,0.004585,0.00422,0.000471
