In [1]:
import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Suppress every warning for the remainder of the session.
# NOTE(review): this is a blanket filter, so deprecation and numerical warnings
# raised by the libraries imported above are hidden as well — consider narrowing
# it to specific categories once the notebook is stable.
warnings.filterwarnings('ignore')

In [2]:
# Sample window and sub-period boundary, expressed in fiscal years.
start_y = 2012  # first fiscal year kept in the sample (inclusive)
split_y = 2017  # last fiscal year of the first sub-period; the second starts at split_y + 1
end_y = 2023    # last fiscal year kept in the sample (inclusive)

In [3]:
# Load the master panel, keep fiscal years inside [start_y, end_y] (both ends
# inclusive), and drop every row that is missing any variable listed below.
required_cols = ['rev', 'exp_lag', 'exp_lead', 'cogs', 'sga', 'depr', 'tax', 'si', 'oth']

data = pd.read_csv('./data/master_data.csv')
in_window = data['fyear'].between(start_y, end_y)
data = data.loc[in_window].dropna(subset=required_cols)

In [4]:
# Coefficients
# For every fiscal year, run a cross-sectional OLS of `rev` on an intercept,
# `exp_lag`, the six expense components and `exp_lead`, and keep only the
# slopes on the six components. `results` ends up with one row per year and
# the columns ['year', 'cogs', 'sga', 'depr', 'tax', 'si', 'oth'].
components = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']
regressors = ['exp_lag', *components, 'exp_lead']

# Collect one record per year and build the frame once at the end, rather than
# concatenating onto an initially empty DataFrame inside the loop.
yearly_rows = []
for year, group in data.groupby('fyear'):
    X = sm.add_constant(group[regressors])
    y = group['rev']

    model = sm.OLS(y, X).fit()
    coef = model.params

    yearly_rows.append({'year': year, **{name: coef[name] for name in components}})

# Passing `columns` keeps the expected layout even when no year produced a row.
results = pd.DataFrame(yearly_rows, columns=['year', *components])

In [5]:
# Average the yearly component coefficients within each sub-period:
#   row 0            -> years up to and including split_y
#   row 1            -> years from split_y + 1 onwards
#   row 'difference' -> row 1 minus row 0
components = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

# Cast to float before averaging so the result is numeric whatever dtype the
# columns of `results` arrived with.
early_mean = results.loc[results['year'] <= split_y, components].astype(float).mean()
late_mean = results.loc[results['year'] >= split_y + 1, components].astype(float).mean()

# Build the table directly from the two mean vectors instead of concatenating
# onto an empty DataFrame.
coefficients = pd.DataFrame([early_mean, late_mean], index=[0, 1])
coefficients.loc['difference'] = late_mean - early_mean

coefficients

Unnamed: 0,cogs,sga,depr,tax,si,oth
0,0.440908,0.659121,0.645848,1.547493,0.187052,0.152747
1,0.401987,0.656774,0.532957,2.335698,-0.000661,0.19679
difference,-0.038921,-0.002347,-0.112891,0.788205,-0.187714,0.044043


In [6]:
# For every fiscal year, compute cov(exp, component) / var(exp) for each of the
# six components, then rescale the six values so they sum to one within the
# year. `total` keeps the pre-normalisation sum of the six ratios.
components = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

weight_rows = []
for year, group in data.groupby('fyear'):
    # Take the variance and the covariances from the same covariance matrix so
    # numerator and denominator share one degrees-of-freedom convention.
    cov_matrix = group[['exp', *components]].cov()
    exp_var = cov_matrix.loc['exp', 'exp']

    row = {'fyear': year}
    for component in components:
        # NaN (not None) marks an undefined ratio so the columns stay float.
        row[component] = cov_matrix.loc['exp', component] / exp_var if exp_var != 0 else np.nan
    weight_rows.append(row)

# Passing `columns` keeps the expected layout even when no year produced a row.
weight = pd.DataFrame(weight_rows, columns=['fyear', *components])
weight['total'] = weight[components].sum(axis=1)
weight[components] = weight[components].div(weight['total'], axis=0)

In [7]:
# Average the yearly normalised weights within each sub-period:
#   row 0            -> years up to and including split_y
#   row 1            -> years from split_y + 1 onwards
#   row 'difference' -> row 1 minus row 0
components = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

# Cast to float before averaging so missing entries are treated as NaN and the
# result is numeric whatever dtype the columns of `weight` arrived with.
early_mean = weight.loc[weight['fyear'] <= split_y, components].astype(float).mean()
late_mean = weight.loc[weight['fyear'] >= split_y + 1, components].astype(float).mean()

# Build the table directly from the two mean vectors instead of concatenating
# onto an empty DataFrame.
weights = pd.DataFrame([early_mean, late_mean], index=[0, 1])
weights.loc['difference'] = late_mean - early_mean

weights

Unnamed: 0,cogs,sga,depr,tax,si,oth
0,0.856555,0.116833,0.010248,0.01116,0.004461,0.000743
1,0.861917,0.110135,0.010984,0.007924,0.008628,0.000412
difference,0.005362,-0.006698,0.000736,-0.003235,0.004168,-0.000332
