In [1]:
import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
warnings.filterwarnings('ignore')

In [2]:
# Sample window: fiscal years start_y..end_y, both bounds inclusive.
start_y, end_y = 1967, 2005
# Last fiscal year of the first sub-period; later years form the second one.
split_y = 1985

In [3]:
# Load the master panel, keep fiscal years inside [start_y, end_y] (inclusive),
# and drop rows missing any variable used in the yearly revenue regressions.
data = (
    pd.read_csv('./data/master_data.csv')
    .loc[lambda df: df['fyear'].between(start_y, end_y)]
    .dropna(subset=['rev', 'exp_lag', 'exp_lead', 'cogs', 'sga', 'depr', 'tax', 'si', 'oth'])
)

In [4]:
# Coefficients
# For every fiscal year, regress revenue on the lagged, contemporaneous
# (by component) and lead expense variables, and keep the slope estimated
# for each contemporaneous expense component.
component_cols = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']
regressor_cols = ['exp_lag'] + component_cols + ['exp_lead']

# Collect one record per year and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop is quadratic and, when seeded with
# an empty frame, relies on deprecated concat behaviour.
yearly_coefs = []
for year, group in data.groupby('fyear'):
    X = sm.add_constant(group[regressor_cols])
    y = group['rev']

    model = sm.OLS(y, X).fit()
    coef = model.params

    yearly_coefs.append({'year': year, **{col: coef[col] for col in component_cols}})

# Explicit columns keep the expected layout even when there are no groups.
results = pd.DataFrame(yearly_coefs, columns=['year'] + component_cols)

In [5]:
# Average the yearly regression coefficients within each sub-period
# (row 0: years <= split_y, row 1: years >= split_y + 1) and report the change.
component_cols = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

early_mean = results.loc[results['year'] <= split_y, component_cols].mean()
late_mean = results.loc[results['year'] >= split_y + 1, component_cols].mean()

# Build the table directly from the two mean rows instead of concatenating
# onto an empty frame, which is deprecated in pandas and only stayed quiet
# because warnings are globally ignored.
coefficients = pd.DataFrame([early_mean, late_mean], index=[0, 1])
coefficients.loc['difference'] = coefficients.iloc[1] - coefficients.iloc[0]

coefficients

Unnamed: 0,cogs,sga,depr,tax,si,oth
0,0.324612,0.444228,0.556301,1.289574,0.076302,0.330598
1,0.531894,0.623009,0.698164,2.071183,0.391964,0.39558
difference,0.207282,0.178782,0.141863,0.781609,0.315662,0.064982


In [6]:
# Per fiscal year, compute cov(exp, component) / var(exp) for each expense
# component, then rescale the six ratios so they sum to one within the year.
component_cols = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

weight_rows = []
for year, group in data.groupby('fyear'):
    exp_var = group['exp'].var(ddof=1)
    row = {'fyear': year}
    for component in component_cols:
        cov = group[['exp', component]].cov().iloc[0, 1]
        # NaN (not None) marks an undefined ratio so the column stays numeric.
        row[component] = cov / exp_var if exp_var != 0 else np.nan

    weight_rows.append(row)

# Explicit columns keep the frame well-formed even when there are no groups.
weight = pd.DataFrame(weight_rows, columns=['fyear'] + component_cols)
# 'total' holds the pre-normalisation sum of the six ratios for each year.
weight['total'] = weight[component_cols].sum(axis=1)
weight[component_cols] = weight[component_cols].div(weight['total'], axis=0)

In [7]:
# Average the normalised yearly weights within each sub-period
# (row 0: years <= split_y, row 1: years >= split_y + 1) and report the change.
component_cols = ['cogs', 'sga', 'depr', 'tax', 'si', 'oth']

early_mean = weight.loc[weight['fyear'] <= split_y, component_cols].mean()
late_mean = weight.loc[weight['fyear'] >= split_y + 1, component_cols].mean()

# Build the table directly from the two mean rows instead of concatenating
# onto an empty frame, which is deprecated in pandas and only stayed quiet
# because warnings are globally ignored.
weights = pd.DataFrame([early_mean, late_mean], index=[0, 1])
weights.loc['difference'] = weights.iloc[1] - weights.iloc[0]

weights

Unnamed: 0,cogs,sga,depr,tax,si,oth
0,0.860028,0.134761,-0.000958,0.005311,0.0007,0.000158
1,0.828359,0.147279,0.008029,0.007647,0.006741,0.001945
difference,-0.031669,0.012518,0.008987,0.002336,0.006041,0.001787
