# Analysis on cleaned dataset from Data.ipynb

In [None]:
#only perform once in the beginning
#%pip install linearmodels

In [2]:
import pandas as pd
import numpy as np

# Fixed effects regression:
from linearmodels import PanelOLS
import statsmodels.api as sm


In [None]:
# Load the two cleaned CSV exports produced by Data.ipynb.
# NOTE(review): both reads keep pandas' default index, while PanelOLS works on a
# two-level (entity, time) MultiIndex — confirm the index is set before fitting.
houshold, data = (pd.read_csv(csv_name) for csv_name in ('houshold', 'data'))

# Analysis base modification
- with & without groupings to make sure the definition is correct
- with my dataset and with POWELL dataset
- with time and entity effect


In [None]:
# Specification: rebate timing dummies (erebate_-2.0 ... erebate_11.0), each
# scaled row-wise by the rebate amount ERBATAMT.
exog_vars = houshold[[f'erebate_{lag}.0' for lag in range(-2, 12)]].mul(houshold['ERBATAMT'], axis=0)
exog_vars['month_total'] = pd.Categorical(houshold['month_total'])
exog = sm.add_constant(exog_vars)

# Two-way fixed effects; regressors absorbed by the effects are dropped.
model = PanelOLS(
    houshold['TFEARN'],
    exog,
    entity_effects=True,
    time_effects=True,
    check_rank=True,
    drop_absorbed=True,
)

model.fit()

In [None]:
# Repeat the regression above, but pool the rebate dummies into groups
# (rebate0 + rebate1, rebate2 + rebate3, ...).
# The dummies are added up column-wise; the rows stay as they are and are not aggregated.

# .copy() gives an independent frame, so the new columns below are not written
# onto a column selection of `houshold` (which raises SettingWithCopyWarning).
powell_hh = houshold[['ERBATAMT', 'month_total', 'TFEARN']].copy()
powell_hh['reb'] = houshold['erebate_-1.0'] + houshold['erebate_0.0']
# Months 1 and 2: each monthly dummy must enter exactly one group, exactly once
# (adding erebate_1.0 to itself would double-count month 1 and omit month 2).
powell_hh['reb_lag'] = houshold['erebate_1.0'] + houshold['erebate_2.0']
powell_hh['reb_lag1'] = houshold['erebate_3.0'] + houshold['erebate_4.0']
powell_hh['reb_lag2'] = (
    houshold['erebate_5.0'] + houshold['erebate_6.0'] + houshold['erebate_7.0'] + houshold['erebate_8.0']
)

# Only the first two groups enter the model; 'reb_lag1' and 'reb_lag2' are
# built above but currently left out of the regressor list.
exog_vars = powell_hh[['reb', 'reb_lag']].multiply(powell_hh['ERBATAMT'], axis='index')

exog = sm.add_constant(exog_vars)

model = PanelOLS(
    powell_hh['TFEARN'],
    exog,
    entity_effects=True,
    time_effects=True,
    check_rank=True,
    drop_absorbed=True,
)

model.fit()


# include interaction terms?
# what is the 2SLS approach in Powell? is the first stage Rebate = gamma (interaction terms of household characteristics)?
# read the Stata code again (especially concerning the interaction term!)
# set tfearn = 0 if tfearn < 0.

# don't use the grouped houshold dataframe; use the regular one with pp as index and cluster by hh in the regressions.

In [None]:
# Specification: rebate timing dummies only (not scaled by the rebate amount).
# .assign returns a new frame, so month_total is not written onto a column
# selection of `houshold` (which raises SettingWithCopyWarning).
exog_vars = houshold[[
    'erebate_-2.0', 'erebate_-1.0', 'erebate_0.0', 'erebate_1.0', 'erebate_2.0',
    'erebate_3.0', 'erebate_4.0', 'erebate_5.0', 'erebate_6.0', 'erebate_7.0',
    'erebate_8.0', 'erebate_9.0', 'erebate_10.0', 'erebate_11.0',
]].assign(month_total=pd.Categorical(houshold['month_total']))
exog = sm.add_constant(exog_vars)

# Two-way fixed effects; regressors absorbed by the effects are dropped.
model = PanelOLS(
    houshold['TFEARN'],
    exog,
    entity_effects=True,
    time_effects=True,
    check_rank=True,
    drop_absorbed=True,
)

model.fit()


# there is a cluster option as in the stata code

In [None]:
# next step: estimation with grouping:
# Total-income brackets, keyed by group id. range(lo, hi) contains lo .. hi-1,
# so each upper bound is the next bracket's lower bound: the brackets are
# contiguous and disjoint, and boundary values such as 2499 or 12499 fall into
# a group instead of matching nothing.
# NOTE(review): the boundary between groups 2 and 3 is set to 5000; if the
# intended cut-off is 6000, change it in both ranges.
totinc_dict = {
    1: range(0, 2500),
    2: range(2500, 5000),
    3: range(5000, 12500),
    4: range(12500, 100000),
}

# Rebate timing dummies that get interacted with the income groups.
reb_dummies_lst = ['erebate_-1.0', 'erebate_0.0', 'erebate_1.0', 'erebate_2.0', 'erebate_3.0', 'erebate_4.0']
totinc_dict.keys()

In [None]:
def get_grouping(df_column, group_dict, reb_dummies_df):
    """Interact rebate dummies with group-membership indicators.

    Every value of ``df_column`` is assigned to the first group in
    ``group_dict`` whose container holds it (flag 0 when none does). For each
    group, the rebate dummies are multiplied row-wise by that group's
    membership indicator, so a dummy stays non-zero only on rows that belong
    to the group.

    Parameters
    ----------
    df_column : pandas.Series
        Values to classify (e.g. an income column).
    group_dict : dict
        Maps a group id to a container supporting ``in`` (e.g. a ``range``).
    reb_dummies_df : pandas.DataFrame
        Rebate dummy columns; expected to share the index of ``df_column``.

    Returns
    -------
    pandas.DataFrame
        ``df_column`` followed by one ``<dummy name>-j<group id>`` column per
        rebate dummy and group, ordered by group as in ``group_dict``.
    """
    # Flag each row with the id of the first matching group (0 = no match).
    group_flag = df_column.apply(
        lambda x: next((k for k, v in group_dict.items() if x in v), 0)
    )

    interaction_blocks = []
    for group_id in group_dict:
        # Compare the flag directly instead of indexing get_dummies output:
        # a group without any member has no dummy column, so looking it up
        # raises KeyError. Here such a group simply yields an all-False mask.
        in_group = group_flag.eq(group_id)
        block = reb_dummies_df.multiply(in_group, axis='index')
        interaction_blocks.append(block.add_suffix(f'-j{group_id}'))

    # One index-aligned concat instead of merging inside the loop.
    return pd.concat([pd.DataFrame(df_column), *interaction_blocks], axis=1, join='inner')


# Show the interaction dummies of the rebate lags with the total-income groups.
get_grouping(
    df_column=houshold['TFTOTINC'],
    group_dict=totinc_dict,
    reb_dummies_df=houshold[reb_dummies_lst],
)

In [None]:
# Build the income-group interaction dummies and sanity-check the result.
test_group_dummies = get_grouping(
    df_column=houshold['TFTOTINC'],
    group_dict=totinc_dict,
    reb_dummies_df=houshold[reb_dummies_lst],
)

# First line: frame dimensions; second line: column sum of 'erebate_3.0-j3'.
print(test_group_dummies.shape, test_group_dummies['erebate_3.0-j3'].sum(), sep='\n')
reb_interaction_dummies = test_group_dummies.columns
reb_interaction_dummies

In [None]:
# Attach TFEARN and ERBATAMT to the interaction dummies by index, then add
# month_total as a categorical column.
df_w_group_interaction = pd.merge(
    test_group_dummies,
    houshold[['TFEARN', 'ERBATAMT']],
    left_index=True,
    right_index=True,
).assign(month_total=pd.Categorical(houshold['month_total']))
df_w_group_interaction

In [None]:
# Specification: rebate dummies interacted with the total-income groups.
# Columns are ordered group by group (j1 .. j4), and within each group by lag
# (erebate_-1.0 .. erebate_4.0). Scaling by ERBATAMT and the month_total
# categorical are not part of this specification.
exog_vars = df_w_group_interaction[
    [f'erebate_{lag}.0-j{group}' for group in range(1, 5) for lag in range(-1, 5)]
]

exog = sm.add_constant(exog_vars)

# Two-way fixed effects; regressors absorbed by the effects are dropped.
model = PanelOLS(
    houshold['TFEARN'],
    exog,
    entity_effects=True,
    time_effects=True,
    check_rank=True,
    drop_absorbed=True,
)

model.fit()