In [14]:
from pathlib import Path

import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from tqdm.auto import tqdm

In [4]:
# Load the control-variable sheet and discard its "nid" column in one step.
df_control = pd.read_excel(r"control_variable_2.xlsx").drop(columns=["nid"])

In [6]:
# Integer (0/1) indicator columns for the Industry and Year categories.
# drop_first=True omits the first level of each category from the encoding.
dummies_industry, dummies_year = (
    pd.get_dummies(df_control[source_col], prefix=label, drop_first=True).astype(int)
    for source_col, label in (("Industry", "industry"), ("Year", "year"))
)

In [None]:
# Remove the raw categorical columns from the control frame; their dummy
# encodings live in dummies_industry / dummies_year.
# Rebinding with errors="ignore" (instead of an in-place drop) keeps this cell
# safe to re-run: a second execution is a no-op rather than a KeyError.
df_control = df_control.drop(columns=["Industry", "Year"], errors="ignore")
df_control.head(1)

In [None]:
# Load the cumulated-abnormal-return sheet, leaving out the "number" column
# and the CAR [0,3] / CAR [0,4] columns.
df_car = pd.read_excel(r"cumulated_abnormal_return.xlsx").drop(
    columns=["number", "CAR [0,3]", "CAR [0,4]"]
)
df_car.head(1)

In [None]:
# The ambiguity sheet is kept twice: df_ambiguity is the frame as read,
# df_1 is the same data without the "number" column.
df_ambiguity = pd.read_excel(r"result_ambiguity.xlsx")
df_1 = df_ambiguity.drop(columns=["number"])

# The remaining result sheets are read in order and stripped of "number".
df_2, df_3, df_4, df_5 = (
    pd.read_excel(result_file).drop(columns=["number"])
    for result_file in (
        r"result_greenwashing.xlsx",
        r"result_readability.xlsx",
        r"result_subjectivity.xlsx",
        r"result_timeliness.xlsx",
    )
)

# The two sentiment sheets are loaded as-is; no column is dropped here.
# NOTE(review): sentiment_vif_with_fix_effect iterates over every column of
# these frames — confirm they contain no identifier column such as "number".
df_tone = pd.read_excel(r"sentiment_Net_Tone.xlsx")
df_power = pd.read_excel(r"sentiment_Power.xlsx")

In [None]:
# Control-variable column names. The three-variable set is the two-variable
# set with "log_Firm_size" placed in front; both are independent lists.
control_variables_two = ["ROA_t-1", "market_cap"]
control_variables_three = ["log_Firm_size", *control_variables_two]

def vif_with_fix_effect(control_variables_list, output_dir="VIF_Two_Control"):
    """Write one VIF table per explanatory column, with industry/year dummies.

    For every column of the module-level frames df_1 ... df_5, a design matrix
    is built from that column, the requested control columns, all industry and
    year dummy columns and a constant. The variance inflation factor is then
    reported for the constant, the explanatory column and the controls, and
    saved to ``<output_dir>/<column>_VIF.xlsx``.

    Parameters
    ----------
    control_variables_list : sequence of str
        Names of the columns of ``df_control`` to use as controls.
    output_dir : str or path-like, default "VIF_Two_Control"
        Directory receiving the Excel files. It is created if missing. Pass a
        different directory when using another control set so the results of
        one run do not overwrite those of another.

    Notes
    -----
    Reads the globals ``df_control``, ``df_1`` ... ``df_5``,
    ``dummies_industry`` and ``dummies_year``. The frames are joined with
    ``pd.concat(axis=1)``, which matches rows by index label.
    NOTE(review): assumes all frames list the same observations in the same
    order — confirm against the source files.
    """
    controls = list(control_variables_list)

    # to_excel does not create missing folders, so make sure the target exists.
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # The dummy column names never change between iterations; build them once.
    fixed_effect_cols = list(dummies_industry.columns) + list(dummies_year.columns)

    for df_variable in tqdm([df_1, df_2, df_3, df_4, df_5], desc="衝鴨!!!"):
        df = pd.concat([df_control, df_variable, dummies_industry, dummies_year], axis=1)
        for model in df_variable.columns:
            X = sm.add_constant(df[[model] + controls + fixed_effect_cols])

            # Only these rows are exported. The dummies stay in the design
            # matrix (they affect every VIF) but their own VIFs are not needed,
            # so the per-column regression is skipped for them.
            important_features = ['const', model] + controls
            exog = X.values
            vif_rows = [
                (feature, variance_inflation_factor(exog, position))
                for position, feature in enumerate(X.columns)
                if feature in important_features
            ]
            vif_data = pd.DataFrame(vif_rows, columns=["feature", "VIF"])

            vif_data.to_excel(target_dir / f"{model}_VIF.xlsx", index=False)

In [22]:
def sentiment_vif_with_fix_effect(control_variables_list, df_tone, df_power, output_dir="VIF_Two_Control"):
    """Write VIF tables for paired tone/power columns, with industry/year dummies.

    The columns of ``df_tone`` and ``df_power`` are paired positionally (first
    with first, second with second, ...). For each pair a design matrix is
    built from both columns, the requested controls, all industry and year
    dummy columns and a constant. The variance inflation factor is reported
    for the constant, the two sentiment columns and the controls, and saved to
    ``<output_dir>/<power column>_VIF.xlsx``.

    Parameters
    ----------
    control_variables_list : sequence of str
        Names of the columns of ``df_control`` to use as controls.
    df_tone, df_power : pandas.DataFrame
        Frames whose columns are paired by position. If one frame has more
        columns than the other, the surplus columns are ignored (``zip``
        stops at the shorter input).
    output_dir : str or path-like, default "VIF_Two_Control"
        Directory receiving the Excel files. It is created if missing.

    Notes
    -----
    Reads the globals ``df_control``, ``dummies_industry`` and
    ``dummies_year``. The frames are joined with ``pd.concat(axis=1)``, which
    matches rows by index label.
    NOTE(review): every column of both frames is treated as a regressor —
    confirm the inputs carry no identifier column.
    """
    controls = list(control_variables_list)

    # to_excel does not create missing folders, so make sure the target exists.
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    fixed_effect_cols = list(dummies_industry.columns) + list(dummies_year.columns)
    df = pd.concat([df_control, df_tone, df_power, dummies_industry, dummies_year], axis=1)

    # Iterating a DataFrame yields its column labels. Materialising the pairs
    # gives tqdm a length, and distinct loop names keep the frame parameters
    # intact instead of rebinding them to column labels.
    column_pairs = list(zip(df_tone, df_power))
    for tone_col, power_col in tqdm(column_pairs):
        X = sm.add_constant(df[[tone_col, power_col] + controls + fixed_effect_cols])

        # Only these rows are exported. The dummies stay in the design matrix
        # but their own VIFs are not needed, so their regressions are skipped.
        important_features = ['const', tone_col, power_col] + controls
        exog = X.values
        vif_rows = [
            (feature, variance_inflation_factor(exog, position))
            for position, feature in enumerate(X.columns)
            if feature in important_features
        ]
        vif_data = pd.DataFrame(vif_rows, columns=["feature", "VIF"])

        vif_data.to_excel(target_dir / f"{power_col}_VIF.xlsx", index=False)

In [None]:
# Export the VIF tables using the two-variable control set.
vif_with_fix_effect(control_variables_list=control_variables_two)

In [None]:
# Export the tone/power VIF tables using the two-variable control set.
sentiment_vif_with_fix_effect(
    control_variables_list=control_variables_two,
    df_tone=df_tone,
    df_power=df_power,
)