In [1]:
import pandas as pd
import statsmodels.api as sm
from scipy import stats

In [None]:
# Read the control-variable sheet and drop its "nid" column in one step,
# then preview the first row.
df_control = pd.read_excel(r"control_variable_2.xlsx").drop(columns=["nid"])
df_control.head(1)

# 固定效應

In [3]:
# One-hot encode the industry and year labels as 0/1 integer columns.
# drop_first=True omits the first level of each, which becomes the baseline.
dummies_industry = pd.get_dummies(
    df_control["Industry"],
    prefix="industry",
    drop_first=True,
).astype(int)
dummies_year = pd.get_dummies(
    df_control["Year"],
    prefix="year",
    drop_first=True,
).astype(int)


# 控制 - 固定

In [None]:
# Remove the raw "Industry" and "Year" columns from the control frame.
# Reassigning instead of using inplace=True, together with errors="ignore",
# keeps this cell safe to re-run: a second execution no longer raises
# KeyError because the columns are already gone.
df_control = df_control.drop(columns=["Industry", "Year"], errors="ignore")
df_control.head(1)

# CAR

In [None]:
# Read only the two CAR window columns and multiply every value by 100.
df_car = pd.read_excel(
    r"cumulated_abnormal_return.xlsx",
    usecols=["CAR [0,1]", "CAR [0,6]"],
).mul(100)
df_car.head(1)

# 各變數數據

In [7]:
# Keep the full sheet as df_ambiguity; df_1 is the same data without the
# "number" column.
df_ambiguity = pd.read_excel(r"result_ambiguity.xlsx")
df_1 = df_ambiguity.drop(labels="number", axis="columns")

In [8]:
# Greenwashing results without the "number" column.
df_2 = pd.read_excel(r"result_greenwashing.xlsx").drop(columns=["number"])

In [9]:
# Readability results without the "number" column.
df_3 = pd.read_excel(r"result_readability.xlsx").drop(columns=["number"])

In [10]:
# Subjectivity results without the "number" column.
df_4 = pd.read_excel(r"result_subjectivity.xlsx").drop(columns=["number"])

In [11]:
# Timeliness results without the "number" column.
df_5 = pd.read_excel(r"result_timeliness.xlsx").drop(columns=["number"])

# Content CAR 

In [None]:
def get_variable_statistic_outcome(dummies_industry, dummies_year, df_model, df_control, df_car):
    """Regress every CAR window on each text variable and tabulate the fits.

    For each column of ``df_model`` and each column of ``df_car`` an OLS model
    is fitted with the CAR column as the dependent variable. The regressors
    are a constant, the text-variable column, the controls ``ROA_t-1`` and
    ``market_cap``, and every industry and year dummy column.

    Parameters
    ----------
    dummies_industry, dummies_year : pandas.DataFrame
        Fixed-effect dummy columns; all of them enter every regression but
        their coefficients are not reported.
    df_model : pandas.DataFrame
        One column per text variable; each column gets its own regressions.
    df_control : pandas.DataFrame
        Must contain the columns ``ROA_t-1`` and ``market_cap``.
    df_car : pandas.DataFrame
        One column per CAR window.

    Returns
    -------
    pandas.DataFrame
        Three rows per (text variable, CAR window) pair, identified by the
        ``Metric`` column:

        * ``Coef``      - coefficient rounded to 3 decimals, as a string,
          suffixed with ``***`` / ``**`` / ``*`` when the p-value is below
          0.01 / 0.05 / 0.1.
        * ``Std_Error`` - standard error rounded to 3 decimals.
        * ``Adj_R2``    - adjusted R-squared rounded to 3 decimals, repeated
          in every statistic column.

        The statistic columns are ``Text_variable``, ``Const``, ``ROA_t-1``
        and ``market_cap``.

    Notes
    -----
    The input frames are combined with ``pd.concat(axis=1)``, which aligns
    rows on the index, so they are expected to share the same row index.
    """
    control_variables = ["ROA_t-1", "market_cap"]
    fixed_effect_columns = list(dummies_industry.columns) + list(dummies_year.columns)
    design = pd.concat([df_model, df_control, dummies_industry, dummies_year], axis=1)

    all_statistic_list = []

    # One set of regressions per text-variable column.
    for model_use in df_model.columns:
        # Terms whose statistics are reported, in output-column order.
        reported_terms = [model_use, "const"] + control_variables

        # The design matrix does not depend on the CAR window, so build it once.
        X = sm.add_constant(design[[model_use] + control_variables + fixed_effect_columns])

        # One regression per CAR window.
        for car in df_car.columns:
            result = sm.OLS(df_car[car], X).fit()

            # Coefficients with significance stars.
            coef_row = ["Coef", model_use, car]
            for term in reported_terms:
                # Classify on the unrounded p-value: rounding first would turn
                # e.g. 0.0096 into 0.01 and silently drop a star.
                p_value = result.pvalues[term]
                if p_value < 0.01:
                    stars = "***"
                elif p_value < 0.05:
                    stars = "**"
                elif p_value < 0.1:
                    stars = "*"
                else:
                    stars = ""
                coef_row.append(str(round(result.params[term], 3)) + stars)

            # Standard errors.
            std_error_row = ["Std_Error", model_use, car]
            std_error_row.extend(round(result.bse[term], 3) for term in reported_terms)

            # Adjusted R-squared, repeated once per statistic column so the row
            # has the same width as the other two (a fixed count of 3 left the
            # last column empty).
            adjusted_r2 = round(result.rsquared_adj, 3)
            adj_r2_row = ["Adj_R2", model_use, car] + [adjusted_r2] * len(reported_terms)

            all_statistic_list.extend([coef_row, std_error_row, adj_r2_row])

    return pd.DataFrame(
        all_statistic_list,
        columns=["Metric", "Model", "CAR", "Text_variable", "Const", "ROA_t-1", "market_cap"],
    )
            

---

# Sentiment 


In [None]:
# Sentiment: read the net-tone sheet and the power sheet.
df_tone, df_power = (
    pd.read_excel(sheet_path)
    for sheet_path in (r"sentiment_Net_Tone.xlsx", r"sentiment_Power.xlsx")
)

In [None]:
def sentiment_get_variable_statistic_outcome(dummies_industry, dummies_year, df_tone, df_power, df_control, df_car):
    """Regress every CAR window on paired tone and power columns.

    The columns of ``df_tone`` and ``df_power`` are paired by position. For
    each pair and each column of ``df_car`` an OLS model is fitted with the
    CAR column as the dependent variable. The regressors are a constant, the
    tone column, the power column, the controls ``ROA_t-1`` and
    ``market_cap``, and every industry and year dummy column.

    Parameters
    ----------
    dummies_industry, dummies_year : pandas.DataFrame
        Fixed-effect dummy columns; all of them enter every regression but
        their coefficients are not reported.
    df_tone, df_power : pandas.DataFrame
        Sentiment columns, paired positionally with ``zip``; if the frames
        have different numbers of columns the extra ones are ignored.
    df_control : pandas.DataFrame
        Must contain the columns ``ROA_t-1`` and ``market_cap``.
    df_car : pandas.DataFrame
        One column per CAR window.

    Returns
    -------
    pandas.DataFrame
        Three rows per (pair, CAR window), identified by the ``Metric``
        column (``Coef``, ``Std_Error``, ``Adj_R2``). ``Model`` holds the
        power column's name. The statistic columns are ``Net_Tone``,
        ``Sentiment_Power``, ``Const``, ``ROA_t-1`` and ``market_cap``.
        Coefficients are strings rounded to 3 decimals and suffixed with
        ``***`` / ``**`` / ``*`` when the p-value is below 0.01 / 0.05 / 0.1.

    Notes
    -----
    The input frames are combined with ``pd.concat(axis=1)``, which aligns
    rows on the index, so they are expected to share the same row index.
    NOTE(review): this presumably requires the tone and power column names to
    be distinct from each other - confirm against the input sheets.
    """
    control_variables = ["ROA_t-1", "market_cap"]
    fixed_effect_columns = list(dummies_industry.columns) + list(dummies_year.columns)
    design = pd.concat([df_tone, df_power, df_control, dummies_industry, dummies_year], axis=1)

    all_statistic_list = []

    # One set of regressions per (tone, power) column pair.
    for model_tone, model_power in zip(df_tone.columns, df_power.columns):
        # Terms whose statistics are reported, in output-column order.
        reported_terms = [model_tone, model_power, "const"] + control_variables

        # The design matrix does not depend on the CAR window, so build it once.
        X = sm.add_constant(
            design[[model_tone, model_power] + control_variables + fixed_effect_columns]
        )

        # One regression per CAR window.
        for car in df_car.columns:
            result = sm.OLS(df_car[car], X).fit()

            # Coefficients with significance stars.
            coef_row = ["Coef", model_power, car]
            for term in reported_terms:
                # Classify on the unrounded p-value: rounding first would turn
                # e.g. 0.0096 into 0.01 and silently drop a star.
                p_value = result.pvalues[term]
                if p_value < 0.01:
                    stars = "***"
                elif p_value < 0.05:
                    stars = "**"
                elif p_value < 0.1:
                    stars = "*"
                else:
                    stars = ""
                coef_row.append(str(round(result.params[term], 3)) + stars)

            # Standard errors.
            std_error_row = ["Std_Error", model_power, car]
            std_error_row.extend(round(result.bse[term], 3) for term in reported_terms)

            # Adjusted R-squared, repeated once per statistic column so the row
            # has the same width as the other two (a fixed count of 3 left the
            # last two columns empty).
            adjusted_r2 = round(result.rsquared_adj, 3)
            adj_r2_row = ["Adj_R2", model_power, car] + [adjusted_r2] * len(reported_terms)

            all_statistic_list.extend([coef_row, std_error_row, adj_r2_row])

    return pd.DataFrame(
        all_statistic_list,
        columns=["Metric", "Model", "CAR", "Net_Tone", "Sentiment_Power", "Const", "ROA_t-1", "market_cap"],
    )
            

# Bi-LSTM 

In [None]:
# Run the single-variable regressions on the df_tone columns and export the
# resulting table to Excel.
df = get_variable_statistic_outcome(
    dummies_industry=dummies_industry,
    dummies_year=dummies_year,
    df_model=df_tone,
    df_control=df_control,
    df_car=df_car,
)
df.to_excel("Bi-LSTM_1.xlsx", index=False)

In [None]:
# Run the single-variable regressions on the df_power columns and export the
# resulting table to Excel.
# NOTE(review): this file name uses an underscore ("Bi_LSTM") while the
# df_tone export uses a hyphen ("Bi-LSTM") - confirm that is intended.
df = get_variable_statistic_outcome(
    dummies_industry=dummies_industry,
    dummies_year=dummies_year,
    df_model=df_power,
    df_control=df_control,
    df_car=df_car,
)
df.to_excel("Bi_LSTM_2.xlsx", index=False)