## Descriptive Statistics

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, jarque_bera
from statsmodels.tsa.stattools import adfuller

# Tạo một DataFrame ví dụ
data = np.random.randn(1000)  # Dữ liệu ngẫu nhiên
df = pd.DataFrame(data, columns=["value"])

# Thống kê mô tả cơ bản
description = df.describe()

# Tính độ lệch chuẩn (SD), độ nghiêng (Skewness), độ nhọn (Kurtosis)
sd = df.std()[0]
skewness = skew(df["value"])
kurt = kurtosis(df["value"])

# Kiểm định Jarque-Bera
jb_stat, jb_p_value = jarque_bera(df["value"])

# Kiểm định ADF (Augmented Dickey-Fuller)
adf_stat, adf_p_value, _, _, _, _ = adfuller(df["value"])

# Hiển thị kết quả
print("Descriptive Statistics Summary:")
print(description)

print("\nAdditional Statistics:")
print(f"Standard Deviation: {sd}")
print(f"Skewness: {skewness}")
print(f"Kurtosis: {kurt}")
print(f"Jarque-Bera Statistic: {jb_stat}, p-value: {jb_p_value}")
print(f"ADF Statistic: {adf_stat}, p-value: {adf_p_value}")


Descriptive Statistics Summary:
             value
count  1000.000000
mean     -0.012856
std       1.000852
min      -2.742142
25%      -0.690777
50%       0.005132
75%       0.671490
max       3.033309

Additional Statistics:
Standard Deviation: 1.0008518848887378
Skewness: 0.011029849205772154
Kurtosis: -0.21333745520802783
Jarque-Bera Statistic: 1.9166458370268986, p-value: 0.38353556730936256
ADF Statistic: -31.50522873258428, p-value: 0.0


In [2]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, jarque_bera
from statsmodels.tsa.stattools import adfuller

In [3]:
data = pd.read_excel('total.xlsx', sheet_name="Abnormal")

In [4]:
data

Unnamed: 0,abnormal_return,date,herding_behavior,loss_aversion,overconfidence,mental_accounting,EPU,Unnamed: 7,Unnamed: 8
0,-0.11683,2016-01-05,1.1940,-20.021108,4492.0956,258.754588,134.557232,2016.0,1.0
1,0.27626,2016-01-06,1.5284,-393.918243,3513.5350,257.722784,169.812594,2016.0,2.0
2,-0.50229,2016-01-07,1.5035,-1660.728733,5424.6609,253.037282,140.222263,2016.0,3.0
3,-0.96876,2016-01-08,1.3644,-1424.111931,4489.1200,249.687024,132.802503,2016.0,4.0
4,-3.62344,2016-01-12,3.5312,-3339.907279,6242.5559,240.993388,159.225263,2016.0,5.0
...,...,...,...,...,...,...,...,...,...
1949,-0.20278,2023-12-25,0.8729,66.278940,2275.0198,132.875000,,,
1950,0.09694,2023-12-26,0.5971,-118.776160,2597.6489,133.046200,,,
1951,1.16473,2023-12-27,1.0959,1536.529300,5271.3745,134.361200,,,
1952,0.27077,2023-12-28,1.2209,-149.761440,4529.7546,134.975000,,,


In [5]:
# Lấy các chuỗi thời gian từ các cột
abnormal_return = data['abnormal_return']
economic_policy_uncertainty = data['EPU']
herding_behavior = data['herding_behavior']
loss_aversion = data['loss_aversion']
mental_accounting = data['mental_accounting']
overconfidence = data['overconfidence']

In [6]:
variables = {
    "Abnormal return": abnormal_return,
    "Economic policy uncertainty": economic_policy_uncertainty,
    "Herding Behavior": herding_behavior,
    "Loss Aversion": loss_aversion,
    "Mental Accounting": mental_accounting,
    "Overconfidence": overconfidence
}

In [7]:
for name, series in variables.items():
    variables[name] = series.dropna()

In [8]:
# Kiểm định ADF- kiểm tra tính dừng của time series
for name, series in variables.items():
    result = adfuller(series)
    print(f"{name}:")
    print("ADF Statistic:", result[0])
    print("p-value:", result[1])
    if result[1] <= 0.05:
        print(f"{name} is stationary.")
    else:
        print(f"{name} is not stationary.")
    print("-" * 30)

Abnormal return:
ADF Statistic: -16.985650595550315
p-value: 8.945555216927051e-30
Abnormal return is stationary.
------------------------------
Economic policy uncertainty:
ADF Statistic: -3.998292920500164
p-value: 0.0014194764474962929
Economic policy uncertainty is stationary.
------------------------------
Herding Behavior:
ADF Statistic: -8.639022945571202
p-value: 5.4901271102448706e-14
Herding Behavior is stationary.
------------------------------
Loss Aversion:
ADF Statistic: -29.927362210639487
p-value: 0.0
Loss Aversion is stationary.
------------------------------
Mental Accounting:
ADF Statistic: -2.2638933556361285
p-value: 0.18387756131450816
Mental Accounting is not stationary.
------------------------------
Overconfidence:
ADF Statistic: -9.704257821497567
p-value: 1.0521542362288131e-16
Overconfidence is stationary.
------------------------------


## before covid

In [9]:
data_bf = pd.read_excel('total.xlsx', sheet_name="before")

In [10]:

abnormal_return = data_bf['abnormal_return']
economic_policy_uncertainty = data_bf['EPU']
herding_behavior = data_bf['herding_behavior']
loss_aversion = data_bf['loss_aversion']
mental_accounting = data_bf['mental_accounting']
overconfidence = data_bf['overconfidence']

variables = {
    "Abnormal return": abnormal_return,
    "Economic policy uncertainty": economic_policy_uncertainty,
    "Herding Behavior": herding_behavior,
    "Loss Aversion": loss_aversion,
    "Mental Accounting": mental_accounting,
    "Overconfidence": overconfidence
}

# Kiểm định ADF- kiểm tra tính dừng của time series
results = []

# Kiểm tra tính dừng cho từng cột
for col in data_bf.columns:
    series = data_bf[col].dropna()  # Loại bỏ giá trị NaN
    result = adfuller(series)
    results.append({
        "Variable": col,
        "ADF Statistic": result[0],
        "p-value": result[1],
        "Stationary": "Yes" if result[1] <= 0.05 else "No"
    })

# Chuyển kết quả thành DataFrame
results_df = pd.DataFrame(results)

In [11]:
results_df

Unnamed: 0,Variable,ADF Statistic,p-value,Stationary
0,abnormal_return,-13.412661,4.342948e-25,Yes
1,date,10.121638,1.0,No
2,herding_behavior,-10.390557,2.0262170000000002e-18,Yes
3,loss_aversion,-30.839286,0.0,Yes
4,overconfidence,-4.648953,0.0001048346,Yes
5,mental_accounting,-1.680559,0.4411559,No
6,EPU,-2.603434,0.09228955,No


## Statistics of before, during and after covid period

In [12]:
# import pandas as pd
# from statsmodels.tsa.stattools import adfuller

# # Đọc dữ liệu từ các file Excel
# df_before = pd.read_excel('total.xlsx', sheet_name="before")
# df_during = pd.read_excel('total.xlsx', sheet_name="during")
# df_after = pd.read_excel('total.xlsx', sheet_name="after")

# # Các tên cột mà bạn cần kiểm tra tính dừng
# variables = {
#     "Abnormal return": "abnormal_return",
#     "Economic policy uncertainty": "EPU",
#     "Herding Behavior": "herding_behavior",
#     "Loss Aversion": "loss_aversion",
#     "Mental Accounting": "mental_accounting",
#     "Overconfidence": "overconfidence"
# }

# # Hàm kiểm định ADF và lưu kết quả vào DataFrame
# def adf_test(df, variables, period):
#     results = []
#     for var_name, column_name in variables.items():
#         # Lấy chuỗi tương ứng từ DataFrame bằng tên cột
#         series = df[column_name].dropna()  # Loại bỏ giá trị NaN
#         result = adfuller(series)
#         results.append({
#             "Variable": var_name,
#             "ADF Statistic": result[0],
#             "p-value": result[1],
#             "Stationary": "Yes" if result[1] <= 0.05 else "No",
#             "Period": period  # Thêm cột mốc thời gian
#         })
#     return results

# # Kiểm định tính dừng cho ba bảng thời gian và lưu vào 1 DataFrame duy nhất
# all_results = []

# # Kiểm định cho "Before" period
# all_results.extend(adf_test(df_before, variables, "Before"))

# # Kiểm định cho "During" period
# all_results.extend(adf_test(df_during, variables, "During"))

# # Kiểm định cho "After" period
# all_results.extend(adf_test(df_after, variables, "After"))

# # Chuyển kết quả thành DataFrame
# adf_results = pd.DataFrame(all_results)

# # Hiển thị kết quả
# print(adf_results)



                       Variable  ADF Statistic       p-value Stationary  \
0               Abnormal return     -13.412661  4.342948e-25        Yes   
1   Economic policy uncertainty      -2.603434  9.228955e-02         No   
2              Herding Behavior     -10.390557  2.026217e-18        Yes   
3                 Loss Aversion     -30.839286  0.000000e+00        Yes   
4             Mental Accounting      -1.680559  4.411559e-01         No   
5                Overconfidence      -4.648953  1.048346e-04        Yes   
6               Abnormal return      -5.608387  1.219321e-06        Yes   
7   Economic policy uncertainty      -1.929056  3.185180e-01         No   
8              Herding Behavior      -4.698870  8.456507e-05        Yes   
9                 Loss Aversion     -10.535009  8.947913e-19        Yes   
10            Mental Accounting      -1.927172  3.193886e-01         No   
11               Overconfidence      -7.600814  2.392086e-11        Yes   
12              Abnormal 

In [13]:
adf_results

Unnamed: 0,Variable,ADF Statistic,p-value,Stationary,Period
0,Abnormal return,-13.412661,4.342948e-25,Yes,Before
1,Economic policy uncertainty,-2.603434,0.09228955,No,Before
2,Herding Behavior,-10.390557,2.0262170000000002e-18,Yes,Before
3,Loss Aversion,-30.839286,0.0,Yes,Before
4,Mental Accounting,-1.680559,0.4411559,No,Before
5,Overconfidence,-4.648953,0.0001048346,Yes,Before
6,Abnormal return,-5.608387,1.219321e-06,Yes,During
7,Economic policy uncertainty,-1.929056,0.318518,No,During
8,Herding Behavior,-4.69887,8.456507e-05,Yes,During
9,Loss Aversion,-10.535009,8.947912999999999e-19,Yes,During


In [14]:
# Đọc dữ liệu từ các file Excel
df_before = pd.read_excel('total.xlsx', sheet_name="before")
df_during = pd.read_excel('total.xlsx', sheet_name="during")
df_after = pd.read_excel('total.xlsx', sheet_name="after")

# Các tên cột mà bạn cần kiểm tra tính dừng
variables = {
    "Abnormal return": "abnormal_return",
    "Economic policy uncertainty": "EPU",
    "Herding Behavior": "herding_behavior",
    "Loss Aversion": "loss_aversion",
    "Mental Accounting": "mental_accounting",
    "Overconfidence": "overconfidence"
}

# Hàm kiểm định ADF và lưu kết quả vào DataFrame
def adf_test(df, variables, period):
    results = []
    for var_name, column_name in variables.items():
        series = df[column_name].dropna()  # Loại bỏ giá trị NaN
        result = adfuller(series)
        results.append({
            "Variable": var_name,
            "ADF Statistic": result[0],
            "p-value": result[1],
            "Stationary": "Yes" if result[1] <= 0.05 else "No",
            "Period": period
        })
    return results

# Hàm tính thống kê mô tả và các thống kê khác
def calculate_statistics(df, period):
    results = []

    # Thống kê mô tả cơ bản
    description = df.describe().transpose()
    
    # Các thống kê cần tính thêm: SD, Skewness, Kurtosis
    for column in df.columns:
        if column != 'date':  # Bỏ qua cột 'date'
            series = df[column].dropna()  # Loại bỏ NaN
            
            # Độ lệch chuẩn (Standard Deviation)
            sd = series.std()
            
            # Độ nghiêng (Skewness)
            skewness = skew(series)
            
            # Độ nhọn (Kurtosis)
            kurt = kurtosis(series)
            
            # Kiểm định Jarque-Bera
            jb_stat, jb_p_value = jarque_bera(series)
            
            # Lưu kết quả
            results.append({
                "Variable": column,
                "Mean": description.at[column, 'mean'],
                "SD": sd,
                "Skewness": skewness,
                "Kurtosis": kurt,
                "Jarque-Bera Statistic": jb_stat,
                "Jarque-Bera p-value": jb_p_value,
                "Period": period
            })
    
    return results

# Kiểm định cho các DataFrame
all_results = []

# Kiểm định cho "Before" period
all_results.extend(adf_test(df_before, variables, "Before"))
all_results.extend(calculate_statistics(df_before, "Before"))

# Kiểm định cho "During" period
all_results.extend(adf_test(df_during, variables, "During"))
all_results.extend(calculate_statistics(df_during, "During"))

# Kiểm định cho "After" period
all_results.extend(adf_test(df_after, variables, "After"))
all_results.extend(calculate_statistics(df_after, "After"))

# Chuyển kết quả thành DataFrame
final_results = pd.DataFrame(all_results)

# Lưu kết quả vào file Excel
final_results.to_excel("statistics_results.xlsx", index=False)

# Hiển thị kết quả
print(final_results)

                       Variable  ADF Statistic       p-value Stationary  \
0               Abnormal return     -13.412661  4.342948e-25        Yes   
1   Economic policy uncertainty      -2.603434  9.228955e-02         No   
2              Herding Behavior     -10.390557  2.026217e-18        Yes   
3                 Loss Aversion     -30.839286  0.000000e+00        Yes   
4             Mental Accounting      -1.680559  4.411559e-01         No   
5                Overconfidence      -4.648953  1.048346e-04        Yes   
6               abnormal_return            NaN           NaN        NaN   
7              herding_behavior            NaN           NaN        NaN   
8                 loss_aversion            NaN           NaN        NaN   
9                overconfidence            NaN           NaN        NaN   
10            mental_accounting            NaN           NaN        NaN   
11                          EPU            NaN           NaN        NaN   
12              Abnormal 

In [15]:
final_results.to_excel(r'C:\Users\DELL\Desktop\financial materials\table_1.xlsx')

In [16]:
df_before.columns

Index(['abnormal_return', 'date', 'herding_behavior', 'loss_aversion',
       'overconfidence', 'mental_accounting', 'EPU'],
      dtype='object')

## Johansen cointegration test

In [17]:
from statsmodels.tsa.vector_ar.vecm import VECM

In [20]:
import pandas as pd
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# Giả sử bạn có DataFrame df với các biến trên
# Dữ liệu mẫu: tạo một DataFrame giả


abnormal_return = data_bf['abnormal_return']
economic_policy_uncertainty = data_bf['EPU']
herding_behavior = data_bf['herding_behavior']
loss_aversion = data_bf['loss_aversion']
mental_accounting = data_bf['mental_accounting']
overconfidence = data_bf['overconfidence']

variables = {
    "Abnormal return": abnormal_return,
    "Economic policy uncertainty": economic_policy_uncertainty,
    "Herding Behavior": herding_behavior,
    "Loss Aversion": loss_aversion,
    "Mental Accounting": mental_accounting,
    "Overconfidence": overconfidence
}

data = df_before[["abnormal_return","herding_behavior","loss_aversion","overconfidence","mental_accounting"]]

# Chuyển dữ liệu thành DataFrame
#df = pd.DataFrame(data)

# Thực hiện kiểm tra đồng liên kết Johansen
jres = coint_johansen(data, det_order=0, k_ar_diff=1)

# Lấy các giá trị thống kê Trace và Max-Eigen cùng với các giá trị p
trace_stat = jres.lr1  # Giá trị thống kê Trace
max_eigen_stat = jres.lr2  # Giá trị thống kê Max-Eigen
cv_trace = jres.cvt[:, 1]  # Giá trị p-value cho Trace (cột thứ 2)
cv_max_eigen = jres.cvt[:, 2]  # Giá trị p-value cho Max-Eigen (cột thứ 3)

# Tạo bảng kết quả
result = pd.DataFrame({
    'Hypothesized No. of CE(s)': ['None', 'At most 1', 'At most 2', 'At most 3', 'At most 4'
                                   ],
    'Eigenvalue': jres.eig,
    'Trace Statistic': trace_stat,
    'Critical Value (5%) Trace': cv_trace,
    'Prob. (Trace)': jres.cvt[:, 1],
    'Max-Eigen Statistic': max_eigen_stat,
    'Critical Value (5%) Max-Eigen': cv_max_eigen,
    'Prob. (Max-Eigen)': jres.cvt[:, 2]
})

# In kết quả bảng
print(result)


  Hypothesized No. of CE(s)  Eigenvalue  Trace Statistic  \
0                      None    0.417613      1326.614144   
1                 At most 1    0.312010       799.509200   
2                 At most 2    0.260055       434.877834   
3                 At most 3    0.132242       141.228058   
4                 At most 4    0.003002         2.931650   

   Critical Value (5%) Trace  Prob. (Trace)  Max-Eigen Statistic  \
0                    69.8189        69.8189           527.104944   
1                    47.8545        47.8545           364.631366   
2                    29.7961        29.7961           293.649776   
3                    15.4943        15.4943           138.296408   
4                     3.8415         3.8415             2.931650   

   Critical Value (5%) Max-Eigen  Prob. (Max-Eigen)  
0                        77.8202            77.8202  
1                        54.6815            54.6815  
2                        35.4628            35.4628  
3             

In [40]:
df_before = pd.read_excel('total.xlsx', sheet_name="before")
df_during = pd.read_excel('total.xlsx', sheet_name="during")
df_after = pd.read_excel('total.xlsx', sheet_name="after")

In [41]:
import pandas as pd
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# Giả sử bạn có DataFrame df với các biến trên
# Dữ liệu mẫu: tạo một DataFrame giả



  Hypothesized No. of CE(s)  Eigenvalue  Trace Statistic  \
0                      None    0.397262       651.141576   
1                 At most 1    0.342526       395.980306   
2                 At most 2    0.230781       184.628145   
3                 At most 3    0.092207        52.388616   
4                 At most 4    0.007181         3.632281   

   Critical Value (5%) Trace  Prob. (Trace)  Max-Eigen Statistic  \
0                    69.8189        69.8189           255.161269   
1                    47.8545        47.8545           211.352162   
2                    29.7961        29.7961           132.239529   
3                    15.4943        15.4943            48.756335   
4                     3.8415         3.8415             3.632281   

   Critical Value (5%) Max-Eigen  Prob. (Max-Eigen)  
0                        77.8202            77.8202  
1                        54.6815            54.6815  
2                        35.4628            35.4628  
3             

In [42]:
result

Unnamed: 0,Hypothesized No. of CE(s),Eigenvalue,Trace Statistic,Critical Value (5%) Trace,Prob. (Trace),Max-Eigen Statistic,Critical Value (5%) Max-Eigen,Prob. (Max-Eigen)
0,,0.417613,1326.614144,69.8189,69.8189,527.104944,77.8202,77.8202
1,At most 1,0.31201,799.5092,47.8545,47.8545,364.631366,54.6815,54.6815
2,At most 2,0.260055,434.877834,29.7961,29.7961,293.649776,35.4628,35.4628
3,At most 3,0.132242,141.228058,15.4943,15.4943,138.296408,19.9349,19.9349
4,At most 4,0.003002,2.93165,3.8415,3.8415,2.93165,6.6349,6.6349


In [43]:
result_1

Unnamed: 0,Hypothesized No. of CE(s),Eigenvalue,Trace Statistic,Critical Value (5%) Trace,Prob. (Trace),Max-Eigen Statistic,Critical Value (5%) Max-Eigen,Prob. (Max-Eigen)
0,,0.397262,651.141576,69.8189,69.8189,255.161269,77.8202,77.8202
1,At most 1,0.342526,395.980306,47.8545,47.8545,211.352162,54.6815,54.6815
2,At most 2,0.230781,184.628145,29.7961,29.7961,132.239529,35.4628,35.4628
3,At most 3,0.092207,52.388616,15.4943,15.4943,48.756335,19.9349,19.9349
4,At most 4,0.007181,3.632281,3.8415,3.8415,3.632281,6.6349,6.6349


In [39]:
df_during.isna().sum()

abnormal_return        0
date                   0
herding_behavior       0
loss_aversion         20
overconfidence         0
mental_accounting      0
EPU                  481
dtype: int64

In [2]:
abnormal_return = df_after['abnormal_return']
economic_policy_uncertainty = df_after['EPU']
herding_behavior = df_after['herding_behavior']
loss_aversion = df_after['loss_aversion']
mental_accounting = df_after['mental_accounting']
overconfidence = df_after['overconfidence']

variables = {
    "Abnormal return": abnormal_return,
    "Economic policy uncertainty": economic_policy_uncertainty,
    "Herding Behavior": herding_behavior,
    "Loss Aversion": loss_aversion,
    "Mental Accounting": mental_accounting,
    "Overconfidence": overconfidence
}

data = df_after[["abnormal_return","herding_behavior","loss_aversion","overconfidence","mental_accounting"]]

# Chuyển dữ liệu thành DataFrame
#df = pd.DataFrame(data)

# Thực hiện kiểm tra đồng liên kết Johansen
jres = coint_johansen(data, det_order=0, k_ar_diff=1)

# Lấy các giá trị thống kê Trace và Max-Eigen cùng với các giá trị p
trace_stat = jres.lr1  # Giá trị thống kê Trace
max_eigen_stat = jres.lr2  # Giá trị thống kê Max-Eigen
cv_trace = jres.cvt[:, 1]  # Giá trị p-value cho Trace (cột thứ 2)
cv_max_eigen = jres.cvt[:, 2]  # Giá trị p-value cho Max-Eigen (cột thứ 3)

# Tạo bảng kết quả
result_2 = pd.DataFrame({
    'Hypothesized No. of CE(s)': ['None', 'At most 1', 'At most 2', 'At most 3', 'At most 4'
                                   ],
    'Eigenvalue': jres.eig,
    'Trace Statistic': trace_stat,
    'Critical Value (5%) Trace': cv_trace,
    'Prob. (Trace)': jres.cvt[:, 1],
    'Max-Eigen Statistic': max_eigen_stat,
    'Critical Value (5%) Max-Eigen': cv_max_eigen,
    'Prob. (Max-Eigen)': jres.cvt[:, 2]
})

# In kết quả bảng
print(result_2)

  Hypothesized No. of CE(s)  Eigenvalue  Trace Statistic  \
0                      None    0.400580       742.111346   
1                 At most 1    0.344798       502.080597   
2                 At most 2    0.314284       303.781820   
3                 At most 3    0.236939       126.832262   
4                 At most 4    0.000014         0.006457   

   Critical Value (5%) Trace  Prob. (Trace)  Max-Eigen Statistic  \
0                    69.8189        69.8189           240.030749   
1                    47.8545        47.8545           198.298778   
2                    29.7961        29.7961           176.949558   
3                    15.4943        15.4943           126.825805   
4                     3.8415         3.8415             0.006457   

   Critical Value (5%) Max-Eigen  Prob. (Max-Eigen)  
0                        77.8202            77.8202  
1                        54.6815            54.6815  
2                        35.4628            35.4628  
3             

In [46]:
result_2

Unnamed: 0,Hypothesized No. of CE(s),Eigenvalue,Trace Statistic,Critical Value (5%) Trace,Prob. (Trace),Max-Eigen Statistic,Critical Value (5%) Max-Eigen,Prob. (Max-Eigen)
0,,0.40058,742.111346,69.8189,69.8189,240.030749,77.8202,77.8202
1,At most 1,0.344798,502.080597,47.8545,47.8545,198.298778,54.6815,54.6815
2,At most 2,0.314284,303.78182,29.7961,29.7961,176.949558,35.4628,35.4628
3,At most 3,0.236939,126.832262,15.4943,15.4943,126.825805,19.9349,19.9349
4,At most 4,1.4e-05,0.006457,3.8415,3.8415,0.006457,6.6349,6.6349


In [48]:
result_df = pd.concat([result, result_1, result_2], axis=0)

In [49]:
result_df.to_excel(r')

Unnamed: 0,Hypothesized No. of CE(s),Eigenvalue,Trace Statistic,Critical Value (5%) Trace,Prob. (Trace),Max-Eigen Statistic,Critical Value (5%) Max-Eigen,Prob. (Max-Eigen)
0,,0.417613,1326.614144,69.8189,69.8189,527.104944,77.8202,77.8202
1,At most 1,0.31201,799.5092,47.8545,47.8545,364.631366,54.6815,54.6815
2,At most 2,0.260055,434.877834,29.7961,29.7961,293.649776,35.4628,35.4628
3,At most 3,0.132242,141.228058,15.4943,15.4943,138.296408,19.9349,19.9349
4,At most 4,0.003002,2.93165,3.8415,3.8415,2.93165,6.6349,6.6349
0,,0.397262,651.141576,69.8189,69.8189,255.161269,77.8202,77.8202
1,At most 1,0.342526,395.980306,47.8545,47.8545,211.352162,54.6815,54.6815
2,At most 2,0.230781,184.628145,29.7961,29.7961,132.239529,35.4628,35.4628
3,At most 3,0.092207,52.388616,15.4943,15.4943,48.756335,19.9349,19.9349
4,At most 4,0.007181,3.632281,3.8415,3.8415,3.632281,6.6349,6.6349


In [3]:
import pandas as pd
import numpy as np
from statsmodels.tsa.vector_ar.vecm import coint_johansen

df_after = df_after = pd.read_excel('total.xlsx', sheet_name="after")

abnormal_return = df_after['abnormal_return']
economic_policy_uncertainty =df_after['EPU']
herding_behavior = df_after['herding_behavior']
loss_aversion = df_after['loss_aversion']
mental_accounting = df_after['mental_accounting']
overconfidence = df_after['overconfidence']

variables = {
    "Abnormal return": abnormal_return,
    "Economic policy uncertainty": economic_policy_uncertainty,
    "Herding Behavior": herding_behavior,
    "Loss Aversion": loss_aversion,
    "Mental Accounting": mental_accounting,
    "Overconfidence": overconfidence
}

data = df_after[["abnormal_return","herding_behavior","loss_aversion","overconfidence","mental_accounting"]]

# Chuyển dữ liệu thành DataFrame
#df = pd.DataFrame(data)

# Thực hiện kiểm tra đồng liên kết Johansen
jres = coint_johansen(data, det_order=0, k_ar_diff=1)

# Lấy các giá trị thống kê Trace và Max-Eigen cùng với các giá trị p
trace_stat = jres.lr1  # Giá trị thống kê Trace
max_eigen_stat = jres.lr2  # Giá trị thống kê Max-Eigen
cv_trace = jres.cvt[:, 1]  # Giá trị p-value cho Trace (cột thứ 2)
cv_max_eigen = jres.cvt[:, 2]  # Giá trị p-value cho Max-Eigen (cột thứ 3)

# Tạo bảng kết quả
result_2 = pd.DataFrame({
    'Hypothesized No. of CE(s)': ['None', 'At most 1', 'At most 2', 'At most 3', 'At most 4'
                                   ],
    'Eigenvalue': jres.eig,
    'Trace Statistic': trace_stat,
    'Critical Value (5%) Trace': cv_trace,
    'Prob. (Trace)': jres.cvt[:, 1],
    'Max-Eigen Statistic': max_eigen_stat,
    'Critical Value (5%) Max-Eigen': cv_max_eigen,
    'Prob. (Max-Eigen)': jres.cvt[:, 2]
})

# In kết quả bảng
print(result_2)

  Hypothesized No. of CE(s)  Eigenvalue  Trace Statistic  \
0                      None    0.400580       742.111346   
1                 At most 1    0.344798       502.080597   
2                 At most 2    0.314284       303.781820   
3                 At most 3    0.236939       126.832262   
4                 At most 4    0.000014         0.006457   

   Critical Value (5%) Trace  Prob. (Trace)  Max-Eigen Statistic  \
0                    69.8189        69.8189           240.030749   
1                    47.8545        47.8545           198.298778   
2                    29.7961        29.7961           176.949558   
3                    15.4943        15.4943           126.825805   
4                     3.8415         3.8415             0.006457   

   Critical Value (5%) Max-Eigen  Prob. (Max-Eigen)  
0                        77.8202            77.8202  
1                        54.6815            54.6815  
2                        35.4628            35.4628  
3             