In [8]:
import os
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.stattools import adfuller

from preprocess import VN30, TARGETS, _process_file

In [None]:
for symbol in VN30:
	df, _ = _process_file(symbol)
	p_values_list = [acorr_ljungbox(df[col], lags=[30], return_df=True)['lb_pvalue'].values[0] for col in TARGETS]
	print(f"p-values for {symbol}: {p_values_list}")


p-values for ACB: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for BCM: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for BID: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for BVH: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for CTG: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for FPT: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for GAS: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for GVR: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for HDB: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for HPG: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for LPB: [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]
p-values for MBB: [np.float64(0.0), np.floa

In [7]:
# Đọc lại dữ liệu
for symbol in VN30:
	print(f"Processing {symbol}...")
	df, _ = _process_file(symbol)
	max_lag = 30 

	# Tạo ma trận p-value
	pval_matrix = pd.DataFrame(index=TARGETS, columns=TARGETS, dtype=float)

	# Kiểm định Granger cho từng cặp chiều (nguyên nhân → kết quả)
	for cause in TARGETS:
		for effect in TARGETS:
			if cause == effect:
				pval_matrix.loc[cause, effect] = np.nan
				continue
			# Dữ liệu gồm 2 cột: [effect, cause]
			test_result = grangercausalitytests(df[[effect, cause]], maxlag=max_lag, verbose=False)
			# Lấy p-value nhỏ nhất trong các lag
			min_pvalue = min(test_result[lag][0]['ssr_chi2test'][1] for lag in range(1, max_lag + 1))
			pval_matrix.loc[cause, effect] = min_pvalue
	
	print(pval_matrix)

Processing ACB...
                open           high            low     close
open             NaN   3.186204e-07   1.503439e-15  0.000607
high   3.544558e-255            NaN   9.899201e-45  0.008266
low    5.943174e-198   1.433802e-18            NaN  0.002256
close   0.000000e+00  3.975568e-170  6.264399e-250       NaN
Processing BCM...
                open           high            low         close
open             NaN   2.081378e-07   1.116691e-03  7.191078e-05
high   3.800171e-231            NaN   3.464386e-82  3.539560e-09
low     2.524267e-90   1.562626e-12            NaN  4.413814e-04
close   0.000000e+00  3.637922e-138  5.443153e-280           NaN
Processing BID...
                open           high            low     close
open             NaN   8.563889e-11   7.969439e-17  0.005077
high   2.119985e-270            NaN   6.828355e-56  0.000002
low    3.152517e-207   7.229714e-11            NaN  0.118707
close   0.000000e+00  1.253454e-144  7.727710e-276       NaN
Processing 

In [9]:
def adf_test(series):
    result = adfuller(series, autolag='AIC')
    return result[1]  # Return p-value

for symbol in VN30:
	print(f"Processing {symbol}...")
	df, _ = _process_file(symbol)
	pvals_adf = []

	# Perform ADF test for each column and its first difference
	for col in TARGETS:
		original_p = adf_test(df[col])
		diff_p = adf_test(df[col].diff().dropna())
		pvals_adf.append([original_p, diff_p])

	# Create DataFrame for results
	adf_results = pd.DataFrame(pvals_adf, columns=["Original Series", "1st Difference"], index=TARGETS)
	print(adf_results)

Processing ACB...
       Original Series  1st Difference
open          0.777095    4.232630e-30
high          0.760448    0.000000e+00
low           0.794568    3.623264e-30
close         0.755801    0.000000e+00
Processing BCM...
       Original Series  1st Difference
open          0.539444    1.869708e-13
high          0.581507    0.000000e+00
low           0.575589    0.000000e+00
close         0.522453    8.262781e-12
Processing BID...
       Original Series  1st Difference
open          0.411611    0.000000e+00
high          0.229944    2.525333e-30
low           0.297085    2.885187e-18
close         0.341987    2.537555e-27
Processing BVH...
       Original Series  1st Difference
open          0.135900             0.0
high          0.151728             0.0
low           0.167578             0.0
close         0.156512             0.0
Processing CTG...
       Original Series  1st Difference
open          0.397294    7.642953e-28
high          0.439879    1.424400e-20
low          