In [526]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import grangercausalitytests

In [350]:
import pandas as pd

# 读取三个数据集
df_vix = pd.read_csv("daily_vix.csv", parse_dates=["date"])
df_nvda = pd.read_csv("daily_nvda.csv", parse_dates=["Date"]).rename(columns={"Date": "date"})
df_sentiscores = pd.read_csv("daily_sentiment_bert.csv", parse_dates=["date"])

# 筛选日期范围
start_date = "2017-02-28"
end_date = "2022-02-28"
date_range = pd.date_range(start=start_date, end=end_date)
df_all = pd.DataFrame({"date": date_range})

# 添加前缀：VIX
df_vix = df_vix[df_vix["date"].between(start_date, end_date)]
df_vix = df_vix.rename(columns=lambda x: f"vix_{x}" if x != "date" else x)

# 添加前缀：NVDA
df_nvda.columns = [col.lower() for col in df_nvda.columns]
df_nvda = df_nvda[df_nvda["date"].between(start_date, end_date)]
df_nvda = df_nvda.rename(columns=lambda x: f"nvda_{x}" if x != "date" else x)

# 情感数据也筛选日期范围
df_sentiscores = df_sentiscores[df_sentiscores["date"].between(start_date, end_date)]

# 依次合并
df_merged = df_all.merge(df_sentiscores, on="date", how="left") \
                  .merge(df_vix, on="date", how="left") \
                  .merge(df_nvda, on="date", how="left")

# 查看缺失值
print("Missing values per column:")
print(df_merged.isnull().sum())

# 查看合并后的数据
print(df_merged.head())

# 保存结果
df_merged.to_csv("factor_data.csv", index=False)

Missing values per column:
date                     0
avg_sentiment_score      0
model_type               0
vix_open               567
vix_high               567
vix_low                567
vix_close              567
nvda_open              567
nvda_high              567
nvda_low               567
nvda_close             567
nvda_volume            567
dtype: int64
        date  avg_sentiment_score    model_type  vix_open  vix_high  vix_low  \
0 2017-02-28            -0.029851  FinBERT+BERT     12.19     12.96    12.13   
1 2017-03-01             0.075949  FinBERT+BERT     12.31     12.58    11.78   
2 2017-03-02             0.187500  FinBERT+BERT     12.43     12.71    11.32   
3 2017-03-03             0.014925  FinBERT+BERT     11.96     11.97    10.94   
4 2017-03-04            -0.142857  FinBERT+BERT       NaN       NaN      NaN   

   vix_close  nvda_open  nvda_high  nvda_low  nvda_close  nvda_volume  
0      12.92    2.58514    2.59484   2.48609     2.50336  614325793.0  
1      12.5

In [528]:
# 加载合并后的数据
df = pd.read_csv("factor_data.csv", parse_dates=["date"])
df.set_index("date", inplace=True)

# 创建目标变量：未来收益（5日收益）
df["nvda_return_5d"] = df["nvda_close"].pct_change(5).shift(-5)

# 选择因果检验变量（必须无缺失）
granger_df = df[["nvda_return_5d", "vix_close", "avg_sentiment_score"]].dropna()

# 对数据进行标准化（可选，但可提高检验稳定性）
from sklearn.preprocessing import StandardScaler
granger_df_scaled = pd.DataFrame(StandardScaler().fit_transform(granger_df),
                                 columns=granger_df.columns,
                                 index=granger_df.index)

# Granger 检验：VIX 是否 Granger 导致 NVDA 收益
print("VIX → NVDA 收益:")
grangercausalitytests(granger_df_scaled[["nvda_return_5d", "vix_close"]], maxlag=5, verbose=True)

# Granger 检验：情绪是否 Granger 导致 NVDA 收益
print("\n情绪 → NVDA 收益:")
grangercausalitytests(granger_df_scaled[["nvda_return_5d", "avg_sentiment_score"]], maxlag=5, verbose=True)

VIX → NVDA 收益:

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=0.1288  , p=0.7197  , df_denom=1253, df_num=1
ssr based chi2 test:   chi2=0.1291  , p=0.7194  , df=1
likelihood ratio test: chi2=0.1291  , p=0.7194  , df=1
parameter F test:         F=0.1288  , p=0.7197  , df_denom=1253, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=17.7571 , p=0.0000  , df_denom=1250, df_num=2
ssr based chi2 test:   chi2=35.6563 , p=0.0000  , df=2
likelihood ratio test: chi2=35.1591 , p=0.0000  , df=2
parameter F test:         F=17.7571 , p=0.0000  , df_denom=1250, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=18.6701 , p=0.0000  , df_denom=1247, df_num=3
ssr based chi2 test:   chi2=56.3248 , p=0.0000  , df=3
likelihood ratio test: chi2=55.0965 , p=0.0000  , df=3
parameter F test:         F=18.6701 , p=0.0000  , df_denom=1247, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F

{1: ({'ssr_ftest': (0.17424934964047448, 0.6764340755899128, 1253.0, 1),
   'ssr_chi2test': (0.17466654680641333, 0.6759973687298704, 1),
   'lrtest': (0.1746544028669632, 0.676007991683863, 1),
   'params_ftest': (0.17424934964013805, 0.6764340755900347, 1253.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x3011b8d60>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x301275840>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (0.4449572043172673, 0.6409531652129379, 1250.0, 2),
   'ssr_chi2test': (0.8934740662690728, 0.639712109246358, 2),
   'lrtest': (0.8931561709623566, 0.6398137980663331, 2),
   'params_ftest': (0.4449572043174569, 0.6409531652128044, 1250.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x311c353f0>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x30103baf0>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])]),
 3: ({'ssr_ftest': (1.653947875232367

Understanding the dynamic interactions between asset returns, market volatility, and investor sentiment is central to both academic research and practical investment strategy. While existing literature often emphasizes how forward-looking indicators such as the VIX and sentiment can predict asset returns, it is equally important to examine whether asset returns themselves feed back into these variables. Such feedback effects could reflect how market participants update their expectations and emotional responses after observing asset price movements.

To investigate this, we conduct reverse Granger causality tests from NVDA stock returns to the VIX and sentiment scores. The results reveal that NVDA returns Granger-cause the VIX at lag 1 with strong statistical significance (p = 0.0002), indicating that changes in NVDA’s price may trigger immediate adjustments in market volatility expectations on the following day. However, this effect quickly dissipates, as no significant causality is observed at longer lags, suggesting that the influence of returns on volatility is short-lived and reactive.

In [536]:
# 反向因果检验

df_merged['nvda_return'] = df_merged['nvda_close'].pct_change()
df_merged = df_merged.dropna(subset=['nvda_return'])  # 去除NaN

from statsmodels.tsa.stattools import grangercausalitytests

# 设置最大滞后阶
max_lag = 5

# 检验：NVDA收益 → VIX
print("NVDA 收益 → VIX:")
data_nvda_to_vix = df_merged[['vix_close', 'nvda_return']].dropna()
grangercausalitytests(data_nvda_to_vix, maxlag=max_lag, verbose=True)

# 检验：NVDA收益 → 情绪
print("\nNVDA 收益 → 情绪:")
data_nvda_to_sentiment = df_merged[['avg_sentiment_score', 'nvda_return']].dropna()
grangercausalitytests(data_nvda_to_sentiment, maxlag=max_lag, verbose=True)

NVDA 收益 → VIX:

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=14.3258 , p=0.0002  , df_denom=1254, df_num=1
ssr based chi2 test:   chi2=14.3600 , p=0.0002  , df=1
likelihood ratio test: chi2=14.2786 , p=0.0002  , df=1
parameter F test:         F=14.3258 , p=0.0002  , df_denom=1254, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.3313  , p=0.7180  , df_denom=1251, df_num=2
ssr based chi2 test:   chi2=0.6653  , p=0.7170  , df=2
likelihood ratio test: chi2=0.6651  , p=0.7171  , df=2
parameter F test:         F=0.3313  , p=0.7180  , df_denom=1251, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=1.4768  , p=0.2191  , df_denom=1248, df_num=3
ssr based chi2 test:   chi2=4.4553  , p=0.2163  , df=3
likelihood ratio test: chi2=4.4474  , p=0.2170  , df=3
parameter F test:         F=1.4768  , p=0.2191  , df_denom=1248, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F

{1: ({'ssr_ftest': (0.9302858366913659, 0.33491672730092426, 1821.0, 1),
   'ssr_chi2test': (0.9318184327979414, 0.33439085183361283, 1),
   'lrtest': (0.9315804969473902, 0.3344525704327106, 1),
   'params_ftest': (0.9302858366932205, 0.3349167273004511, 1821.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1579dc220>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x327e2b640>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (3.048364445519725, 0.04767898977566788, 1818.0, 2),
   'ssr_chi2test': (6.11349657225793, 0.047040408744697976, 2),
   'lrtest': (6.103268516483695, 0.04728159088729954, 2),
   'params_ftest': (3.048364445519346, 0.0476789897756844, 1818.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x157fa1a80>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x157fa3850>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])]),
 3: ({'ssr_ftest': (2.407172093429316

For sentiment, we find marginal Granger causality at lag 2 (p = 0.0477), implying that returns may have a modest delayed effect on public or investor sentiment. Again, the effect is not persistent across longer lags, reinforcing the interpretation that return-driven sentiment shifts are temporary.

In sum, these findings suggest an asymmetric relationship: while VIX and sentiment are useful in forecasting NVDA returns, the reverse direction—from returns to VIX or sentiment—reflects short-term feedback or emotional adjustment mechanisms. This supports the theoretical view that volatility and sentiment act more as leading indicators, whereas returns serve as real-time signals to which market participants respond in the short term.