In [None]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
from scipy.stats import ttest_ind

# Per-ticker study of whether daily news sentiment relates to the next
# trading day's return. One summary row is appended per ticker that is
# processed without error; the rows are collected into `summary_df`.
tickers = ["AAPL", "GOOGL", "META", "AMZN", "MSFT"]
summary_results = []

for ticker in tickers:
    print(f"▶ 分析中: {ticker}")
    try:
        # Load the per-article sentiment data once; it feeds both the daily
        # aggregation and the article counts further down.
        df = pd.read_csv(f"../data/{ticker}_news_with_sentiment.csv")
        df['date'] = pd.to_datetime(df['datetime']).dt.strftime('%Y-%m-%d')

        # Mean sentiment score per calendar day.
        sentiment_daily = (
            df.groupby('date')['sentiment_score']
            .mean()
            .reset_index()
            .rename(columns={'date': 'Date', 'sentiment_score': 'SentimentScore'})
        )

        # Download roughly the last 90 days of prices.
        end_date = datetime.today()
        start_date = end_date - timedelta(days=90)
        stock = yf.download(
            ticker,
            start=start_date.strftime('%Y-%m-%d'),
            end=end_date.strftime('%Y-%m-%d'),
        )
        # Flatten a two-level column index down to its first level so that
        # 'Close' can be selected by name.
        if isinstance(stock.columns, pd.MultiIndex):
            stock.columns = stock.columns.get_level_values(0)

        # Keep only the date and the closing price, with single-level names.
        stock = stock[['Close']].reset_index()
        stock.columns = ['Date', 'Close']
        stock['Date'] = pd.to_datetime(stock['Date']).dt.strftime('%Y-%m-%d')

        # Compute the next-day return on the FULL price series before merging.
        # Doing it after the inner merge would make shift(-1) jump to the next
        # day that happens to have news, so the "1-day" return could silently
        # span several trading days.
        # shift(-1) relies on ascending date order; ISO date strings sort
        # chronologically.
        stock = stock.sort_values('Date').reset_index(drop=True)
        stock['NextClose'] = stock['Close'].shift(-1)
        stock['Return1d'] = (stock['NextClose'] - stock['Close']) / stock['Close']

        # Keep only the trading days that also have a sentiment score.
        merged = pd.merge(stock, sentiment_daily, on='Date', how='inner')

        # Welch's t-test (unequal variances) between returns following
        # positive-sentiment days and negative-sentiment days, plus the
        # correlation between daily sentiment and next-day return.
        pos = merged[merged['SentimentScore'] > 0]['Return1d'].dropna()
        neg = merged[merged['SentimentScore'] < 0]['Return1d'].dropna()
        t_stat, p_value = ttest_ind(pos, neg, equal_var=False)
        corr = merged[['SentimentScore', 'Return1d']].corr().iloc[0, 1]

        # Article-level counts, taken from the frame loaded above (one row
        # per article) instead of re-reading the CSV.
        num_articles = len(df)
        n_positive_articles = (df['sentiment_score'] > 0).sum()
        n_negative_articles = (df['sentiment_score'] < 0).sum()
        n_neutral_articles = (df['sentiment_score'] == 0).sum()

        summary_results.append({
            'Ticker': ticker,
            'N_Pos': len(pos),  # number of DAYS with a usable return
            'N_Neg': len(neg),
            'Article_Pos': n_positive_articles,  # number of positive articles
            'Article_Neg': n_negative_articles,  # number of negative articles
            'Article_Neutral': n_neutral_articles,
            'Total_Articles': num_articles,
            'Mean_Pos': pos.mean(),
            'Mean_Neg': neg.mean(),
            'T_Stat': t_stat,
            'P_Value': p_value,
            'Correlation': corr
        })
    except Exception as e:
        # Best-effort batch: report the failure and continue with the next
        # ticker rather than aborting the whole run.
        print(f"An error has occured: {ticker} → {e}")

summary_df = pd.DataFrame(summary_results)
summary_df

In [None]:
# Horizontal bar chart of the sentiment/return correlation, one bar per ticker.
correlation_by_ticker = summary_df[["Ticker", "Correlation"]].set_index("Ticker")
correlation_by_ticker.plot.barh(
    title="Correlation (Sentiment vs Return)",
    figsize=(6, 4),
    legend=False,
)