# Preprocessing Data

（修改news的输入）import包和创建路径

In [10]:
import os
import pandas as pd
import numpy as np
import shutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Input, GRU, Dropout, Dense
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

script_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()
price_dir = os.path.join(script_dir, 'price')
news_dir = os.path.join(script_dir, 'sampled_news') # ‘1years’ to ‘10years’
filepath_all = os.path.join(news_dir, 'all_last_1years.csv') # ‘all_last_1years.csv’ to ‘all_last_10years.csv’
filepath_nas = os.path.join(news_dir, 'nasdaq_last_1years.csv') # ‘nasdaq_last_1years.csv’ to ‘nasdaq_last_10years.csv’


## Price

读取wiki（一次性，不用运行）把sp500的所有股票名导出

In [None]:
# url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
# tables = pd.read_html(url)
# sp500_df = tables[0]  # The first table contains the S&P 500 components

# # Save to CSV
# sp500_df.to_csv("sp500_companies.csv", index=False)
# print("S&P 500 CSV saved successfully!")

读入symbol

In [14]:
# 读取标普500数据并筛选2023年前加入的公司
symbol = pd.read_csv(os.path.join(script_dir, 'sp500_companies.csv'))
symbol['Date_added'] = pd.to_datetime(symbol['Date_added'], errors='coerce')
symbol_before_2023 = symbol[symbol['Date_added'] < pd.Timestamp('2022-12-31')]
valid_symbols = set(symbol_before_2023['Symbol'].dropna().astype(str))


  symbol['Date_added'] = pd.to_datetime(symbol['Date_added'], errors='coerce')


In [15]:
len(valid_symbols)

468

（一次性）把筛选数量和10年后的股票price表存到一个文件夹里（435支股票）->stock_price_data_preprocessed

按照5年则是403只股票
按照1年是468

In [16]:
output_dir = os.path.join(script_dir, 'stock_price_data_preprocessed')
os.makedirs(output_dir, exist_ok=True)


symbol435 = []

# 遍历price目录下的CSV文件
for filename in os.listdir(price_dir):
    if filename.endswith('.csv'):
        symbol_name = filename.replace('.csv', '')
        if symbol_name in valid_symbols:
            symbol435.append(symbol_name)
            file_path = os.path.join(price_dir, filename)
            df = pd.read_csv(file_path, parse_dates=['date'])
            df = df[df['date'] > pd.Timestamp('2022-12-31')]
            if not df.empty:
                df = df.rename(columns={'date': 'Date'})
                output_file_path = os.path.join(output_dir, filename)
                df.to_csv(output_file_path, index=False)

print(len(symbol435))


425


## News

读入news数据

In [17]:
# 读入news数据

nas = pd.read_csv(filepath_nas, parse_dates=['Date'])
nas = nas[nas['Stock_symbol'].isin(symbol_before_2023['Symbol'])]
#nas.to_csv('news_321.csv', index=False)

# chunk_size = 100
# for chunk in pd.read_csv(filepath_nas, chunksize=chunk_size):
#     nas_1 = chunk
#     break


In [12]:
nas.head(5)

Unnamed: 0.1,Unnamed: 0,Date,Article_title,Stock_symbol,Url,Publisher,Author,Article,Lsa_summary,Luhn_summary,Textrank_summary,Lexrank_summary
0,0.0,2023-12-16 23:00:00+00:00,Interesting A Put And Call Options For August ...,A,https://www.nasdaq.com/articles/interesting-a-...,,,"Investors in Agilent Technologies, Inc. (Symbo...",Because the $125.00 strike represents an appro...,The current analytical data (including greeks ...,Below is a chart showing the trailing twelve m...,"At Stock Options Channel, our YieldBoost formu..."
1,1.0,2023-12-12 00:00:00+00:00,Wolfe Research Initiates Coverage of Agilent T...,A,https://www.nasdaq.com/articles/wolfe-research...,,,"Fintel reports that on December 13, 2023, Wolf...","Fintel reports that on December 13, 2023, Wolf...","T. Rowe Price Investment Management holds 10,1...",Agilent Technologies Declares $0.24 Dividend O...,The projected annual revenue for Agilent Techn...
2,2.0,2023-12-12 00:00:00+00:00,Agilent Technologies Reaches Analyst Target Price,A,https://www.nasdaq.com/articles/agilent-techno...,,,"In recent trading, shares of Agilent Technolog...","In recent trading, shares of Agilent Technolog...","In recent trading, shares of Agilent Technolog...",When a stock reaches the target an analyst has...,When a stock reaches the target an analyst has...
3,3.0,2023-12-07 00:00:00+00:00,Agilent (A) Enhances BioTek Cytation C10 With ...,A,https://www.nasdaq.com/articles/agilent-a-enha...,,,Agilent Technologies A is enhancing its BioTek...,"Per a Grand View Research report, the global m...","Notably, Agilent enhanced the BioTek Cytation ...","Agilent Technologies, Inc. Price and Consensus...","Notably, Agilent enhanced the BioTek Cytation ..."
4,4.0,2023-12-07 00:00:00+00:00,"Pre-Market Most Active for Dec 7, 2023 : SQQQ,...",A,https://www.nasdaq.com/articles/pre-market-mos...,,,The NASDAQ 100 Pre-Market Indicator is up 70.2...,ProShares UltraPro Short QQQ (SQQQ) is -0.15 a...,"As reported by Zacks, the current mean recomme...","The total Pre-Market volume is currently 39,23...",The NASDAQ 100 Pre-Market Indicator is up 70.2...


检查过之后，新闻是零时区的（UTC），但是price应该是按照美东时间的。所以需要调整一下，把美东时间收盘之前的新闻算成前一天的，那么预测的时候就可以直接用“上一日”新闻去预测当日close。

具体的做法是，把新闻时间先转成UTC，然后转成美东时间，然后看是不是早于16点，如果是，那么日期提前一天。

In [18]:
# 处理日期：调整时区并根据美东时间16点规则调整日期
# 检查日期是否已经有时区信息
# 如果没有时区信息，添加UTC时区
if nas['Date'].dt.tz is None:
    nas['Date'] = nas['Date'].dt.tz_localize('UTC')
    
# 无论之前有无时区，统一转换到美东时间
nas['Date'] = nas['Date'].dt.tz_convert('US/Eastern')

# 创建一个新列，用于判断时间是否早于当天美东时间16点
nas['before_market_close'] = nas['Date'].dt.hour <= 16

# 对于早于16点的记录，日期-1天
nas['Date'] = nas.apply(
    lambda row: row['Date'] - pd.Timedelta(days=1) if row['before_market_close'] else row['Date'], 
    axis=1
)

# 只保留年月日，去掉时间和时区信息
nas['Date'] = nas['Date'].dt.normalize().dt.tz_localize(None)

# 删除辅助列
nas.drop('before_market_close', axis=1, inplace=True)

# 显示处理后的前几行数据
print("处理后的日期格式：")
print(nas[['Date', 'Stock_symbol']].head())


处理后的日期格式：
        Date Stock_symbol
0 2023-12-16            A
1 2023-12-11            A
2 2023-12-11            A
3 2023-12-06            A
4 2023-12-06            A


finbert打分

In [19]:
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline
import pandas as pd
from tqdm import tqdm
import os
import torch



# 加载FinBERT模型和tokenizer
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')

# 创建情感分析pipeline
nlp = pipeline("text-classification", model=finbert, tokenizer=tokenizer)

# 读取数据
nas_1 = nas

def truncate_text(text, max_length=500):
    """将文本截断到最大长度"""
    return text[:max_length] if len(text) > max_length else text

def get_stock_sentiment(summary, stock_symbol, title=None):
    try:
        # 检查summary是否为NaN或非字符串类型
        if pd.isna(summary) or not isinstance(summary, str):
            # 如果提供了title且不是NaN且是字符串，则使用title
            if title is not None and not pd.isna(title) and isinstance(title, str):
                print(f"注意: 股票 {stock_symbol} 的摘要不可用，使用标题进行分析")
                text_to_analyze = title
            else:
                print(f"警告: 股票 {stock_symbol} 的摘要和标题都不可用，默认填充为中性(0)")
                return 0  # 默认填充为中性
        else:
            text_to_analyze = summary
            
        # 截断文本以避免长度问题
        truncated_text = text_to_analyze[:500] if len(text_to_analyze) > 500 else text_to_analyze
        
        # 使用FinBERT进行情感分析
        result = nlp(truncated_text)[0]
        
        # 将FinBERT的输出映射到1, 0, -1
        label_map = {
            'Positive': 1,
            'Neutral': 0,
            'Negative': -1
        }
        
        return label_map.get(result['label'], 0)  # 如果标签异常，也返回0
    except Exception as e:
        print(f"警告: 处理股票 {stock_symbol} 的文本时出错: {e}, 默认填充为中性(0)")
        return 0  # 遇到任何错误都返回中性


# 应用情绪分析，带进度条
sentiments = []
for _, row in tqdm(nas_1.iterrows(), total=len(nas_1), desc="Analyzing sentiment"):
    sentiment = get_stock_sentiment(row['Lsa_summary'], row['Stock_symbol'], row['Article_title'])
    sentiments.append(sentiment)

nas_1['Sentiment'] = sentiments


  from .autonotebook import tqdm as notebook_tqdm
Error while downloading from https://cdn-lfs.hf.co/yiyanghkust/finbert-tone/f31c2036e91c9854bcc35141d16669dd07b9726adfe391d1011bff1de7ea4b32?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27pytorch_model.bin%3B+filename%3D%22pytorch_model.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1751801870&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1MTgwMTg3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby95aXlhbmdoa3VzdC9maW5iZXJ0LXRvbmUvZjMxYzIwMzZlOTFjOTg1NGJjYzM1MTQxZDE2NjY5ZGQwN2I5NzI2YWRmZTM5MWQxMDExYmZmMWRlN2VhNGIzMj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PY9rvlIzjOMNQ6e6xJa%7EaWd8xklfFB80lNxnVgXqFx4l83jDTOLi0QoSRd1UT7govnZyw9ZbIY9tdHcUZnUuv9YtrdqXxjVKb1Tf5tJJqJcguHkF-gIhDV-xTeem8gLiTeNWZNUL-NRL8TBVOVSOYMZOj%7Epebx8CZ6lu%7ES%7EnSrY-oyiX%7EWqCfWMtpff89gtBtKpO%7ENZAamZyA0SOv1ACjxQVi84czrXhr0qz4uTvaiHK-nDixPONqoxrAU

In [25]:
# 检查VRTX和ZTS股票的lsa_summary属性及其类型
vrtx_data = nas_1[nas_1['Stock_symbol'] == 'VRTX']['Lsa_summary']
zts_data = nas_1[nas_1['Stock_symbol'] == 'ZTS']['Lsa_summary']

# 检查VRTX数据类型
print("VRTX股票的lsa_summary样本:")
print(vrtx_data.head())
print(f"VRTX股票的lsa_summary总数: {len(vrtx_data)}")

# 统计VRTX不同类型的数量
vrtx_types = vrtx_data.apply(type).value_counts()
print("\nVRTX股票的lsa_summary类型统计:")
for type_name, count in vrtx_types.items():
    print(f"  - {type_name.__name__}: {count}个")

# 检查ZTS数据类型
print("\nZTS股票的lsa_summary样本:")
print(zts_data.head())
print(f"ZTS股票的lsa_summary总数: {len(zts_data)}")

# 统计ZTS不同类型的数量
zts_types = zts_data.apply(type).value_counts()
print("\nZTS股票的lsa_summary类型统计:")
for type_name, count in zts_types.items():
    print(f"  - {type_name.__name__}: {count}个")

# 展示几个类型为float的数据样本
print("\n类型为float的lsa_summary样本:")
float_samples = nas_1[nas_1['Lsa_summary'].apply(lambda x: isinstance(x, float))].head(5)
print(float_samples)
print(f"float类型的样本总数: {len(nas_1[nas_1['Lsa_summary'].apply(lambda x: isinstance(x, float))])}")

VRTX股票的lsa_summary样本:
881263    As a result, it might be a smart move to start...
881264    Below is Validea's guru fundamental report for...
881265    Stocks recently featured in the blog include: ...
881266    Nonetheless, a few big drugmakers, Novo Nordis...
881267    Vertex Pharmaceuticals Vertex Pharmaceuticals ...
Name: Lsa_summary, dtype: object
VRTX股票的lsa_summary总数: 636

VRTX股票的lsa_summary类型统计:
  - str: 636个

ZTS股票的lsa_summary样本:
949403    Investors eyeing a purchase of Zoetis Inc (Sym...
949404    Stocks recently featured in the blog include: ...
949405    Zoetis Inc - Class A (ZTS) shares closed today...
949406    Four such stocks are Invitation Homes Inc. INV...
949407    Here are this week’s stocks: Zoetis (ZTS): Thi...
Name: Lsa_summary, dtype: object
ZTS股票的lsa_summary总数: 200

ZTS股票的lsa_summary类型统计:
  - str: 200个

类型为float的lsa_summary样本:
Empty DataFrame
Columns: [Unnamed: 0, Date, Article_title, Stock_symbol, Url, Publisher, Author, Article, Lsa_summary, Luhn_summary, Text

In [26]:
nas_2 = nas_1[['Date', 'Stock_symbol', 'Sentiment']]
# 将Sentiment列改名为Sentiment_Vader
nas_2 = nas_2.rename(columns={'Sentiment': 'Sentiment_vader'})

# 保存结果
output_dir = os.path.join(script_dir, 'finbert_nas_scored.csv')
nas_2.to_csv(output_dir, index=False)
print("✅ Sentiment analysis complete.")

✅ Sentiment analysis complete.


In [27]:
nas_1.columns

Index(['Unnamed: 0', 'Date', 'Article_title', 'Stock_symbol', 'Url',
       'Publisher', 'Author', 'Article', 'Lsa_summary', 'Luhn_summary',
       'Textrank_summary', 'Lexrank_summary', 'Sentiment'],
      dtype='object')

把情绪分析前的news表拆成不同股票的csv，但已经命名为sentiment，下一步就可以直接在原表基础上加一列情绪评分列

In [28]:
out_dir = os.path.join(script_dir, 'news_data_sentiment_scored')
os.makedirs(out_dir, exist_ok=True)
newscount = []
# 按照 Stock_symbol 分组并保存为独立的CSV文件
for stock_symbol, group_df in nas_2.groupby('Stock_symbol'):
    if pd.notna(stock_symbol) and str(stock_symbol).strip() != "":
        newscount.append(stock_symbol)
        filename = f"{stock_symbol}.csv"
        output_path = os.path.join(out_dir, filename)
        group_df.to_csv(output_path, index=False)

print(len(newscount))
#358

350


## Sentiment Aggregation

聚合price和news并填充空值（decay）

（把price_1y.csv换成price_10years.csv）


In [30]:
import chardet

def convert_to_utc(df, date_column):
    """
    将DataFrame中的日期列转换为UTC格式。
    """
    df[date_column] = pd.to_datetime(df[date_column])
    if df[date_column].dt.tz is None:  # 检查是否已经是UTC格式
        df[date_column] = df[date_column].dt.tz_localize('UTC')
    return df


def fill_missing_dates_with_log_decay(df, date_column, sentiment_column):
    # 确保日期格式正确
    df[date_column] = pd.to_datetime(df[date_column])

    # 创建完整日期范围
    date_range = pd.date_range(start=df[date_column].min(), end=df[date_column].max())

    # 创建一个新的DataFrame，包含所有日期
    full_df = pd.DataFrame(date_range, columns=[date_column])
    full_df = pd.merge(full_df, df, on=date_column, how='left')

    # 初始化News_flag列
    full_df['News_flag'] = full_df[sentiment_column].notna().astype(int)

    # 使用对数衰减规则填充缺失的情感值
    last_valid_sentiment = None
    last_valid_date = None
    for i, row in full_df.iterrows():
        if pd.isna(row[sentiment_column]):
            if last_valid_sentiment is not None:
                days_since_last_valid = (row[date_column] - last_valid_date).days
                decayed_sentiment = last_valid_sentiment * (np.log(2) ** days_since_last_valid)
                full_df.at[i, sentiment_column] = decayed_sentiment
                full_df.at[i, 'News_flag'] = 0
        else:
            last_valid_sentiment = row[sentiment_column]
            last_valid_date = row[date_column]

    return full_df


def fill_missing_dates_with_exponential_decay(df, date_column, sentiment_column, sentiment_key_name, decay_rate=0.05):
    # 确保日期格式正确
    df[date_column] = pd.to_datetime(df[date_column])

    # 创建完整日期范围
    date_range = pd.date_range(start=df[date_column].min(), end=df[date_column].max())

    # 创建一个新的DataFrame，包含所有日期
    full_df = pd.DataFrame(date_range, columns=[date_column])
    full_df = pd.merge(full_df, df, on=date_column, how='left')

    # 初始化News_flag列
    full_df['News_flag'] = full_df[sentiment_column].notna().astype(int)

    # 检查并添加Positive和Negative列
    if 'Positive' in df.columns and 'Positive' not in full_df.columns:
        full_df['Positive'] = np.nan
    if 'Negative' in df.columns and 'Negative' not in full_df.columns:
        full_df['Negative'] = np.nan

    # 使用指数递减规则填充缺失的情感值
    last_valid_sentiment = None
    last_valid_date = None
    for i, row in full_df.iterrows():
        if pd.isna(row[sentiment_column]):
            if last_valid_sentiment is not None:
                days_since_last_valid = (row[date_column] - last_valid_date).days
                decayed_sentiment = 0
                if sentiment_key_name == "Sentiment_gpt":
                    decayed_sentiment = 3 + (last_valid_sentiment - 3) * np.exp(-decay_rate * days_since_last_valid)
                elif sentiment_key_name == "Sentiment_vader":
                    decayed_sentiment = last_valid_sentiment * np.exp(-decay_rate * days_since_last_valid)
                full_df.at[i, sentiment_column] = decayed_sentiment
                full_df.at[i, 'News_flag'] = 0
                # 缺失日期的正负面新闻数量设为0
                if 'Positive' in full_df.columns:
                    full_df.at[i, 'Positive'] = 0
                if 'Negative' in full_df.columns:
                    full_df.at[i, 'Negative'] = 0
        else:
            last_valid_sentiment = row[sentiment_column]
            last_valid_date = row[date_column]

    return full_df

def integrate_data(stock_price_df, news_df, stock_price_csv_file, sentiment_key_name):
    # 创建原始DataFrame的副本
    stock_price_df_copy = stock_price_df.copy()
    news_df_copy = news_df.copy()
    # 2. 转换日期格式并排序
    stock_price_df_copy = convert_to_utc(stock_price_df_copy, 'Date')
    news_df_copy = convert_to_utc(news_df_copy, 'Date')

    stock_price_df_copy['Date'] = pd.to_datetime(stock_price_df_copy['Date'])
    news_df_copy['Date'] = pd.to_datetime(news_df_copy['Date'])

    # 将日期时间对齐到当天开始
    stock_price_df_copy['Date'] = pd.to_datetime(stock_price_df_copy['Date']).dt.normalize()
    news_df_copy['Date'] = pd.to_datetime(news_df_copy['Date']).dt.normalize()

    # 添加新闻情感分类列
    if sentiment_key_name == "Sentiment_gpt":
        # GPT情感分数: 1-5，默认为3(中性)，>3为正面，<3为负面
        news_df_copy['Positive'] = (news_df_copy[sentiment_key_name] > 3).astype(int)
        news_df_copy['Negative'] = (news_df_copy[sentiment_key_name] < 3).astype(int)
    elif sentiment_key_name == "Sentiment_vader":
        # VADER情感分数: -1到1，>0为正面，<0为负面
        news_df_copy['Positive'] = (news_df_copy[sentiment_key_name] > 0).astype(int)
        news_df_copy['Negative'] = (news_df_copy[sentiment_key_name] < 0).astype(int)

    stock_price_df_copy.set_index('Date', inplace=True)
    news_df_copy.set_index('Date', inplace=True)

    # 按照日期进行排序
    stock_price_df_copy.sort_index(inplace=True)
    news_df_copy.sort_index(inplace=True)
    if sentiment_key_name == "Sentiment_vader":
        # 将大于 1 的值替换为 1
        news_df_copy.loc[news_df_copy['Sentiment_vader'] > 1, 'Sentiment_vader'] = 1

        # 将小于 -1 的值替换为 -1
        news_df_copy.loc[news_df_copy['Sentiment_vader'] < -1, 'Sentiment_vader'] = -1

    # 3. 计算news.csv中每天的情感平均值和正负面新闻数量
    daily_sentiment = news_df_copy.groupby('Date').agg({
        sentiment_key_name: 'mean',
        'Positive': 'sum',
        'Negative': 'sum'
    }).reset_index()

    # 使用递减规则填充缺失的日期
    average_sentiment_filled = fill_missing_dates_with_exponential_decay(daily_sentiment, 'Date', sentiment_key_name, sentiment_key_name)
    
    # 确保正面和负面新闻计数在缺失日期为0
    if 'Positive' not in average_sentiment_filled.columns:
        average_sentiment_filled['Positive'] = 0
    if 'Negative' not in average_sentiment_filled.columns:
        average_sentiment_filled['Negative'] = 0
    
    average_sentiment_filled['Positive'].fillna(0, inplace=True)
    average_sentiment_filled['Negative'].fillna(0, inplace=True)

    # 4. 合并数据
    merged_df = pd.merge(stock_price_df_copy, average_sentiment_filled, on='Date', how='left')
    # 将NaN替换为3
    merged_df[sentiment_key_name].fillna(3, inplace=True)
    merged_df['Positive'].fillna(0, inplace=True)
    merged_df['Negative'].fillna(0, inplace=True)

    df_cleaned = merged_df.dropna(subset=['News_flag'])

    # 过滤掉sentiment列为0的行
    df_cleaned = df_cleaned[df_cleaned[sentiment_key_name] != 0]
    if sentiment_key_name == "Sentiment_gpt":
        df_cleaned['Scaled_sentiment'] = df_cleaned[sentiment_key_name].apply(lambda x: (x - 0.9999) / 4)
    elif sentiment_key_name == "Sentiment_vader":
        df_cleaned['Scaled_sentiment'] = df_cleaned[sentiment_key_name].apply(lambda x: (x + 1) / 2)
    # 打印合并后的数据框
    df_cleaned.columns.str.capitalize()
    # print(df_cleaned)
    print(len(df_cleaned['Close']))
    if len(df_cleaned['Close']) < 100:
        print(stock_price_csv_file)
        print("Lower than 333")
        return 0, df_cleaned
    return 1, df_cleaned


def start_inte(stock_price_folder_path, news_folder_path, saving_path, sentiment_key_name):
    # 1. 读取CSV文件并转换列名
    stock_price_csv_files = [file for file in os.listdir(stock_price_folder_path) if file.endswith('.csv')]
    for stock_price_csv_file in stock_price_csv_files:
        print(stock_price_csv_file)
        stock_file_path = os.path.join(stock_price_folder_path, stock_price_csv_file)
        stock_price_df = pd.read_csv(stock_file_path)
        stock_price_df.columns = stock_price_df.columns.str.capitalize()
        news_file_path = os.path.join(news_folder_path, stock_price_csv_file)
        if not os.path.isfile(news_file_path):
            print("No file storing corresponding stock news")
            continue
        news_df = pd.read_csv(news_file_path)
        news_df.columns = news_df.columns.str.capitalize()
        flag_333, merged_data = integrate_data(stock_price_df, news_df, stock_price_csv_file, sentiment_key_name)
        if flag_333 == 1:
            print(f"{stock_price_csv_file} ✅ Sufficient data (≥ 333)")
            merged_data.to_csv(os.path.join(saving_path, stock_price_csv_file), index=False)
        else:
            print(f"{stock_price_csv_file} ❌ Lower than 333 - Skipped saving.")


if __name__ == "__main__":
    stock_price_folder = os.path.join(script_dir, 'stock_price_data_preprocessed') #改成10年stock_price_data_preprocessed
    news_folder = os.path.join(script_dir, 'news_data_sentiment_scored')
    saving_path = os.path.join(script_dir, 'sentiment_price_news_integrate')

    os.makedirs(saving_path, exist_ok=True)

    Sentiment_key_name = 'Sentiment_vader'

    start_inte(stock_price_folder, news_folder, saving_path, Sentiment_key_name)

A.csv
170
A.csv ✅ Sufficient data (≥ 333)
AAPL.csv
218
AAPL.csv ✅ Sufficient data (≥ 333)
ABBV.csv
170
ABBV.csv ✅ Sufficient data (≥ 333)
ABT.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

154
ABT.csv ✅ Sufficient data (≥ 333)
ACGL.csv
185
ACGL.csv ✅ Sufficient data (≥ 333)
ACN.csv
154
ACN.csv ✅ Sufficient data (≥ 333)
ADBE.csv
178
ADBE.csv ✅ Sufficient data (≥ 333)
ADI.csv
154
ADI.csv ✅ Sufficient data (≥ 333)
ADM.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

164
ADM.csv ✅ Sufficient data (≥ 333)
ADP.csv
18
ADP.csv
Lower than 333
ADP.csv ❌ Lower than 333 - Skipped saving.
ADSK.csv
127
ADSK.csv ✅ Sufficient data (≥ 333)
AEE.csv
136
AEE.csv ✅ Sufficient data (≥ 333)
AEP.csv
153
AEP.csv ✅ Sufficient data (≥ 333)
AES.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

158
AES.csv ✅ Sufficient data (≥ 333)
AFL.csv
153
AFL.csv ✅ Sufficient data (≥ 333)
AIG.csv
146
AIG.csv ✅ Sufficient data (≥ 333)
AIZ.csv
169
AIZ.csv ✅ Sufficient data (≥ 333)
AJG.csv
2
AJG.csv
Lower than 333
AJG.csv ❌ Lower than 333 - Skipped saving.
AKAM.csv
155
AKAM.csv ✅ Sufficient data (≥ 333)
ALB.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

176
ALB.csv ✅ Sufficient data (≥ 333)
ALGN.csv
145
ALGN.csv ✅ Sufficient data (≥ 333)
ALL.csv
159
ALL.csv ✅ Sufficient data (≥ 333)
ALLE.csv
158
ALLE.csv ✅ Sufficient data (≥ 333)
AMAT.csv
137
AMAT.csv ✅ Sufficient data (≥ 333)
AMD.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

219
AMD.csv ✅ Sufficient data (≥ 333)
AME.csv
169
AME.csv ✅ Sufficient data (≥ 333)
AMGN.csv
147
AMGN.csv ✅ Sufficient data (≥ 333)
AMP.csv
193
AMP.csv ✅ Sufficient data (≥ 333)
AMT.csv
145
AMT.csv ✅ Sufficient data (≥ 333)
AMZN.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

185
AMZN.csv ✅ Sufficient data (≥ 333)
ANET.csv
189
ANET.csv ✅ Sufficient data (≥ 333)
ANSS.csv
126
ANSS.csv ✅ Sufficient data (≥ 333)
AON.csv
173
AON.csv ✅ Sufficient data (≥ 333)
AOS.csv
1
AOS.csv
Lower than 333
AOS.csv ❌ Lower than 333 - Skipped saving.
APA.csv
159
APA.csv ✅ Sufficient data (≥ 333)
APD.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

120
APD.csv ✅ Sufficient data (≥ 333)
APH.csv
87
APH.csv
Lower than 333
APH.csv ❌ Lower than 333 - Skipped saving.
APTV.csv
173
APTV.csv ✅ Sufficient data (≥ 333)
ARE.csv
160
ARE.csv ✅ Sufficient data (≥ 333)
ATO.csv
146
ATO.csv ✅ Sufficient data (≥ 333)
AVB.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

3
AVB.csv
Lower than 333
AVB.csv ❌ Lower than 333 - Skipped saving.
AVGO.csv
1
AVGO.csv
Lower than 333
AVGO.csv ❌ Lower than 333 - Skipped saving.
AVY.csv
81
AVY.csv
Lower than 333
AVY.csv ❌ Lower than 333 - Skipped saving.
AWK.csv
2
AWK.csv
Lower than 333
AWK.csv ❌ Lower than 333 - Skipped saving.
AXP.csv
2
AXP.csv
Lower than 333
AXP.csv ❌ Lower than 333 - Skipped saving.
AZO.csv
2
AZO.csv
Lower than 333
AZO.csv ❌ Lower than 333 - Skipped saving.
BA.csv
40
BA.csv
Lower than 333
BA.csv ❌ Lower than 333 - Skipped saving.
BAC.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

227
BAC.csv ✅ Sufficient data (≥ 333)
BAX.csv
112
BAX.csv ✅ Sufficient data (≥ 333)
BBY.csv
142
BBY.csv ✅ Sufficient data (≥ 333)
BDX.csv
131
BDX.csv ✅ Sufficient data (≥ 333)
BEN.csv
113
BEN.csv ✅ Sufficient data (≥ 333)
BIIB.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

134
BIIB.csv ✅ Sufficient data (≥ 333)
BK.csv
167
BK.csv ✅ Sufficient data (≥ 333)
BKNG.csv
127
BKNG.csv ✅ Sufficient data (≥ 333)
BKR.csv
171
BKR.csv ✅ Sufficient data (≥ 333)
BLK.csv
175
BLK.csv ✅ Sufficient data (≥ 333)
BMY.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

164
BMY.csv ✅ Sufficient data (≥ 333)
BR.csv
128
BR.csv ✅ Sufficient data (≥ 333)
BRO.csv
171
BRO.csv ✅ Sufficient data (≥ 333)
BSX.csv
154
BSX.csv ✅ Sufficient data (≥ 333)
BXP.csv
124
BXP.csv ✅ Sufficient data (≥ 333)
C.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

170
C.csv ✅ Sufficient data (≥ 333)
CAG.csv
119
CAG.csv ✅ Sufficient data (≥ 333)
CAH.csv
146
CAH.csv ✅ Sufficient data (≥ 333)
CARR.csv
129
CARR.csv ✅ Sufficient data (≥ 333)
CAT.csv
218
CAT.csv ✅ Sufficient data (≥ 333)
CCI.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

132
CCI.csv ✅ Sufficient data (≥ 333)
CCL.csv
195
CCL.csv ✅ Sufficient data (≥ 333)
CDNS.csv
136
CDNS.csv ✅ Sufficient data (≥ 333)
CF.csv
152
CF.csv ✅ Sufficient data (≥ 333)
CFG.csv
131
CFG.csv ✅ Sufficient data (≥ 333)
CHD.csv
4
CHD.csv
Lower than 333
CHD.csv ❌ Lower than 333 - Skipped saving.
CHRW.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

170
CHRW.csv ✅ Sufficient data (≥ 333)
CHTR.csv
82
CHTR.csv
Lower than 333
CHTR.csv ❌ Lower than 333 - Skipped saving.
CI.csv
158
CI.csv ✅ Sufficient data (≥ 333)
CINF.csv
181
CINF.csv ✅ Sufficient data (≥ 333)
CL.csv
141
CL.csv ✅ Sufficient data (≥ 333)
CLX.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

160
CLX.csv ✅ Sufficient data (≥ 333)
CMCSA.csv
142
CMCSA.csv ✅ Sufficient data (≥ 333)
CME.csv
157
CME.csv ✅ Sufficient data (≥ 333)
CMG.csv
165
CMG.csv ✅ Sufficient data (≥ 333)
CMI.csv
108
CMI.csv ✅ Sufficient data (≥ 333)
CMS.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df[sentiment_key_name].fillna(3, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting 

186
CMS.csv ✅ Sufficient data (≥ 333)
CNC.csv
225
CNC.csv ✅ Sufficient data (≥ 333)
CNP.csv
120
CNP.csv ✅ Sufficient data (≥ 333)
COF.csv
139
COF.csv ✅ Sufficient data (≥ 333)
COO.csv
130
COO.csv ✅ Sufficient data (≥ 333)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

COP.csv
172
COP.csv ✅ Sufficient data (≥ 333)
COR.csv
52
COR.csv
Lower than 333
COR.csv ❌ Lower than 333 - Skipped saving.
COST.csv
196
COST.csv ✅ Sufficient data (≥ 333)
CPB.csv
127
CPB.csv ✅ Sufficient data (≥ 333)
CPRT.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

127
CPRT.csv ✅ Sufficient data (≥ 333)
CPT.csv
133
CPT.csv ✅ Sufficient data (≥ 333)
CRL.csv
89
CRL.csv
Lower than 333
CRL.csv ❌ Lower than 333 - Skipped saving.
CRM.csv
208
CRM.csv ✅ Sufficient data (≥ 333)
CSCO.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

152
CSCO.csv ✅ Sufficient data (≥ 333)
CSGP.csv
94
CSGP.csv
Lower than 333
CSGP.csv ❌ Lower than 333 - Skipped saving.
CSX.csv
176
CSX.csv ✅ Sufficient data (≥ 333)
CTAS.csv
2
CTAS.csv
Lower than 333
CTAS.csv ❌ Lower than 333 - Skipped saving.
CTRA.csv
84
CTRA.csv
Lower than 333
CTRA.csv ❌ Lower than 333 - Skipped saving.
CTSH.csv
145
CTSH.csv ✅ Sufficient data (≥ 333)
CTVA.csv
103
CTVA.csv ✅ Sufficient data (≥ 333)
CVS.csv
169
CVS.csv ✅ Sufficient data (≥ 333)
CVX.csv
197
CVX.csv ✅ Sufficient data (≥ 333)
CZR.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

117
CZR.csv ✅ Sufficient data (≥ 333)
D.csv
186
D.csv ✅ Sufficient data (≥ 333)
DAL.csv
171
DAL.csv ✅ Sufficient data (≥ 333)
DD.csv
101
DD.csv ✅ Sufficient data (≥ 333)
DE.csv
160
DE.csv ✅ Sufficient data (≥ 333)
DFS.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df[sentiment_key_name].fillna(3, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting 

154
DFS.csv ✅ Sufficient data (≥ 333)
DG.csv
205
DG.csv ✅ Sufficient data (≥ 333)
DGX.csv
122
DGX.csv ✅ Sufficient data (≥ 333)
DHI.csv
154
DHI.csv ✅ Sufficient data (≥ 333)
DHR.csv
88
DHR.csv
Lower than 333
DHR.csv ❌ Lower than 333 - Skipped saving.
DIS.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

186
DIS.csv ✅ Sufficient data (≥ 333)
DLR.csv
199
DLR.csv ✅ Sufficient data (≥ 333)
DLTR.csv
115
DLTR.csv ✅ Sufficient data (≥ 333)
DOC.csv
127
DOC.csv ✅ Sufficient data (≥ 333)
DOV.csv
155
DOV.csv ✅ Sufficient data (≥ 333)
DOW.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

149
DOW.csv ✅ Sufficient data (≥ 333)
DPZ.csv
149
DPZ.csv ✅ Sufficient data (≥ 333)
DRI.csv
157
DRI.csv ✅ Sufficient data (≥ 333)
DTE.csv
175
DTE.csv ✅ Sufficient data (≥ 333)
DUK.csv
139
DUK.csv ✅ Sufficient data (≥ 333)
DVA.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

166
DVA.csv ✅ Sufficient data (≥ 333)
DVN.csv
174
DVN.csv ✅ Sufficient data (≥ 333)
DXCM.csv
164
DXCM.csv ✅ Sufficient data (≥ 333)
EA.csv
150
EA.csv ✅ Sufficient data (≥ 333)
EBAY.csv
142
EBAY.csv ✅ Sufficient data (≥ 333)
ECL.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

101
ECL.csv ✅ Sufficient data (≥ 333)
ED.csv
149
ED.csv ✅ Sufficient data (≥ 333)
EFX.csv
146
EFX.csv ✅ Sufficient data (≥ 333)
EIX.csv
141
EIX.csv ✅ Sufficient data (≥ 333)
EL.csv
137
EL.csv ✅ Sufficient data (≥ 333)
EMN.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

139
EMN.csv ✅ Sufficient data (≥ 333)
EMR.csv
178
EMR.csv ✅ Sufficient data (≥ 333)
ENPH.csv
179
ENPH.csv ✅ Sufficient data (≥ 333)
EOG.csv
153
EOG.csv ✅ Sufficient data (≥ 333)
EPAM.csv
123
EPAM.csv ✅ Sufficient data (≥ 333)
EQIX.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

177
EQIX.csv ✅ Sufficient data (≥ 333)
EQR.csv
166
EQR.csv ✅ Sufficient data (≥ 333)
EQT.csv
140
EQT.csv ✅ Sufficient data (≥ 333)
ES.csv
139
ES.csv ✅ Sufficient data (≥ 333)
ESS.csv
142
ESS.csv ✅ Sufficient data (≥ 333)
ETN.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

138
ETN.csv ✅ Sufficient data (≥ 333)
ETR.csv
131
ETR.csv ✅ Sufficient data (≥ 333)
EVRG.csv
86
EVRG.csv
Lower than 333
EVRG.csv ❌ Lower than 333 - Skipped saving.
EW.csv
145
EW.csv ✅ Sufficient data (≥ 333)
EXC.csv
138
EXC.csv ✅ Sufficient data (≥ 333)
EXPD.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

151
EXPD.csv ✅ Sufficient data (≥ 333)
EXPE.csv
147
EXPE.csv ✅ Sufficient data (≥ 333)
EXR.csv
145
EXR.csv ✅ Sufficient data (≥ 333)
F.csv
110
F.csv ✅ Sufficient data (≥ 333)
FANG.csv
138
FANG.csv ✅ Sufficient data (≥ 333)
FAST.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

166
FAST.csv ✅ Sufficient data (≥ 333)
FCX.csv
204
FCX.csv ✅ Sufficient data (≥ 333)
FDS.csv
118
FDS.csv ✅ Sufficient data (≥ 333)
FDX.csv
145
FDX.csv ✅ Sufficient data (≥ 333)
FE.csv
182
FE.csv ✅ Sufficient data (≥ 333)
FFIV.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

116
FFIV.csv ✅ Sufficient data (≥ 333)
FI.csv
65
FI.csv
Lower than 333
FI.csv ❌ Lower than 333 - Skipped saving.
FIS.csv
134
FIS.csv ✅ Sufficient data (≥ 333)
FITB.csv
168
FITB.csv ✅ Sufficient data (≥ 333)
FOX.csv
100
FOX.csv ✅ Sufficient data (≥ 333)
FOXA.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

115
FOXA.csv ✅ Sufficient data (≥ 333)
FRT.csv
122
FRT.csv ✅ Sufficient data (≥ 333)
FSLR.csv
162
FSLR.csv ✅ Sufficient data (≥ 333)
FTNT.csv
156
FTNT.csv ✅ Sufficient data (≥ 333)
FTV.csv
122
FTV.csv ✅ Sufficient data (≥ 333)
GD.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

251
GD.csv ✅ Sufficient data (≥ 333)
GE.csv
150
GE.csv ✅ Sufficient data (≥ 333)
GEN.csv
115
GEN.csv ✅ Sufficient data (≥ 333)
GILD.csv
116
GILD.csv ✅ Sufficient data (≥ 333)
GIS.csv
164
GIS.csv ✅ Sufficient data (≥ 333)
GL.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

178
GL.csv ✅ Sufficient data (≥ 333)
GLW.csv
129
GLW.csv ✅ Sufficient data (≥ 333)
GM.csv
13
GM.csv
Lower than 333
GM.csv ❌ Lower than 333 - Skipped saving.
GNRC.csv
99
GNRC.csv
Lower than 333
GNRC.csv ❌ Lower than 333 - Skipped saving.
GOOG.csv
217
GOOG.csv ✅ Sufficient data (≥ 333)
GOOGL.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

168
GOOGL.csv ✅ Sufficient data (≥ 333)
GPC.csv
11
GPC.csv
Lower than 333
GPC.csv ❌ Lower than 333 - Skipped saving.
GPN.csv
138
GPN.csv ✅ Sufficient data (≥ 333)
GRMN.csv
185
GRMN.csv ✅ Sufficient data (≥ 333)
GS.csv
174
GS.csv ✅ Sufficient data (≥ 333)
GWW.csv
124
GWW.csv ✅ Sufficient data (≥ 333)
HAL.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

170
HAL.csv ✅ Sufficient data (≥ 333)
HAS.csv
128
HAS.csv ✅ Sufficient data (≥ 333)
HBAN.csv
113
HBAN.csv ✅ Sufficient data (≥ 333)
HCA.csv
150
HCA.csv ✅ Sufficient data (≥ 333)
HD.csv
240
HD.csv ✅ Sufficient data (≥ 333)
HES.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

158
HES.csv ✅ Sufficient data (≥ 333)
HIG.csv
169
HIG.csv ✅ Sufficient data (≥ 333)
HII.csv
No file storing corresponding stock news
HLT.csv
233
HLT.csv ✅ Sufficient data (≥ 333)
HOLX.csv
151
HOLX.csv ✅ Sufficient data (≥ 333)
HON.csv
No file storing corresponding stock news
HPE.csv
150
HPE.csv ✅ Sufficient data (≥ 333)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

HPQ.csv
141
HPQ.csv ✅ Sufficient data (≥ 333)
HRL.csv
119
HRL.csv ✅ Sufficient data (≥ 333)
HSIC.csv
140
HSIC.csv ✅ Sufficient data (≥ 333)
HST.csv
120
HST.csv ✅ Sufficient data (≥ 333)
HSY.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

182
HSY.csv ✅ Sufficient data (≥ 333)
HUM.csv
193
HUM.csv ✅ Sufficient data (≥ 333)
HWM.csv
No file storing corresponding stock news
IBM.csv
1
IBM.csv
Lower than 333
IBM.csv ❌ Lower than 333 - Skipped saving.
IDXX.csv
183
IDXX.csv ✅ Sufficient data (≥ 333)
IFF.csv
127
IFF.csv ✅ Sufficient data (≥ 333)
INCY.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

136
INCY.csv ✅ Sufficient data (≥ 333)
INTC.csv
199
INTC.csv ✅ Sufficient data (≥ 333)
INTU.csv
229
INTU.csv ✅ Sufficient data (≥ 333)
IP.csv
198
IP.csv ✅ Sufficient data (≥ 333)
IPG.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

141
IPG.csv ✅ Sufficient data (≥ 333)
IQV.csv
No file storing corresponding stock news
IRM.csv
No file storing corresponding stock news
ISRG.csv
No file storing corresponding stock news
IT.csv
204
IT.csv ✅ Sufficient data (≥ 333)
ITW.csv
203
ITW.csv ✅ Sufficient data (≥ 333)
IVZ.csv
185
IVZ.csv ✅ Sufficient data (≥ 333)
J.csv
No file storing corresponding stock news
JBHT.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

0
JBHT.csv
Lower than 333
JBHT.csv ❌ Lower than 333 - Skipped saving.
JCI.csv
0
JCI.csv
Lower than 333
JCI.csv ❌ Lower than 333 - Skipped saving.
JKHY.csv
No file storing corresponding stock news
JNJ.csv
0
JNJ.csv
Lower than 333
JNJ.csv ❌ Lower than 333 - Skipped saving.
JNPR.csv
0
JNPR.csv
Lower than 333
JNPR.csv ❌ Lower than 333 - Skipped saving.
JPM.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

0
JPM.csv
Lower than 333
JPM.csv ❌ Lower than 333 - Skipped saving.
K.csv
5
K.csv
Lower than 333
K.csv ❌ Lower than 333 - Skipped saving.
KEY.csv
130
KEY.csv ✅ Sufficient data (≥ 333)
KEYS.csv
208
KEYS.csv ✅ Sufficient data (≥ 333)
KHC.csv
160
KHC.csv ✅ Sufficient data (≥ 333)
KIM.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

145
KIM.csv ✅ Sufficient data (≥ 333)
KLAC.csv
3
KLAC.csv
Lower than 333
KLAC.csv ❌ Lower than 333 - Skipped saving.
KMB.csv
141
KMB.csv ✅ Sufficient data (≥ 333)
KMI.csv
1
KMI.csv
Lower than 333
KMI.csv ❌ Lower than 333 - Skipped saving.
KMX.csv
224
KMX.csv ✅ Sufficient data (≥ 333)
KO.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

199
KO.csv ✅ Sufficient data (≥ 333)
KR.csv
176
KR.csv ✅ Sufficient data (≥ 333)
L.csv
0
L.csv
Lower than 333
L.csv ❌ Lower than 333 - Skipped saving.
LDOS.csv
149
LDOS.csv ✅ Sufficient data (≥ 333)
LEN.csv
150
LEN.csv ✅ Sufficient data (≥ 333)
LH.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

3
LH.csv
Lower than 333
LH.csv ❌ Lower than 333 - Skipped saving.
LHX.csv
115
LHX.csv ✅ Sufficient data (≥ 333)
LIN.csv
1
LIN.csv
Lower than 333
LIN.csv ❌ Lower than 333 - Skipped saving.
LKQ.csv
No file storing corresponding stock news
LLY.csv
1
LLY.csv
Lower than 333
LLY.csv ❌ Lower than 333 - Skipped saving.
LMT.csv
2
LMT.csv
Lower than 333
LMT.csv ❌ Lower than 333 - Skipped saving.
LNT.csv
1
LNT.csv
Lower than 333
LNT.csv ❌ Lower than 333 - Skipped saving.
LOW.csv
2
LOW.csv
Lower than 333
LOW.csv ❌ Lower than 333 - Skipped saving.
LRCX.csv
4
LRCX.csv
Lower than 333
LRCX.csv ❌ Lower than 333 - Skipped saving.
LUV.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

1
LUV.csv
Lower than 333
LUV.csv ❌ Lower than 333 - Skipped saving.
LVS.csv
No file storing corresponding stock news
LW.csv
3
LW.csv
Lower than 333
LW.csv ❌ Lower than 333 - Skipped saving.
LYB.csv
0
LYB.csv
Lower than 333
LYB.csv ❌ Lower than 333 - Skipped saving.
LYV.csv
1
LYV.csv
Lower than 333
LYV.csv ❌ Lower than 333 - Skipped saving.
MA.csv
192
MA.csv ✅ Sufficient data (≥ 333)
MAA.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

203
MAA.csv ✅ Sufficient data (≥ 333)
MAR.csv
209
MAR.csv ✅ Sufficient data (≥ 333)
MAS.csv
207
MAS.csv ✅ Sufficient data (≥ 333)
MCD.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

171
MCD.csv ✅ Sufficient data (≥ 333)
MCHP.csv
259
MCHP.csv ✅ Sufficient data (≥ 333)
MCK.csv
0
MCK.csv
Lower than 333
MCK.csv ❌ Lower than 333 - Skipped saving.
MCO.csv
0
MCO.csv
Lower than 333
MCO.csv ❌ Lower than 333 - Skipped saving.
MDLZ.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values a

140
MDLZ.csv ✅ Sufficient data (≥ 333)
MDT.csv
0
MDT.csv
Lower than 333
MDT.csv ❌ Lower than 333 - Skipped saving.
MET.csv
213
MET.csv ✅ Sufficient data (≥ 333)
MGM.csv
0
MGM.csv
Lower than 333
MGM.csv ❌ Lower than 333 - Skipped saving.
MHK.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

0
MHK.csv
Lower than 333
MHK.csv ❌ Lower than 333 - Skipped saving.
MKC.csv
0
MKC.csv
Lower than 333
MKC.csv ❌ Lower than 333 - Skipped saving.
MLM.csv
0
MLM.csv
Lower than 333
MLM.csv ❌ Lower than 333 - Skipped saving.
MMC.csv
144
MMC.csv ✅ Sufficient data (≥ 333)
MMM.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

0
MMM.csv
Lower than 333
MMM.csv ❌ Lower than 333 - Skipped saving.
MNST.csv
148
MNST.csv ✅ Sufficient data (≥ 333)
MO.csv
156
MO.csv ✅ Sufficient data (≥ 333)
MOS.csv
169
MOS.csv ✅ Sufficient data (≥ 333)
MPC.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

231
MPC.csv ✅ Sufficient data (≥ 333)
MPWR.csv
144
MPWR.csv ✅ Sufficient data (≥ 333)
MRK.csv
162
MRK.csv ✅ Sufficient data (≥ 333)
MS.csv
164
MS.csv ✅ Sufficient data (≥ 333)
MSCI.csv
149
MSCI.csv ✅ Sufficient data (≥ 333)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

MSFT.csv
221
MSFT.csv ✅ Sufficient data (≥ 333)
MSI.csv
124
MSI.csv ✅ Sufficient data (≥ 333)
MTB.csv
135
MTB.csv ✅ Sufficient data (≥ 333)
MTCH.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

117
MTCH.csv ✅ Sufficient data (≥ 333)
MTD.csv
173
MTD.csv ✅ Sufficient data (≥ 333)
MU.csv
150
MU.csv ✅ Sufficient data (≥ 333)
NCLH.csv
118
NCLH.csv ✅ Sufficient data (≥ 333)
NDAQ.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

155
NDAQ.csv ✅ Sufficient data (≥ 333)
NDSN.csv
93
NDSN.csv
Lower than 333
NDSN.csv ❌ Lower than 333 - Skipped saving.
NEE.csv
171
NEE.csv ✅ Sufficient data (≥ 333)
NEM.csv
121
NEM.csv ✅ Sufficient data (≥ 333)
NFLX.csv
218
NFLX.csv ✅ Sufficient data (≥ 333)
NI.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

151
NI.csv ✅ Sufficient data (≥ 333)
NKE.csv
157
NKE.csv ✅ Sufficient data (≥ 333)
NOC.csv
161
NOC.csv ✅ Sufficient data (≥ 333)
NOW.csv
171
NOW.csv ✅ Sufficient data (≥ 333)
NRG.csv
157
NRG.csv ✅ Sufficient data (≥ 333)
NSC.csv
145
NSC.csv ✅ Sufficient data (≥ 333)
NTAP.csv
104
NTAP.csv ✅ Sufficient data (≥ 333)
NTRS.csv
165
NTRS.csv ✅ Sufficient data (≥ 333)
NUE.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

139
NUE.csv ✅ Sufficient data (≥ 333)
NVR.csv
130
NVR.csv ✅ Sufficient data (≥ 333)
NWS.csv
189
NWS.csv ✅ Sufficient data (≥ 333)
NWSA.csv
120
NWSA.csv ✅ Sufficient data (≥ 333)
O.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

0
O.csv
Lower than 333
O.csv ❌ Lower than 333 - Skipped saving.
ODFL.csv
136
ODFL.csv ✅ Sufficient data (≥ 333)
OKE.csv
123
OKE.csv ✅ Sufficient data (≥ 333)
OMC.csv
183
OMC.csv ✅ Sufficient data (≥ 333)
ON.csv
133
ON.csv ✅ Sufficient data (≥ 333)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

ORCL.csv
134
ORCL.csv ✅ Sufficient data (≥ 333)
ORLY.csv
162
ORLY.csv ✅ Sufficient data (≥ 333)
OTIS.csv
133
OTIS.csv ✅ Sufficient data (≥ 333)
OXY.csv
160
OXY.csv ✅ Sufficient data (≥ 333)
PAYX.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

142
PAYX.csv ✅ Sufficient data (≥ 333)
PCG.csv
154
PCG.csv ✅ Sufficient data (≥ 333)
PEG.csv
98
PEG.csv
Lower than 333
PEG.csv ❌ Lower than 333 - Skipped saving.
PEP.csv
173
PEP.csv ✅ Sufficient data (≥ 333)
PFE.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df[sentiment_key_name].fillna(3, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting 

0
PFE.csv
Lower than 333
PFE.csv ❌ Lower than 333 - Skipped saving.
PFG.csv
146
PFG.csv ✅ Sufficient data (≥ 333)
PG.csv
174
PG.csv ✅ Sufficient data (≥ 333)
PGR.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

166
PGR.csv ✅ Sufficient data (≥ 333)
PH.csv
144
PH.csv ✅ Sufficient data (≥ 333)
PHM.csv
142
PHM.csv ✅ Sufficient data (≥ 333)
PLD.csv
183
PLD.csv ✅ Sufficient data (≥ 333)
PM.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

0
PM.csv
Lower than 333
PM.csv ❌ Lower than 333 - Skipped saving.
PNR.csv
109
PNR.csv ✅ Sufficient data (≥ 333)
PNW.csv
99
PNW.csv
Lower than 333
PNW.csv ❌ Lower than 333 - Skipped saving.
POOL.csv
97
POOL.csv
Lower than 333
POOL.csv ❌ Lower than 333 - Skipped saving.
PPG.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

209
PPG.csv ✅ Sufficient data (≥ 333)
PPL.csv
206
PPL.csv ✅ Sufficient data (≥ 333)
PRU.csv
141
PRU.csv ✅ Sufficient data (≥ 333)
PSA.csv
116
PSA.csv ✅ Sufficient data (≥ 333)
PSX.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

126
PSX.csv ✅ Sufficient data (≥ 333)
PWR.csv
161
PWR.csv ✅ Sufficient data (≥ 333)
QCOM.csv
143
QCOM.csv ✅ Sufficient data (≥ 333)
RCL.csv
19
RCL.csv
Lower than 333
RCL.csv ❌ Lower than 333 - Skipped saving.
REG.csv
128
REG.csv ✅ Sufficient data (≥ 333)
REGN.csv
38
REGN.csv
Lower than 333
REGN.csv ❌ Lower than 333 - Skipped saving.
RF.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

74
RF.csv
Lower than 333
RF.csv ❌ Lower than 333 - Skipped saving.
RJF.csv
162
RJF.csv ✅ Sufficient data (≥ 333)
RL.csv
150
RL.csv ✅ Sufficient data (≥ 333)
RMD.csv
96
RMD.csv
Lower than 333
RMD.csv ❌ Lower than 333 - Skipped saving.
ROK.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

141
ROK.csv ✅ Sufficient data (≥ 333)
ROL.csv
134
ROL.csv ✅ Sufficient data (≥ 333)
ROP.csv
109
ROP.csv ✅ Sufficient data (≥ 333)
ROST.csv
152
ROST.csv ✅ Sufficient data (≥ 333)
RSG.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

146
RSG.csv ✅ Sufficient data (≥ 333)
RTX.csv
No file storing corresponding stock news
SBAC.csv
178
SBAC.csv ✅ Sufficient data (≥ 333)
SBUX.csv
0
SBUX.csv
Lower than 333
SBUX.csv ❌ Lower than 333 - Skipped saving.
SCHW.csv
8
SCHW.csv
Lower than 333
SCHW.csv ❌ Lower than 333 - Skipped saving.
SHW.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

0
SHW.csv
Lower than 333
SHW.csv ❌ Lower than 333 - Skipped saving.
SJM.csv
175
SJM.csv ✅ Sufficient data (≥ 333)
SLB.csv
0
SLB.csv
Lower than 333
SLB.csv ❌ Lower than 333 - Skipped saving.
SNA.csv
0
SNA.csv
Lower than 333
SNA.csv ❌ Lower than 333 - Skipped saving.
SNPS.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

219
SNPS.csv ✅ Sufficient data (≥ 333)
SO.csv
0
SO.csv
Lower than 333
SO.csv ❌ Lower than 333 - Skipped saving.
SPGI.csv
No file storing corresponding stock news
SRE.csv
184
SRE.csv ✅ Sufficient data (≥ 333)
STT.csv
No file storing corresponding stock news
STZ.csv
162
STZ.csv ✅ Sufficient data (≥ 333)
SWK.csv
150
SWK.csv ✅ Sufficient data (≥ 333)
SWKS.csv
136
SWKS.csv ✅ Sufficient data (≥ 333)
SYK.csv
130
SYK.csv ✅ Sufficient data (≥ 333)
SYY.csv
112
SYY.csv ✅ Sufficient data (≥ 333)
T.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

186
T.csv ✅ Sufficient data (≥ 333)
TECH.csv
57
TECH.csv
Lower than 333
TECH.csv ❌ Lower than 333 - Skipped saving.
TEL.csv
203
TEL.csv ✅ Sufficient data (≥ 333)
TER.csv
147
TER.csv ✅ Sufficient data (≥ 333)
TFC.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

153
TFC.csv ✅ Sufficient data (≥ 333)
TGT.csv
161
TGT.csv ✅ Sufficient data (≥ 333)
TJX.csv
148
TJX.csv ✅ Sufficient data (≥ 333)
TMUS.csv
150
TMUS.csv ✅ Sufficient data (≥ 333)
TPR.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

145
TPR.csv ✅ Sufficient data (≥ 333)
TRGP.csv
121
TRGP.csv ✅ Sufficient data (≥ 333)
TRMB.csv
167
TRMB.csv ✅ Sufficient data (≥ 333)
TROW.csv
164
TROW.csv ✅ Sufficient data (≥ 333)
TRV.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

178
TRV.csv ✅ Sufficient data (≥ 333)
TSCO.csv
168
TSCO.csv ✅ Sufficient data (≥ 333)
TSLA.csv
216
TSLA.csv ✅ Sufficient data (≥ 333)
TSN.csv
125
TSN.csv ✅ Sufficient data (≥ 333)
TT.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

193
TT.csv ✅ Sufficient data (≥ 333)
TTWO.csv
134
TTWO.csv ✅ Sufficient data (≥ 333)
TXN.csv
134
TXN.csv ✅ Sufficient data (≥ 333)
TYL.csv
173
TYL.csv ✅ Sufficient data (≥ 333)
UAL.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df[sentiment_key_name].fillna(3, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting 

0
UAL.csv
Lower than 333
UAL.csv ❌ Lower than 333 - Skipped saving.
UHS.csv
146
UHS.csv ✅ Sufficient data (≥ 333)
ULTA.csv
164
ULTA.csv ✅ Sufficient data (≥ 333)
UNP.csv
143
UNP.csv ✅ Sufficient data (≥ 333)
UPS.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df[sentiment_key_name].fillna(3, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting 

144
UPS.csv ✅ Sufficient data (≥ 333)
URI.csv
149
URI.csv ✅ Sufficient data (≥ 333)
USB.csv
132
USB.csv ✅ Sufficient data (≥ 333)
V.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

198
V.csv ✅ Sufficient data (≥ 333)
VICI.csv
108
VICI.csv ✅ Sufficient data (≥ 333)
VLO.csv
121
VLO.csv ✅ Sufficient data (≥ 333)
VMC.csv
148
VMC.csv ✅ Sufficient data (≥ 333)
VRSK.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

152
VRSK.csv ✅ Sufficient data (≥ 333)
VRSN.csv
125
VRSN.csv ✅ Sufficient data (≥ 333)
VRTX.csv
178
VRTX.csv ✅ Sufficient data (≥ 333)
VTR.csv
147
VTR.csv ✅ Sufficient data (≥ 333)
WAB.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

132
WAB.csv ✅ Sufficient data (≥ 333)
WAT.csv
119
WAT.csv ✅ Sufficient data (≥ 333)
WBA.csv
158
WBA.csv ✅ Sufficient data (≥ 333)
WDC.csv
133
WDC.csv ✅ Sufficient data (≥ 333)
WFC.csv
154
WFC.csv ✅ Sufficient data (≥ 333)
WMT.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

197
WMT.csv ✅ Sufficient data (≥ 333)
WST.csv
157
WST.csv ✅ Sufficient data (≥ 333)
WY.csv
95
WY.csv
Lower than 333
WY.csv ❌ Lower than 333 - Skipped saving.
XEL.csv
162
XEL.csv ✅ Sufficient data (≥ 333)
XOM.csv
178
XOM.csv ✅ Sufficient data (≥ 333)
XYL.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

136
XYL.csv ✅ Sufficient data (≥ 333)
YUM.csv
191
YUM.csv ✅ Sufficient data (≥ 333)
ZBH.csv
165
ZBH.csv ✅ Sufficient data (≥ 333)
ZBRA.csv
142
ZBRA.csv ✅ Sufficient data (≥ 333)
ZTS.csv
111
ZTS.csv ✅ Sufficient data (≥ 333)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Positive'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_sentiment_filled['Negative'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on

In [34]:
# 检查集成后数据中的缺失值情况
def check_missing_values(folder_path):
    """
    检查指定文件夹中所有CSV文件的缺失值情况
    
    参数:
    folder_path: 包含CSV文件的文件夹路径
    
    返回:
    无，直接打印结果
    """
    print("\n" + "="*80)
    print("检查集成后数据中的缺失值情况")
    print("="*80)
    
    # 获取所有CSV文件
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]
    
    if not csv_files:
        print("文件夹中没有找到CSV文件")
        return
    
    # 创建结果汇总表
    summary_data = []
    
    # 遍历每个文件
    for file in csv_files:
        file_path = os.path.join(folder_path, file)
        df = pd.read_csv(file_path)
        
        # 计算每列的缺失值数量和百分比
        missing_count = df.isnull().sum()
        missing_percent = (missing_count / len(df)) * 100
        
        # 获取有缺失值的列
        columns_with_missing = missing_count[missing_count > 0]
        
        if len(columns_with_missing) > 0:
            # 将结果添加到汇总表
            for col, count in columns_with_missing.items():
                summary_data.append({
                    '文件名': file,
                    '列名': col,
                    '缺失值数量': count,
                    '缺失值百分比': missing_percent[col],
                    '总行数': len(df)
                })
        else:
            # 如果没有缺失值，也记录
            summary_data.append({
                '文件名': file,
                '列名': '无缺失值',
                '缺失值数量': 0,
                '缺失值百分比': 0,
                '总行数': len(df)
            })
    
    # 创建汇总DataFrame
    summary_df = pd.DataFrame(summary_data)
    
    # 按文件名和缺失值数量排序
    summary_df = summary_df.sort_values(['文件名', '缺失值数量'], ascending=[True, False])
    
    # 打印汇总结果
    if len(summary_df) > 0:
        print("\n缺失值汇总:")
        print(summary_df)
        
        # 计算有缺失值的文件数量
        files_with_missing = summary_df[summary_df['列名'] != '无缺失值']['文件名'].nunique()
        total_files = len(csv_files)
        print(f"\n总结: {total_files}个文件中有{files_with_missing}个文件包含缺失值 ({files_with_missing/total_files*100:.2f}%)")
    else:
        print("没有找到任何缺失值")

# 执行检查
sentiment_price_integrated_folder = os.path.join(script_dir, 'sentiment_price_news_integrate')
check_missing_values(sentiment_price_integrated_folder)

# 如果发现缺失值，可以选择填充它们
def fill_missing_values(folder_path, output_folder=None):
    """
    填充指定文件夹中所有CSV文件的缺失值
    
    参数:
    folder_path: 包含CSV文件的文件夹路径
    output_folder: 保存填充后文件的文件夹路径，如果为None则覆盖原文件
    
    返回:
    无，直接保存处理后的文件
    """
    if output_folder is None:
        output_folder = folder_path
    else:
        os.makedirs(output_folder, exist_ok=True)
    
    print("\n" + "="*80)
    print("填充集成后数据中的缺失值")
    print("="*80)
    
    # 获取所有CSV文件
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]
    
    for file in csv_files:
        file_path = os.path.join(folder_path, file)
        output_path = os.path.join(output_folder, file)
        
        df = pd.read_csv(file_path)
        
        # 检查是否有缺失值
        missing_count = df.isnull().sum()
        has_missing = missing_count.sum() > 0
        
        if has_missing:
            print(f"处理文件: {file}")
            
            # 对于数值列，使用前向填充和后向填充
            numeric_cols = df.select_dtypes(include=['number']).columns
            for col in numeric_cols:
                if missing_count[col] > 0:
                    # 先使用前向填充，再使用后向填充
                    df[col] = df[col].fillna(method='ffill').fillna(method='bfill')
                    # 如果仍有缺失值（如果序列开头有缺失值），则用0填充
                    df[col] = df[col].fillna(0)
                    print(f"  - 已填充列 '{col}' 的缺失值")
            
            # 对于非数值列，使用最频繁值填充
            non_numeric_cols = df.select_dtypes(exclude=['number']).columns
            for col in non_numeric_cols:
                if missing_count[col] > 0:
                    # 使用最频繁值填充
                    most_frequent = df[col].mode()[0]
                    df[col] = df[col].fillna(most_frequent)
                    print(f"  - 已用最频繁值填充列 '{col}' 的缺失值")
            
            # 保存处理后的文件
            df.to_csv(output_path, index=False)
            print(f"  ✅ 已保存处理后的文件到 {output_path}")
        else:
            if output_folder != folder_path:
                # 如果没有缺失值但输出文件夹不同，则复制文件
                df.to_csv(output_path, index=False)
                print(f"文件 {file} 没有缺失值，已复制到输出文件夹")

# 执行填充缺失值（如果需要）
# 如果想保存到新文件夹，可以指定output_folder参数
fill_missing_values(sentiment_price_integrated_folder)



检查集成后数据中的缺失值情况

缺失值汇总:
          文件名    列名  缺失值数量  缺失值百分比  总行数
0       A.csv  无缺失值      0     0.0  170
1    AAPL.csv  无缺失值      0     0.0  218
2    ABBV.csv  无缺失值      0     0.0  170
3     ABT.csv  无缺失值      0     0.0  154
4    ACGL.csv  无缺失值      0     0.0  185
..        ...   ...    ...     ...  ...
391   XYL.csv  无缺失值      0     0.0  136
392   YUM.csv  无缺失值      0     0.0  191
393   ZBH.csv  无缺失值      0     0.0  165
394  ZBRA.csv  无缺失值      0     0.0  142
395   ZTS.csv  无缺失值      0     0.0  111

[396 rows x 5 columns]

总结: 391个文件中有1个文件包含缺失值 (0.26%)

填充集成后数据中的缺失值
处理文件: AXP.csv
  - 已填充列 'Open' 的缺失值
  - 已填充列 'High' 的缺失值
  - 已填充列 'Low' 的缺失值
  - 已填充列 'Close' 的缺失值
  - 已填充列 'Adj close' 的缺失值
  - 已填充列 'Volume' 的缺失值
  ✅ 已保存处理后的文件到 c:\Users\Mencius\Desktop\independent_project\fin_news\fnspid\FNSPID_source_data\sentiment_price_news_integrate\AXP.csv


  df[col] = df[col].fillna(method='ffill').fillna(method='bfill')


检查聚合后日期情况

In [31]:
news_folder = os.path.join(script_dir, 'sentiment_price_news_integrate')
stock_price_csv_files = [file for file in os.listdir(news_folder) if file.endswith('.csv')]
for stock_price_csv_file in stock_price_csv_files:
    stock_file_path = os.path.join(news_folder, stock_price_csv_file)
    stock_price_df = pd.read_csv(stock_file_path)
    print(stock_price_df.iloc[0,1],stock_price_csv_file)

1565700 A.csv
130.47000122070312 AAPL.csv
165.89999389648438 ABBV.csv
112.5 ABT.csv
62.68000030517578 ACGL.csv
283.0400085449219 ACN.csv
338.8999938964844 ADBE.csv
166.5 ADI.csv
86.0 ADM.csv
128.38999938964844 ADP.csv
200.13999938964844 ADSK.csv
85.16000366210938 AEE.csv
96.41000366210938 AEP.csv
27.21999931335449 AES.csv
73.5 AFL.csv
63.79999923706055 AIG.csv
130.19000244140625 AIZ.csv
1159300 AJG.csv
90.0 AKAM.csv
225.3500061035156 ALB.csv
830600 ALGN.csv
140.38999938964844 ALL.csv
485600 ALLE.csv
106.68000030517578 AMAT.csv
66.22000122070312 AMD.csv
143.72000122070312 AME.csv
271.9200134277344 AMGN.csv
103.44000244140624 AMP.csv
220.8500061035156 AMT.csv
93.5999984741211 AMZN.csv
113.7699966430664 ANET.csv
556400 ANSS.csv
306.4599914550781 AON.csv
42.209999084472656 AOS.csv
43.11000061035156 APA.csv
308.9700012207031 APD.csv
82.01000213623047 APH.csv
100.29000091552734 APTV.csv
147.69000244140625 ARE.csv
117.06999969482422 ATO.csv
172.57000732421875 AVB.csv
248.8500061035156 AVGO.cs

检查新闻表出现的日期情况

In [32]:
news_folder = os.path.join(script_dir, 'news_data_sentiment_scored')
stock_price_csv_files = [file for file in os.listdir(news_folder) if file.endswith('.csv')]
for stock_price_csv_file in stock_price_csv_files:
    stock_file_path = os.path.join(news_folder, stock_price_csv_file)
    stock_price_df = pd.read_csv(stock_file_path)
    print(stock_price_df.iloc[0,0],stock_price_csv_file)

2023-12-16 A.csv
2023-12-16 AAPL.csv
2023-12-15 ABBV.csv
2023-12-15 ABT.csv
2023-12-15 ACGL.csv
2023-12-15 ACN.csv
2023-12-15 ADBE.csv
2023-12-15 ADI.csv
2023-12-15 ADM.csv
2023-12-15 ADP.csv
2023-12-15 ADSK.csv
2023-12-15 AEE.csv
2023-12-15 AEP.csv
2023-12-15 AES.csv
2023-12-14 AFL.csv
2023-12-15 AIG.csv
2023-12-16 AIZ.csv
2023-12-15 AJG.csv
2023-12-14 AKAM.csv
2023-12-15 ALB.csv
2023-12-15 ALGN.csv
2023-12-14 ALL.csv
2023-12-15 ALLE.csv
2023-12-15 AMAT.csv
2024-01-08 AMD.csv
2023-12-11 AME.csv
2023-12-16 AMGN.csv
2020-06-08 06:03:00+00:00 AMP.csv
2023-12-16 AMT.csv
2023-12-16 AMZN.csv
2023-12-16 ANET.csv
2023-12-15 ANSS.csv
2023-12-16 AON.csv
2023-12-16 AOS.csv
2023-12-14 APA.csv
2023-12-15 APD.csv
2023-12-15 APH.csv
2023-12-15 APTV.csv
2023-12-15 ARE.csv
2023-12-15 ATO.csv
2023-12-10 AVB.csv
2023-12-15 AVGO.csv
2023-12-13 AVY.csv
2023-12-14 AWK.csv
2023-12-15 AXP.csv
2023-12-15 AZO.csv
2023-12-16 BA.csv
2020-06-10 06:22:11+00:00 BAC.csv
2023-12-13 BAX.csv
2023-12-12 BBY.csv
2020-06-

### Forecast and Evaluate

# Stage 2

## Asymmetric Responses

In [None]:
1.9-12.16 15 0.82

## Short- to Long-term Reversion

## Disovery Speed

# Trash bin

In [7]:
import pandas as pd
from datetime import datetime, timedelta

# 步骤1：先快速找出最大日期（仅读取Date列）
max_date = None
for chunk in pd.read_csv(filepath_nas, usecols=['Date'], chunksize=10000):
    chunk_dates = pd.to_datetime(chunk['Date'])
    if max_date is None:
        max_date = chunk_dates.max()
    else:
        max_date = max(max_date, chunk_dates.max())
cutoff_date = max_date - timedelta(days=365*10)  # 计算10年前的日期

# 步骤2：分块读取并过滤符合条件的数据
filtered_chunks = []
for chunk in pd.read_csv(filepath_nas, parse_dates=['Date'], chunksize=10000):
    mask = chunk['Date'] >= cutoff_date
    filtered_chunks.append(chunk[mask])

# 合并结果
final_df = pd.concat(filtered_chunks, ignore_index=True)
print(f"最后十年数据量: {len(final_df)} 行")
print(final_df.head())

KeyboardInterrupt: 

In [None]:
# 检查nas中的publisher和author的空值和非空值数量，以及比例
total_publishers = news_nas['Publisher'].size
total_authors = news_nas['Author'].size

empty_publishers_count = news_nas['Publisher'].isnull().sum()
empty_authors_count = news_nas['Author'].isnull().sum()

non_empty_publishers_count = total_publishers - empty_publishers_count
non_empty_authors_count = total_authors - empty_authors_count

publisher_ratio = non_empty_publishers_count / total_publishers
author_ratio = non_empty_authors_count / total_authors

print(f"Publisher: 空值数量 = {empty_publishers_count}, 非空值数量 = {non_empty_publishers_count}, 比例 = {publisher_ratio:.2%}")
print(f"Author: 空值数量 = {empty_authors_count}, 非空值数量 = {non_empty_authors_count}, 比例 = {author_ratio:.2%}")

Publisher: 空值数量 = 949754, 非空值数量 = 0, 比例 = 0.00%
Author: 空值数量 = 949754, 非空值数量 = 0, 比例 = 0.00%


In [None]:
# 检查nas中的summary和Article的空值和非空值数量，以及比例
total_summary = news_nas['Lsa_summary'].size
total_article = news_nas['Article'].size

empty_summary_count = news_nas['Lsa_summary'].isnull().sum()
empty_article_count = news_nas['Article'].isnull().sum()

non_empty_summary_count = total_summary - empty_summary_count
non_empty_article_count = total_article - empty_article_count

summary_ratio = non_empty_summary_count / total_summary
article_ratio = non_empty_article_count / total_article

print(f"summary: 空值数量 = {empty_summary_count}, 非空值数量 = {non_empty_summary_count}, 比例 = {summary_ratio:.2%}")
print(f"article: 空值数量 = {empty_article_count}, 非空值数量 = {non_empty_article_count}, 比例 = {article_ratio:.2%}")

In [None]:
# merge
#average_sentiment = pd.read_csv(os.path.join(script_dir, 'sentiment_average_1y.csv'), parse_dates=['Date'])
price = pd.read_csv(os.path.join(script_dir, 'price_1y.csv'), parse_dates=['Date'])
average_sentiment['Date'] = average_sentiment['Date'].dt.tz_localize(None)
merged_data = pd.merge(average_sentiment, price, on=['Date', 'Stock_symbol'], how='outer')
#print(merged_data[merged_data['Stock_symbol'] == 'A'].head())

# fill NaN（向前填充，但如果一开始就是NaN就向后填充
merged_data = (
    merged_data
    .sort_values(['Stock_symbol', 'Date'])  # 按股票和时间排序
    .groupby('Stock_symbol')                # 分组
    .apply(lambda group: group.ffill().bfill())  # 先向前填充，再向后填充
    .reset_index(drop=True)                 # 重置索引
)

print(merged_data.head())
merged_data.to_csv('data.csv')
# normalize
# scaler = MinMaxScaler()
# merged_data[['sentiment_score', 'close']] = scaler.fit_transform(merged_data[['sentiment_score', 'close']])


  .apply(lambda group: group.ffill().bfill())  # 先向前填充，再向后填充


  Stock_symbol       Date  sentiment_score     volume        open        high  \
0            A 2023-01-03          0.72834  1414300.0  151.960007  153.130005   
1            A 2023-01-04          0.72834  1247400.0  151.649994  153.039993   
2            A 2023-01-05          0.72834  1714600.0  150.000000  153.070007   
3            A 2023-01-06          0.72834  2445000.0  154.360001  154.639999   
4            A 2023-01-09          0.72834  1269600.0  149.690002  151.279999   

          low       close   adj close  
0  148.470001  150.039993  149.209625  
1  150.240005  151.669998  150.830612  
2  148.770004  152.110001  151.268173  
3  143.009995  147.669998  146.852753  
4  147.199997  147.470001  146.653839  
