In [24]:
import pandas as pd
import os
from glob import glob

In [25]:
tickers = ['META', 'AAPL', 'AMZN', 'NFLX', 'GOOG', 'MSFT', 'IBM', 'ORCL', 'NVDA', 'INTC']

In [26]:
def combine_csv_files(input_dir, stock, mode, out_dir, time_col):
    # Construct the pattern to find the matching files
    pattern = os.path.join(input_dir, f"{stock}_{mode}*.csv")
    csv_files = glob(pattern)

    if not csv_files:
        print("No matching files found.")
        return

    # Read and concatenate all matching files
    df_list = []
    for file in csv_files:
        try:
            df_list.append(pd.read_csv(file))
        except Exception as e:
            print(f"Read {file} failed!") 
    combined_df = pd.concat(df_list, ignore_index=True)

    combined_df[time_col] = pd.to_datetime(combined_df[time_col])
    sorted_df = combined_df.sort_values(by=time_col)

    # Save the combined DataFrame to a CSV file
    output_file = f"{out_dir}/{stock}_{mode}.csv"
    sorted_df.to_csv(output_file, index=False)
    print(f"Combined data saved to {output_file}")

In [27]:
for stock in tickers:
    for mode, tcol in zip(["main", "relation"], ["publish_time", "time"]):
        combine_csv_files("../dataset/news_alpha", stock, mode, "news_alpha", tcol)
        combine_csv_files("../dataset/news_maux", stock, mode, "news_maux", tcol)

Combined data saved to news_alpha/META_main.csv
Read ../dataset/news_maux/META_main_2022-01-06_2022-01-08.csv failed!
Read ../dataset/news_maux/META_main_2022-01-26_2022-01-28.csv failed!
Read ../dataset/news_maux/META_main_2022-01-27_2022-01-29.csv failed!
Read ../dataset/news_maux/META_main_2022-01-07_2022-01-09.csv failed!
Read ../dataset/news_maux/META_main_2022-01-18_2022-01-20.csv failed!
Read ../dataset/news_maux/META_main_2022-01-16_2022-01-18.csv failed!
Read ../dataset/news_maux/META_main_2022-01-04_2022-01-06.csv failed!
Read ../dataset/news_maux/META_main_2022-01-03_2022-01-05.csv failed!
Read ../dataset/news_maux/META_main_2022-01-23_2022-01-25.csv failed!
Read ../dataset/news_maux/META_main_2022-01-24_2022-01-26.csv failed!
Read ../dataset/news_maux/META_main_2022-01-05_2022-01-07.csv failed!
Read ../dataset/news_maux/META_main_2022-01-10_2022-01-12.csv failed!
Read ../dataset/news_maux/META_main_2022-01-21_2022-01-23.csv failed!
Read ../dataset/news_maux/META_main_2022-0

In [30]:
pd.read_csv("news_alpha/AAPL_relation.csv")

Unnamed: 0,news_id,source_ticker,time,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
0,,AAPL,2023-01-01 05:13:00,BRK-A,0.112797,0.138191,Neutral
1,,AAPL,2023-01-01 05:13:00,NFLX,0.056540,0.067098,Neutral
2,,AAPL,2023-01-01 05:13:00,PARA,0.277118,0.222256,Somewhat-Bullish
3,,AAPL,2023-01-01 05:13:00,AAPL,0.277118,0.187644,Somewhat-Bullish
4,,AAPL,2023-01-01 05:30:00,GOOG,0.751437,0.466083,Bullish
...,...,...,...,...,...,...,...
2016,,AAPL,2024-10-02 15:27:48,AAPL,0.325924,0.139146,Neutral
2017,,AAPL,2024-10-02 15:27:48,NVDA,0.220809,0.213132,Somewhat-Bullish
2018,,AAPL,2024-10-02 15:27:48,MSFT,0.325924,0.139146,Neutral
2019,,AAPL,2024-10-02 17:45:19,AAPL,0.548363,0.302992,Somewhat-Bullish
