In [5]:
import pandas as pd
from pathlib import Path


In [9]:


# Folder holding the input CSV files read by the loaders below.
RAW_PATH = Path("data") / "raw"

# Folder receiving the derived dataset; created up front (including any
# missing parents) so the final write cannot fail on a missing directory.
PROCESSED_PATH = Path("data") / "processed"
PROCESSED_PATH.mkdir(exist_ok=True, parents=True)

def process_market_data(path=None):
    """Load the market price CSV and normalise its date column.

    Args:
        path: Optional CSV location (or any file-like object accepted by
            ``pandas.read_csv``). Defaults to
            ``RAW_PATH / "nasdaq100_prices.csv"``.

    Returns:
        A DataFrame with the CSV's columns, where a ``Date`` column (if
        present) is renamed to ``date`` and ``date`` holds plain
        ``datetime.date`` objects so it can be joined against the daily
        news aggregates.

    Raises:
        KeyError: If the CSV has neither a ``Date`` nor a ``date`` column.
    """
    if path is None:
        path = RAW_PATH / "nasdaq100_prices.csv"

    # No ``parse_dates=True`` / ``reset_index`` here: without ``index_col``
    # the former parses nothing, and the latter only injected a meaningless
    # 0..n-1 "index" column into the output.
    market = pd.read_csv(path)
    market = market.rename(columns={"Date": "date"})

    # Strip the time component so the merge key is a calendar day.
    market["date"] = pd.to_datetime(market["date"]).dt.date
    return market

def process_news_data(path=None):
    """Aggregate per-article sentiment scores into one row per calendar day.

    Args:
        path: Optional CSV location (or any file-like object accepted by
            ``pandas.read_csv``). Defaults to
            ``RAW_PATH / "nasdaq_news_sentiment.csv"``. The CSV must provide
            ``datetime`` and ``sentiment_score`` columns.

    Returns:
        A DataFrame with one row per day and the columns ``date``
        (``datetime.date``), ``avg_sentiment`` (mean score),
        ``sentiment_std`` (sample standard deviation, NaN for days with a
        single scored article) and ``news_count`` (number of non-null
        scores).
    """
    if path is None:
        path = RAW_PATH / "nasdaq_news_sentiment.csv"

    news = pd.read_csv(path)
    news["datetime"] = pd.to_datetime(news["datetime"])

    # Bucket articles by calendar day, dropping the time of publication.
    news["date"] = news["datetime"].dt.date

    daily_news = (
        news
        .groupby("date")
        .agg(
            avg_sentiment=("sentiment_score", "mean"),
            sentiment_std=("sentiment_score", "std"),
            news_count=("sentiment_score", "count"),
        )
        .reset_index()
    )

    return daily_news

def merge_datasets():
    """Join daily market data with daily news sentiment and save the result.

    Every market row is kept (left join on ``date``). Sentiment columns that
    are missing after the join are filled with 0, and the merged table is
    written to ``PROCESSED_PATH / "nasdaq100_features.csv"`` without the
    DataFrame index.

    Returns:
        The merged DataFrame that was written to disk.
    """
    market = process_market_data()
    news = process_news_data()

    df = market.merge(news, on="date", how="left")

    # Fill the gaps: market days without any news get 0 for every sentiment
    # feature (this also zeroes the NaN standard deviation of days that had
    # only a single article).
    sentiment_cols = ["avg_sentiment", "sentiment_std", "news_count"]
    df[sentiment_cols] = df[sentiment_cols].fillna(0)

    # The left join upcasts the count to float as soon as one day has no
    # news; restore an integer dtype so the column type does not depend on
    # the data and counts are not saved as "3.0".
    df["news_count"] = df["news_count"].astype("int64")

    df.to_csv(PROCESSED_PATH / "nasdaq100_features.csv", index=False)
    print("Final dataset saved to data/processed/nasdaq100_features.csv")
    return df

# Build and save the merged feature file only when this code is executed
# directly (as a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    merge_datasets()


Final dataset saved to data/processed/nasdaq100_features.csv
