In [1]:
import os
import sys
import pandas as pd

# The project root is the parent of the current working directory. It has to be
# on sys.path so that the project-local `scripts` package can be imported.
ROOT_DIR = os.path.abspath("..")
if sys.path.count(ROOT_DIR) == 0:
    sys.path.append(ROOT_DIR)

# Data locations, all derived from the project root.
DATA_DIR = os.path.join(ROOT_DIR, "data")
PROCESSED_DIR = os.path.join(ROOT_DIR, "data", "processed")

# Ticker symbols handled by this notebook.
TICKERS = [
    "AAPL",
    "AMZN",
    "GOOG",
    "META",
    "MSFT",
    "NVDA",
    "TSLA",
]

from scripts.news_stock_alignment import load_price_data
from scripts.returns_utils import compute_daily_returns


In [2]:
# Load the price history for every ticker in TICKERS from DATA_DIR.
# date_col must name the date column used in the price CSVs; change "Date"
# here (and in the range check below) if your files use another name.
prices = load_price_data(tickers=TICKERS, data_dir=DATA_DIR, date_col="Date")

# Quick look at the first rows.
print("Price sample:")
display(prices.head())

# Sanity check: available columns and the period covered by the data.
print("\nColumns:", prices.columns.tolist())
date_values = prices["Date"]
print("Date range:", date_values.min(), "→", date_values.max())


Price sample:


Unnamed: 0,Date,Ticker,Close,High,Low,Open,Volume
0,2009-01-02,AAPL,2.721686,2.730385,2.554037,2.57563,746015200
1,2009-01-05,AAPL,2.836553,2.884539,2.780469,2.794266,1181608400
2,2009-01-06,AAPL,2.789767,2.914229,2.770872,2.877641,1289310400
3,2009-01-07,AAPL,2.729484,2.77417,2.70699,2.753477,753048800
4,2009-01-08,AAPL,2.780169,2.793666,2.700393,2.71209,673500800



Columns: ['Date', 'Ticker', 'Close', 'High', 'Low', 'Open', 'Volume']
Date range: 2009-01-02 00:00:00 → 2023-12-29 00:00:00


In [3]:
# Column names handed to compute_daily_returns. Adjust them if the price
# files use different names (e.g. 'stock' instead of 'Ticker').
return_column_names = dict(
    date_col="Date",
    ticker_col="Ticker",
    close_col="Close",
    return_col="DailyReturn",
)
prices_with_ret = compute_daily_returns(prices, **return_column_names)

# Preview the first 15 rows, ordered by ticker and then by date.
print("Prices with daily returns (sample):")
preview_columns = ["Ticker", "Date", "Close", "DailyReturn"]
returns_preview = prices_with_ret[preview_columns].sort_values(["Ticker", "Date"])
display(returns_preview.head(15))


Prices with daily returns (sample):


Unnamed: 0,Ticker,Date,Close,DailyReturn
0,AAPL,2009-01-02,2.721686,
1,AAPL,2009-01-05,2.836553,0.042204
2,AAPL,2009-01-06,2.789767,-0.016494
3,AAPL,2009-01-07,2.729484,-0.021609
4,AAPL,2009-01-08,2.780169,0.01857
5,AAPL,2009-01-09,2.716589,-0.022869
6,AAPL,2009-01-12,2.659007,-0.021196
7,AAPL,2009-01-13,2.630514,-0.010716
8,AAPL,2009-01-14,2.559135,-0.027135
9,AAPL,2009-01-15,2.500652,-0.022853


In [4]:
# Persist the price table with its daily-return column as a CSV under
# PROCESSED_DIR, creating the directory first if it does not exist yet.
os.makedirs(PROCESSED_DIR, exist_ok=True)

returns_csv_path = os.path.join(PROCESSED_DIR, "prices_with_daily_returns.csv")
# index=False: the row index is not written to the file.
prices_with_ret.to_csv(returns_csv_path, index=False)

# Report the location relative to the project root. Printing the absolute path
# would write the local username and directory layout into the saved notebook
# output, which is then shared with the notebook.
print(f"Saved prices + daily returns to:\n{os.path.relpath(returns_csv_path, ROOT_DIR)}")


Saved prices + daily returns to:
c:\Users\filimon.hailemariam\Downloads\Price Movement -Week1\Predicting-Price-Moves-with-News-Sentiment\data\processed\prices_with_daily_returns.csv
