In [None]:
import os
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from textblob import TextBlob
from alpaca_trade_api.rest import REST, TimeFrame

# ------------------- Configuration -------------------
# Credentials come from the environment; the literals are placeholders only.
ALPACA_API_KEY = os.environ.get('ALPACA_API_KEY', 'YOUR_ALPACA_API_KEY')
ALPACA_SECRET_KEY = os.environ.get('ALPACA_SECRET_KEY', 'YOUR_ALPACA_SECRET_KEY')

# Alpaca paper-trading host
ALPACA_BASE_URL = 'https://paper-api.alpaca.markets'

# Alpaca market-data host
ALPACA_DATA_URL = 'https://data.alpaca.markets'

# Backtest parameters: ticker, first/last simulated day, starting cash
STOCK = 'AAPL'
START_DATE = datetime(year=2025, month=4, day=7)
END_DATE = datetime(year=2025, month=4, day=11)
INITIAL_CAPITAL = 100_000

# On-disk cache locations (relative to the working directory)
NEWS_CACHE_FILE = 'alpaca_news_cache.json'
PRICE_CACHE_FILE = 'alpaca_price_cache.csv'

# ------------------- Alpaca API Clients -------------------
# Client used for price-bar requests; built with the market-data URL
data_api = REST(
    ALPACA_API_KEY,
    ALPACA_SECRET_KEY,
    ALPACA_DATA_URL,
    api_version='v2',
)
# Client used for news requests; same credentials, built with the paper URL
news_api = REST(
    ALPACA_API_KEY,
    ALPACA_SECRET_KEY,
    ALPACA_BASE_URL,
    api_version='v2',
)

# ------------------- Cache Management -------------------
def load_json(path):
    """Return the parsed JSON content of ``path``, or ``{}`` if it does not exist."""
    if not os.path.exists(path):
        return {}
    with open(path, 'r') as handle:
        raw = handle.read()
    return json.loads(raw)

def save_json(obj, path):
    """Serialise ``obj`` as JSON and write it to ``path``, replacing any old content.

    Args:
        obj: JSON-serialisable object.
        path: Destination file path.

    Raises:
        TypeError / ValueError: if ``obj`` cannot be serialised. In that case
            the file at ``path`` is left untouched.
    """
    # Serialise before opening: open(..., 'w') truncates immediately, so an
    # encoding failure after that point would wipe or corrupt the existing file.
    payload = json.dumps(obj)
    with open(path, 'w') as f:
        f.write(payload)

def load_price_cache():
    """Read the cached price table from ``PRICE_CACHE_FILE``.

    Returns a DataFrame with ``date``, ``open`` and ``close`` columns. When the
    cache file exists, ``date`` is parsed as datetimes by ``read_csv``; when it
    does not, an empty frame with those three columns is returned.
    """
    if not os.path.exists(PRICE_CACHE_FILE):
        return pd.DataFrame(columns=['date','open','close'])
    cached = pd.read_csv(PRICE_CACHE_FILE, parse_dates=['date'])
    return cached

def save_price_cache(df):
    """Write ``df`` to ``PRICE_CACHE_FILE`` as CSV, omitting the index column."""
    df.to_csv(path_or_buf=PRICE_CACHE_FILE, index=False)

# ------------------- Data Prefetching -------------------
def prefetch_price_data(symbol, start_date, end_date):
    """Fetch daily bars for ``symbol`` from Alpaca and write them to the price cache.

    Args:
        symbol: Ticker to request.
        start_date: First day to request (``date`` / ``datetime`` / ``Timestamp``).
        end_date: Last day to request; one extra day is added to the request.

    Returns:
        DataFrame with columns ``date`` (``datetime.date`` objects), ``open``
        and ``close``. Empty (but with those columns) when no bars come back.
    """
    # Send date-only strings. isoformat() on a naive datetime yields
    # 'YYYY-MM-DDTHH:MM:SS' with no UTC offset, which is neither RFC 3339 nor
    # the plain YYYY-MM-DD form.
    # The request end is pushed one day past end_date so the last day is
    # covered even if the API treats `end` as exclusive.
    bars = data_api.get_bars(
        symbol,
        TimeFrame.Day,
        start=start_date.strftime('%Y-%m-%d'),
        end=(end_date + timedelta(days=1)).strftime('%Y-%m-%d'),
    ).df
    # Filter by symbol if the frame carries a symbol column
    if 'symbol' in bars.columns:
        bars = bars[bars['symbol'] == symbol]

    if bars.empty:
        # No bars (e.g. a range with no trading days): return an empty table
        # with the expected schema instead of failing on missing columns.
        df = pd.DataFrame(columns=['date', 'open', 'close'])
    else:
        df = bars[['open', 'close']].copy()
        df['date'] = df.index.date
        df = df.reset_index(drop=True)[['date', 'open', 'close']]

    save_price_cache(df)
    return df

# ------------------- Sentiment Analysis -------------------
def get_sentiment(symbol, date, news_cache, offline_mode=False):
    """Return the sentiment score for ``symbol`` on ``date``, using a cache.

    On a cache miss the score is either synthesised (``offline_mode=True``) or
    computed as the mean TextBlob polarity of the day's Alpaca news articles.
    The result is stored in ``news_cache`` and the whole cache is written to
    ``NEWS_CACHE_FILE``.

    Args:
        symbol: Ticker to look up.
        date: Day of interest; must support ``strftime`` and ``+ timedelta``
            (``date``, ``datetime`` or ``pandas.Timestamp``).
        news_cache: Dict mapping ``"<symbol>_<YYYY-MM-DD>"`` to a score;
            mutated in place on a miss.
        offline_mode: If true, skip the news API and use a pseudo-random score
            in [-1, 1) derived deterministically from the cache key.

    Returns:
        The score as a float; ``0.0`` when no article has any text.
    """
    import zlib  # local import: only needed to derive the offline seed

    day = date.strftime('%Y-%m-%d')
    key = f"{symbol}_{day}"
    if key in news_cache:
        return news_cache[key]

    if offline_mode:
        # hash() of a str is randomised per interpreter process, so it cannot
        # give run-to-run reproducible scores; crc32 is stable. A private
        # RandomState also leaves NumPy's global RNG state untouched.
        seed = zlib.crc32(key.encode('utf-8'))
        score = float(np.random.RandomState(seed).uniform(-1, 1))
    else:
        # Build the UTC day window from the calendar date only. Appending
        # 'T00:00:00Z' to datetime.isoformat() would duplicate the time part.
        start = f"{day}T00:00:00Z"
        end = (date + timedelta(days=1)).strftime('%Y-%m-%d') + 'T00:00:00Z'
        articles = news_api.get_news(
            symbol,
            start=start,
            end=end,
            limit=50  # max articles per call
        )
        # Prefer the summary, fall back to the headline, skip empty articles
        texts = (art.summary or art.headline or '' for art in articles)
        scores = [TextBlob(text).sentiment.polarity for text in texts if text]
        score = float(np.mean(scores)) if scores else 0.0

    news_cache[key] = score
    save_json(news_cache, NEWS_CACHE_FILE)
    return score

# ------------------- Strategy Simulation -------------------
def run_sentiment_strategy(
    symbol,
    start_date,
    end_date,
    initial_capital,
    offline_mode=False
):
    """
    Simulate a daily sentiment-driven open-to-close strategy for one stock.

    For every calendar day in ``[start_date, end_date]`` the day's sentiment is
    looked up; the position is long if sentiment >= 0 and short if it is < 0,
    and the day's return is the open-to-close move in that direction. Days with
    no cached price (or NaN prices) contribute a return of 0.

    Args:
        symbol: Ticker to simulate.
        start_date: First simulated day.
        end_date: Last simulated day (inclusive).
        initial_capital: Starting portfolio value.
        offline_mode: Forwarded to ``get_sentiment``.

    Returns:
        DataFrame with one row per calendar day and columns ``date``,
        ``sentiment``, ``open``, ``close``, ``daily_return``,
        ``portfolio_value``.
    """
    # Load caches
    news_cache = load_json(NEWS_CACHE_FILE)
    price_cache = load_price_cache()
    # Ensure prices are cached
    # NOTE(review): any non-empty cache is used as-is; it is not checked
    # against `symbol` or the requested date range.
    if price_cache.empty:
        price_cache = prefetch_price_data(symbol, start_date, end_date)

    # Index prices by calendar date once. The 'date' column holds
    # datetime.date objects when freshly fetched but datetime64 values when
    # loaded from CSV. pandas treats datetime64 == datetime.date as unequal,
    # so both forms are normalised here before lookup.
    prices_by_day = {}
    if not price_cache.empty:
        cached_days = pd.to_datetime(price_cache['date']).dt.date
        rows = zip(cached_days, price_cache['open'], price_cache['close'])
        for day, open_v, close_v in rows:
            # setdefault keeps the first row for a day
            prices_by_day.setdefault(day, (float(open_v), float(close_v)))

    portfolio = initial_capital
    results = []

    for dt in pd.date_range(start_date, end_date):
        sentiment = get_sentiment(symbol, dt, news_cache, offline_mode)
        open_p, close_p = prices_by_day.get(dt.date(), (np.nan, np.nan))
        # Compute return based on sentiment; no price means no position
        if np.isnan(open_p) or np.isnan(close_p):
            ret = 0.0
        elif sentiment >= 0:
            ret = (close_p - open_p) / open_p  # long
        else:
            ret = (open_p - close_p) / open_p  # short
        portfolio *= (1 + ret)
        results.append({
            'date': dt.strftime('%Y-%m-%d'),
            'sentiment': sentiment,
            'open': open_p,
            'close': close_p,
            'daily_return': ret,
            'portfolio_value': portfolio
        })

    return pd.DataFrame(results)


In [None]:
# ------------------- Example Usage -------------------

# Remove old caches so the run starts from freshly fetched data
for cache_file in [NEWS_CACHE_FILE, PRICE_CACHE_FILE]:
    try:
        os.remove(cache_file)
    except FileNotFoundError:
        # Nothing to delete (e.g. first run). Other errors are not swallowed:
        # a cache that cannot be removed would otherwise be reused silently.
        pass

# Prefetch price data once (also writes the price cache file)
prefetch_price_data(STOCK, START_DATE, END_DATE)

# Run simulation. offline_mode=False queries the news API for each day;
# pass offline_mode=True to use synthetic sentiment scores instead.
df = run_sentiment_strategy(
    STOCK,
    START_DATE,
    END_DATE,
    INITIAL_CAPITAL,
    offline_mode=False
)
df.head(15)