In [1]:
import {z} from "zod";
import {Agent, AgentInputItem, Runner } from "@openai/agents";

const StocksSchema = z.object({});
const stocks = new Agent({
  name: "Stocks",
  instructions: "You will search up the latest news on Apple, Microsoft and Google stocks and return a small analysis on whether to buy/sell/hold stocsk",
  model: "gpt-5-nano",
  outputType: StocksSchema,
  modelSettings: {
    reasoning: {
      effort: "low"
    },
    store: true
  }
});

type WorkflowInput = { input_as_text: string };


// Main code entrypoint
export const runWorkflow = async (workflow: WorkflowInput) => {
  const conversationHistory: AgentInputItem[] = [
    {
      role: "user",
      content: [
        {
          type: "input_text",
          text: workflow.input_as_text
        }
      ]
    }
  ];
  const runner = new Runner({
    traceMetadata: {
      __trace_source__: "agent-builder",
      workflow_id: "wf_68e53cd07cd48190bb6c10849dc076e303d07c1e123f4f78"
    }
  });
  const stocksResultTemp = await runner.run(
    stocks,
    [
      ...conversationHistory
    ]
  );
  conversationHistory.push(...stocksResultTemp.newItems.map((item) => item.rawItem));

  if (!stocksResultTemp.finalOutput) {
      throw new Error("Agent result is undefined");
  }

  const stocksResult = {
    output_text: JSON.stringify(stocksResultTemp.finalOutput),
    output_parsed: stocksResultTemp.finalOutput
  };
}


SyntaxError: invalid syntax (ipython-input-852565369.py, line 1)

In [None]:
!pip install -q pandas_ta

In [2]:
# ===========================
# Polygon Data Downloader (5y Hourly) + Selected Features for MULTIPLE Tickers
# ===========================

# Colab/Drive + libs
import os, time, math, json, requests
import pandas as pd
from datetime import datetime, timedelta
from google.colab import drive
try:
    import pandas_ta as ta  # optional; we fall back to manual if missing/quirky
except Exception:
    ta = None

# --- CONNECT GOOGLE DRIVE ---
drive.mount('/content/drive')

# --- API KEY ---
# Prefer setting this in Colab (Runtime → Run all, then re-run) or via:
# os.environ["POLYGON_API_KEY"] = "YOUR_KEY_HERE"
POLYGON_API_KEY = os.environ.get("POLYGON_API_KEY", "fbrZKMMbLhV7p0Rz_C9PKM69eps7P8OG")
if POLYGON_API_KEY == "REPLACE_ME" or not POLYGON_API_KEY:
    raise RuntimeError("Set POLYGON_API_KEY in environment before running.")

# --- OUTPUT DIR ---
OUT_DIR = "/content/drive/MyDrive/polygon_hourly_5y_multi"
os.makedirs(OUT_DIR, exist_ok=True)

# --- TICKERS TO DOWNLOAD (edit this list) ---
TICKERS = ["AAPL","MSFT","NVDA","AMD","JPM","GS","XOM","CVX","PG","KO"]

# --- DATE RANGE: last 5 years up to today (UTC) ---
end_date  = pd.Timestamp.utcnow().normalize().strftime("%Y-%m-%d")
start_date = (pd.Timestamp.utcnow().normalize() - pd.DateOffset(years=5)).strftime("%Y-%m-%d")

# --- CORE DOWNLOAD (Polygon aggs) ---
def download_polygon_data(ticker: str,
                          start_date: str,
                          end_date: str,
                          multiplier: int = 1,
                          timespan: str = "hour",
                          adjusted: bool = True,
                          sleep_s: float = 0.15) -> pd.DataFrame:
    key = POLYGON_API_KEY
    base_url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{start_date}/{end_date}"
    params = {"adjusted": str(adjusted).lower(), "sort": "asc", "limit": 50000, "apiKey": key}

    all_rows = []
    url = base_url
    print(f"[DL] {ticker}: {timespan} {start_date}→{end_date}")
    while True:
        r = requests.get(url, params=params if url == base_url else None, timeout=60)
        if r.status_code != 200:
            raise RuntimeError(f"[DL_ERR] {ticker} HTTP {r.status_code}: {r.text[:200]}")
        data = r.json()
        if data.get("status") not in ("OK","DELAYED"):
            raise RuntimeError(f"[DL_ERR] {ticker} bad status: {data.get('status')} {data}")
        results = data.get("results", [])
        for x in results:
            all_rows.append({
                "timestamp": pd.to_datetime(x["t"], unit="ms", utc=True),
                "open": float(x["o"]),
                "high": float(x["h"]),
                "low": float(x["l"]),
                "close": float(x["c"]),
                "volume": float(x["v"]),
                "vwap": float(x["vw"]) if x.get("vw") is not None else None
            })
        next_url = data.get("next_url")
        if not next_url:
            break
        # polygon returns next_url without apiKey param sometimes
        if "apiKey=" not in next_url:
            next_url += ("&" if "?" in next_url else "?") + f"apiKey={key}"
        url = next_url
        time.sleep(sleep_s)

    df = pd.DataFrame(all_rows).sort_values("timestamp").reset_index(drop=True)
    print(f"[DL] {ticker}: {len(df)} rows")
    return df

# --- FEATURES (3 only): price_change_pct, MACDh_12_26_9, BBM_20_2.0 ---
def add_selected_features(df: pd.DataFrame) -> pd.DataFrame:
    # 1) price_change_pct
    df["price_change_pct"] = df["close"].pct_change() * 100.0

    # 2) MACD histogram (MACDh_12_26_9)
    macdh_col = "MACDh_12_26_9"
    try:
        if ta is not None:
            df.ta.macd(fast=12, slow=26, signal=9, append=True)  # adds MACD_*, MACDh_*, MACDs_*
            if macdh_col not in df.columns:
                macd_col, macds_col = "MACD_12_26_9", "MACDs_12_26_9"
                if macd_col in df.columns and macds_col in df.columns:
                    df[macdh_col] = df[macd_col] - df[macds_col]
                else:
                    raise RuntimeError("pandas_ta MACD columns missing")
        else:
            raise RuntimeError("pandas_ta not available")
    except Exception:
        # Manual MACD → MACD histogram
        ema12 = df["close"].ewm(span=12, adjust=False).mean()
        ema26 = df["close"].ewm(span=26, adjust=False).mean()
        macd = ema12 - ema26
        signal = macd.ewm(span=9, adjust=False).mean()
        df[macdh_col] = macd - signal

    # 3) BBM_20_2.0 (Bollinger middle band = SMA20)
    df["BBM_20_2.0"] = df["close"].rolling(window=20, min_periods=20).mean()

    # Keep only required columns (ensure presence)
    keep = ["timestamp","open","high","low","close","volume","vwap",
            "price_change_pct","MACDh_12_26_9","BBM_20_2.0"]
    keep = [c for c in keep if c in df.columns]
    out = df[keep].dropna().reset_index(drop=True)
    return out

# --- Ensure VWAP exists; if all NA, approximate with rolling TWAP-like proxy ---
def ensure_vwap(df: pd.DataFrame, lookback: int = 20) -> pd.DataFrame:
    if "vwap" not in df.columns or df["vwap"].isna().all():
        # proxy using rolling (vol*close)/vol; for hourly, this is a simple fallback
        vol_roll = df["volume"].rolling(window=lookback, min_periods=1).sum()
        vc_roll  = (df["volume"] * df["close"]).rolling(window=lookback, min_periods=1).sum()
        df["vwap"] = vc_roll / vol_roll
    else:
        # fill small holes if any
        df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")
    return df

# --- MAIN BATCH ---
manifest = []
errors   = {}

for tkr in TICKERS:
    try:
        raw = download_polygon_data(tkr, start_date, end_date, multiplier=1, timespan="hour", adjusted=True)
        if raw.empty:
            print(f"[WARN] {tkr}: no data returned; skipping")
            errors[tkr] = "no_data"
            continue

        raw = ensure_vwap(raw)
        feat = add_selected_features(raw.copy())

        save_path = os.path.join(OUT_DIR, f"{tkr}_hourly_last5y_selected_features.csv")
        feat.to_csv(save_path, index=False)
        print(f"[SAVE] {tkr}: {save_path}  rows={len(feat)}")

        manifest.append({
            "ticker": tkr,
            "rows": len(feat),
            "path": save_path,
            "start": feat["timestamp"].iloc[0].isoformat(),
            "end":   feat["timestamp"].iloc[-1].isoformat(),
            "cols": list(feat.columns)
        })

        # light pacing to respect rate limits
        time.sleep(0.25)

    except Exception as e:
        print(f"[ERR] {tkr}: {e}")
        errors[tkr] = str(e)

# --- Write a small manifest JSON in the same folder ---
man_path = os.path.join(OUT_DIR, "download_manifest.json")
with open(man_path, "w") as f:
    json.dump({"tickers": TICKERS, "manifest": manifest, "errors": errors}, f, indent=2)
print(f"[DONE] Manifest saved: {man_path}")
print(f"[SUMMARY] ok={len(manifest)} err={len(errors)}")


Mounted at /content/drive
[DL] AAPL: hour 2020-10-10→2025-10-10
[DL] AAPL: 20043 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] AAPL: /content/drive/MyDrive/polygon_hourly_5y_multi/AAPL_hourly_last5y_selected_features.csv  rows=20024
[DL] MSFT: hour 2020-10-10→2025-10-10
[DL] MSFT: 20028 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] MSFT: /content/drive/MyDrive/polygon_hourly_5y_multi/MSFT_hourly_last5y_selected_features.csv  rows=20009
[DL] NVDA: hour 2020-10-10→2025-10-10
[DL] NVDA: 19961 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] NVDA: /content/drive/MyDrive/polygon_hourly_5y_multi/NVDA_hourly_last5y_selected_features.csv  rows=19942
[DL] AMD: hour 2020-10-10→2025-10-10
[DL] AMD: 20037 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] AMD: /content/drive/MyDrive/polygon_hourly_5y_multi/AMD_hourly_last5y_selected_features.csv  rows=20018
[DL] JPM: hour 2020-10-10→2025-10-10
[DL] JPM: 17875 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] JPM: /content/drive/MyDrive/polygon_hourly_5y_multi/JPM_hourly_last5y_selected_features.csv  rows=17856
[DL] GS: hour 2020-10-10→2025-10-10
[DL] GS: 15191 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] GS: /content/drive/MyDrive/polygon_hourly_5y_multi/GS_hourly_last5y_selected_features.csv  rows=15172
[DL] XOM: hour 2020-10-10→2025-10-10
[DL] XOM: 19339 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] XOM: /content/drive/MyDrive/polygon_hourly_5y_multi/XOM_hourly_last5y_selected_features.csv  rows=19320
[DL] CVX: hour 2020-10-10→2025-10-10
[DL] CVX: 17883 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] CVX: /content/drive/MyDrive/polygon_hourly_5y_multi/CVX_hourly_last5y_selected_features.csv  rows=17864
[DL] PG: hour 2020-10-10→2025-10-10
[DL] PG: 15034 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] PG: /content/drive/MyDrive/polygon_hourly_5y_multi/PG_hourly_last5y_selected_features.csv  rows=15015
[DL] KO: hour 2020-10-10→2025-10-10
[DL] KO: 18865 rows


  df["vwap"] = df["vwap"].fillna(method="ffill").fillna(method="bfill")


[SAVE] KO: /content/drive/MyDrive/polygon_hourly_5y_multi/KO_hourly_last5y_selected_features.csv  rows=18846
[DONE] Manifest saved: /content/drive/MyDrive/polygon_hourly_5y_multi/download_manifest.json
[SUMMARY] ok=10 err=0
