In [None]:
# Use Jupyter magic to run pip installs inside the notebook kernel
%pip install --upgrade pip
%pip install -r requirements.txt

In [1]:
# Create news sample JSONs from Fin_Cleaned.csv
import pandas as pd, json, os
repo_root = r"D:\University of San Diego\Natural Language processing and GenAI\FinAgentX Multi Agent AI System\FinAgentX-Multi-Agent-AI-System"
csv_path = os.path.join(repo_root, "data", "Fin_Cleaned.csv")
out_dir = os.path.join(repo_root, "data", "sample")
os.makedirs(out_dir, exist_ok=True)

df = pd.read_csv(csv_path)

# Ensure expected columns exist
for col in ['Headline','Synopsis','Full_text','Date_published','Final Status']:
    if col not in df.columns:
        df[col] = ''

# helper
def make_news_file(query_terms, out_filename, desired_n=5):
    pattern = "|".join(query_terms)
    mask = df[['Headline','Synopsis','Full_text']].apply(
        lambda col: col.str.lower().str.contains(pattern, na=False)
    ).any(axis=1)
    matched = df[mask].copy()
    # pad if fewer
    if matched.shape[0] < desired_n:
        needed = desired_n - matched.shape[0]
        rest = df[~df.index.isin(matched.index)].sample(n=needed, random_state=1)
        matched = pd.concat([matched, rest], ignore_index=True)
    else:
        matched = matched.head(desired_n)
    arr=[]
    for _,row in matched.iterrows():
        obj = {
            "title": str(row.get("Headline","")).strip(),
            "description": str(row.get("Synopsis","")).strip(),
            "content": str(row.get("Full_text","")).strip(),
            "publishedAt": str(row.get("Date_published","")).strip(),
            "source": "Fin_Cleaned",
            "sentiment": str(row.get("Final Status","")).strip()
        }
        arr.append(obj)
    out_path = os.path.join(out_dir, out_filename)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(arr, f, ensure_ascii=False, indent=2)
    print("Saved", len(arr), "articles to", out_path)
    return arr

# Create files
make_news_file(['apple','aapl'], "news_AAPL.json", desired_n=5)
make_news_file(['tesla','tsla'], "news_TSLA.json", desired_n=5)


Saved 5 articles to D:\University of San Diego\Natural Language processing and GenAI\FinAgentX Multi Agent AI System\FinAgentX-Multi-Agent-AI-System\data\sample\news_AAPL.json
Saved 5 articles to D:\University of San Diego\Natural Language processing and GenAI\FinAgentX Multi Agent AI System\FinAgentX-Multi-Agent-AI-System\data\sample\news_TSLA.json


[{'title': 'Nasdaq posts biggest daily percentage drop in a month on fears of aggressive Fed',
  'description': 'The Dow Jones Industrial Average fell 280.7 points, or 0.8%, to 34,641.18, the S&P 500 lost 57.52 points, or 1.26%, to 4,525.12 and the Nasdaq Composite dropped 328.39 points, or 2.26%, to 14,204.17.',
  'content': 'AgenciesWall Street\'s main indexes fell on Tuesday, dragged by weakness in tech and other growth stocks, after comments from Federal Reserve Governor Lael Brainard spooked investors about potential aggressive actions by the central bank to control inflation.\n\nThe tech-heavy Nasdaq posted its biggest daily percentage drop in about a month, with declines in heavyweight stocks such as Apple Inc and Amazon.com Inc .\n\nAt a conference on Tuesday, Brainard said she expects methodical interest rate increases and rapid reductions to the Fed\'s balance sheet to bring U.S. monetary policy to a "more neutral position" later this year, with further tightening to follow a

In [2]:
import os, json
for fname in ["news_AAPL.json","news_TSLA.json"]:
    p = os.path.join(repo_root, "data", "sample", fname)
    print(fname, "exists?", os.path.exists(p))
    if os.path.exists(p):
        with open(p,"r",encoding="utf-8") as f:
            arr = json.load(f)
        print(" -> articles:", len(arr))
        print(" -> first title:", arr[0]['title'][:120])


news_AAPL.json exists? True
 -> articles: 5
 -> first title: eMudhra: Should you exit stock after decent listing or hold for long term?
news_TSLA.json exists? True
 -> articles: 5
 -> first title: Nasdaq posts biggest daily percentage drop in a month on fears of aggressive Fed


In [3]:
# ensure yfinance is installed in the notebook kernel
%pip install yfinance

import os
import yfinance as yf
from datetime import datetime
repo_root = r"D:\University of San Diego\Natural Language processing and GenAI\FinAgentX Multi Agent AI System\FinAgentX-Multi-Agent-AI-System"
out_dir = os.path.join(repo_root, "data", "sample")
os.makedirs(out_dir, exist_ok=True)
tickers = ["AAPL","TSLA"]
for t in tickers:
    tk = yf.Ticker(t)
    hist = tk.history(period="1mo")
    fname = f"{t}_prices_1mo_{datetime.now().strftime('%Y%m%d')}.csv"
    out_path = os.path.join(out_dir, fname)
    hist.to_csv(out_path)
    print("Saved price CSV:", out_path)


Collecting yfinance
  Using cached yfinance-0.2.66-py2.py3-none-any.whl.metadata (6.0 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Using cached multitasking-0.0.12-py3-none-any.whl
Collecting frozendict>=2.3.4 (from yfinance)
  Using cached frozendict-2.4.6-py312-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Using cached peewee-3.18.2-py3-none-any.whl
Collecting beautifulsoup4>=4.11.1 (from yfinance)
  Using cached beautifulsoup4-4.14.2-py3-none-any.whl.metadata (3.8 kB)
Collecting curl_cffi>=0.7 (from yfinance)
  Using cached curl_cffi-0.13.0-cp39-abi3-win_amd64.whl.metadata (13 kB)
Collecting websockets>=13.0 (from yfinance)
  Using cached websockets-15.0.1-cp312-cp312-win_amd64.whl.metadata (7.0 kB)
Collecting soupsieve>1.2 (from beautifulsoup4>=4.11.1->yfinance)
  Using cached soupsieve-2.8-py3-none-any.whl.metadata (4.6 kB)
Collecting cffi>=1.12.0 (from curl_cffi>=0.7->yfinance)
  Using cached cffi-2.0.0-cp312-cp312-win_amd64.whl.metadata (2.6 k

In [None]:
# fetch_prices cell
import yfinance as yf
import pandas as pd
from datetime import datetime
import os

os.makedirs("data/sample", exist_ok=True)
tickers = ["AAPL","TSLA"]
for t in tickers:
    tk = yf.Ticker(t)
    hist = tk.history(period="1mo")  # 1 month sample
    fname = f"data/sample/{t}_prices_1mo_{datetime.now().strftime('%Y%m%d')}.csv"
    hist.to_csv(fname)
    print("Saved:", fname)
    display(hist.tail(3))
