In [1]:
import math

def norm_cdf(x):
    """Cumulative distribution function of the standard normal distribution.

    Evaluated through the error function: Phi(x) = (1 + erf(x / sqrt(2))) / 2.
    """
    scaled = x / math.sqrt(2)
    return 0.5 * (1 + math.erf(scaled))


def black_scholes_call(S, K, T, r, sigma):
    """Price a European call option with the Black-Scholes formula.

    Args:
        S: Spot price of the underlying.
        K: Strike price.
        T: Time to expiry in years. Values <= 0 are treated as expired and
            the intrinsic value max(S - K, 0) is returned.
        r: Risk-free rate as a decimal; the strike is discounted with
            exp(-r * T).
        sigma: Volatility as a decimal. Must be >= 0.

    Returns:
        The call price.

    Raises:
        ValueError: If ``sigma`` is negative (only checked when T > 0).
    """
    if T <= 0:
        return max(S - K, 0)
    if sigma < 0:
        raise ValueError(f"sigma must be non-negative, got {sigma!r}")

    discounted_strike = K * math.exp(-r * T)
    if sigma == 0:
        # Zero-volatility limit: the payoff is deterministic, and the general
        # formula below would divide by zero.
        return max(S - discounted_strike, 0.0)

    vol_sqrt_t = sigma * math.sqrt(T)
    d1 = (math.log(S / K) + (r + 0.5 * sigma ** 2) * T) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    return S * norm_cdf(d1) - discounted_strike * norm_cdf(d2)


def black_scholes_put(S, K, T, r, sigma):
    """Price a European put option with the Black-Scholes formula.

    Args:
        S: Spot price of the underlying.
        K: Strike price.
        T: Time to expiry in years. Values <= 0 are treated as expired and
            the intrinsic value max(K - S, 0) is returned.
        r: Risk-free rate as a decimal; the strike is discounted with
            exp(-r * T).
        sigma: Volatility as a decimal. Must be >= 0.

    Returns:
        The put price.

    Raises:
        ValueError: If ``sigma`` is negative (only checked when T > 0).
    """
    if T <= 0:
        return max(K - S, 0)
    if sigma < 0:
        raise ValueError(f"sigma must be non-negative, got {sigma!r}")

    discounted_strike = K * math.exp(-r * T)
    if sigma == 0:
        # Zero-volatility limit: the payoff is deterministic, and the general
        # formula below would divide by zero.
        return max(discounted_strike - S, 0.0)

    vol_sqrt_t = sigma * math.sqrt(T)
    d1 = (math.log(S / K) + (r + 0.5 * sigma ** 2) * T) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    return discounted_strike * norm_cdf(-d2) - S * norm_cdf(-d1)


def straddle_price(spot, strike, iv, days_to_expiry, rate, days_per_year=365.0):
    """Price a straddle: one call plus one put at the same strike and expiry.

    Args:
        spot: Spot price of the underlying (S).
        strike: Strike price shared by both legs (K).
        iv: Implied volatility as a decimal (e.g. 0.2 for 20%).
        days_to_expiry: Number of days until expiry.
        rate: Annualised risk-free rate as a decimal (e.g. 0.05 for 5%).
        days_per_year: Day-count basis used to convert ``days_to_expiry`` to
            years. Defaults to 365.0, which reproduces the previous behaviour;
            pass another basis (e.g. 252) to count trading days instead.

    Returns:
        The sum of the Black-Scholes call and put prices.

    Raises:
        ValueError: If ``days_per_year`` is not positive.
    """
    if days_per_year <= 0:
        raise ValueError(f"days_per_year must be positive, got {days_per_year!r}")

    T = days_to_expiry / days_per_year  # convert days to a year fraction
    call = black_scholes_call(spot, strike, T, rate, iv)
    put = black_scholes_put(spot, strike, T, rate, iv)
    return call + put

In [53]:
# Example: at-the-money straddle with spot 75, strike 75, implied volatility
# 80.15%, 14 days to expiry, and a 4.25% risk-free rate.
spot = 75
strike = 75
iv = 0.8015       # 80.15% implied volatility, as a decimal
days = 14
rate = 0.0425        # 4.25% annualised risk-free rate, as a decimal

price = straddle_price(spot, strike, iv, days, rate)
print(f"Straddle 价格: {price:.4f}")

Straddle 价格: 9.3766


In [54]:
# Example: at-the-money straddle with spot 445, strike 445, implied volatility
# 47.91%, 14 days to expiry, and a 4.25% risk-free rate.
spot = 445
strike = 445
iv = 0.4791       # 47.91% implied volatility, as a decimal
days = 14
rate = 0.0425        # 4.25% annualised risk-free rate, as a decimal

price = straddle_price(spot, strike, iv, days, rate)
print(f"Straddle 价格: {price:.4f}")

Straddle 价格: 33.2810


In [65]:
# Future value of a fixed contribution added every step with compound growth.
# As written: 0.5 (in units of 10,000 USD) is added per step, the balance is
# multiplied by 1.5 ** (1/12) per step, and the loop runs 120 steps -- i.e. a
# 50% yearly growth factor applied monthly over 10 years.
# NOTE(review): the earlier comments described a 70% annual return over
# 6 years (72 months) with monthly rate (1 + 0.7) ** (1/12) - 1. The code does
# not match that description; confirm which was intended.
monthly_rate = (1.5) ** (1/12)  # per-step growth factor (1 + rate), not the rate itself
monthly_investment = 0.5  # contribution per step, in units of 10,000 USD

# Each step grows the existing balance first and then adds the new
# contribution, so the final contribution earns no growth.
total = 0
for month in range(120):  # 120 monthly steps
    total = total * monthly_rate + monthly_investment

print(f"最终金额: {total:.2f} 万美金")

最终金额: 824.43 万美金


In [69]:
# Same recurrence with coarser steps: growth factor 1.5 per step,
# contribution 6 per step, 6 steps.
# NOTE(review): despite the `monthly_` names these look like yearly figures
# (6 = 12 x 0.5 per year, factor 1.5 per year, 6 years) -- confirm.
monthly_rate = (1.5)  # per-step growth factor
monthly_investment = 6  # contribution per step, in units of 10,000 USD

# Grow the existing balance, then add the new contribution, once per step.
total = 0
for month in range(6):  # 6 steps
    total = total * monthly_rate + monthly_investment

print(f"最终金额: {total:.2f} 万美金")

最终金额: 124.69 万美金


In [67]:
import pymupdf

# Extract the text of every page of the PDF and join the pages with newlines.
# The context manager closes the document handle once all pages are read;
# previously the handle was left open for the lifetime of the kernel.
# NOTE(review): absolute, machine-specific path -- consider a configurable base directory.
with pymupdf.open("/Users/deng/Desktop/alpha-gpt/alpha-gpt/1601.00991v3.pdf") as doc:
    text = "\n".join(page.get_text() for page in doc)

In [71]:
import re

def split_alpha101(text: str):
    """Cut the paper text into one chunk per ``Alpha#<n>`` marker.

    Every chunk begins at an ``Alpha#<digits>`` marker and extends up to (but
    not including) the next marker, or to the end of the text for the last
    one. Text before the first marker is dropped.

    Returns:
        A list of dicts with keys ``alpha_id`` (the integer after ``Alpha#``)
        and ``content`` (the chunk with surrounding whitespace stripped).
    """
    chunk_re = re.compile(r"(Alpha#\d+[\s\S]*?)(?=Alpha#\d+|$)")

    def _describe(raw):
        # Every chunk starts with its own marker, so the first hit is its id.
        found = re.search(r"Alpha#(\d+)", raw)
        number = None if not found else int(found.group(1))
        return {"alpha_id": number, "content": raw.strip()}

    return [_describe(hit.group(1)) for hit in chunk_re.finditer(text)]


# Split the extracted paper text into per-alpha chunks and report how many
# markers were matched.
alpha_chunks = split_alpha101(text)
print(f"Extracted {len(alpha_chunks)} alpha chunks")

Extracted 103 alpha chunks


In [79]:
# Drop the first two matches. The chunks are recomputed from `text` instead of
# slicing `alpha_chunks` in place, so re-running this cell no longer discards
# two more chunks each time.
# NOTE(review): 103 chunks were extracted on the recorded run; the two leading
# ones are presumably mentions of "Alpha#" outside the formula list -- confirm.
alpha_chunks = split_alpha101(text)[2:]

In [81]:
def build_alpha101_documents(alpha_chunks):
    """Turn alpha chunks into ``{"text", "metadata"}`` records.

    Args:
        alpha_chunks: Iterable of dicts carrying ``alpha_id`` and ``content``
            keys.

    Returns:
        A list with one record per chunk. ``text`` is the chunk content and
        ``metadata`` holds fixed provenance fields plus the alpha id and a
        short hint string built from that id.
    """

    def _to_document(chunk):
        number = chunk["alpha_id"]
        body = chunk["content"]
        metadata = {
            "kb_type": "paper",
            "paper": "Alpha101",
            "alpha_id": number,
            "source": "Kakushadze 2015",
            "semantic_hint": f"alpha {number} quantitative trading signal formula",
        }
        return {"text": body, "metadata": metadata}

    return [_to_document(chunk) for chunk in alpha_chunks]


# Wrap every chunk as a {"text", "metadata"} record for the embedding step.
paper_docs = build_alpha101_documents(alpha_chunks)

In [74]:
def clean_text(text: str) -> str:
    """Strip each line of ``text`` and drop lines that are empty afterwards.

    Returns:
        The surviving lines joined with a single newline.
    """
    trimmed = (raw.strip() for raw in text.splitlines())
    return "\n".join(piece for piece in trimmed if piece)


# Normalise whitespace in every document's text, in place.
# NOTE(review): this cell's execution count (74) is lower than that of the
# cell that builds `paper_docs` (81), and the chunks printed by the retrieval
# cell further down still appear to end lines with trailing spaces -- the
# persisted index may have been built from un-cleaned text. Confirm by
# re-running the notebook top to bottom.
for d in paper_docs:
    d["text"] = clean_text(d["text"])

In [84]:
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
# Load variables from a .env file before constructing the client.
# NOTE(review): presumably this supplies the OpenAI API key -- confirm.
load_dotenv()
# Embedding client used for both indexing and querying; it must be the same
# model on both sides for similarity search to be meaningful.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large"
)

In [85]:
# Display the client's repr to inspect its configuration.
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x16bea2130>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x118faae50>, model='text-embedding-3-large', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [88]:
from langchain.vectorstores import FAISS

# Parallel lists: texts[i] is embedded and metadatas[i] is stored alongside it.
texts = [d["text"] for d in paper_docs]
metadatas = [d["metadata"] for d in paper_docs]

# Build a FAISS vector store from the paper chunks using `embeddings`.
paper_db = FAISS.from_texts(
    texts=texts,
    metadatas=metadatas,
    embedding=embeddings
)

In [89]:
# Display the vector store object to confirm it was created.
paper_db

<langchain_community.vectorstores.faiss.FAISS at 0x16c462dc0>

In [90]:
# Persist the paper index to disk; a later cell reloads this same directory
# with FAISS.load_local.
# NOTE(review): absolute, machine-specific path.
paper_db.save_local("/Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_alpha101_paper_db")

In [3]:
import json
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from dotenv import load_dotenv
# Load variables from a .env file before any client is constructed.
# NOTE(review): presumably this supplies the OpenAI API key -- confirm.
load_dotenv()

def load_jsonl(path):
    """Read a JSON Lines file, tolerating blank and malformed lines.

    Args:
        path: Location of a UTF-8 encoded file with one JSON value per line.

    Returns:
        The decoded values of every line that parsed, in file order. Blank
        lines are ignored. Lines that fail to parse are left out, and a
        warning with up to five offending lines is printed.
    """
    parsed = []
    failures = []

    with open(path, "r", encoding="utf-8") as handle:
        for lineno, raw in enumerate(handle, start=1):
            payload = raw.strip()
            if payload:
                try:
                    parsed.append(json.loads(payload))
                except json.JSONDecodeError as exc:
                    # Keep the line number and text so the report can show them.
                    failures.append((lineno, payload, str(exc)))

    if len(failures) > 0:
        print(f"[WARN] {len(failures)} invalid json lines in {path}")
        # Only the first five failures are shown, to keep the output short.
        for lineno, payload, message in failures[:5]:
            print(f"Line {lineno}: {message}")
            print(payload[:200])
            print("-" * 40)

    return parsed

def build_embedding_text(item):
    """Construct the text that is embedded for one knowledge-base record.

    The text has up to three labelled lines -- ``Name``, ``Description`` and
    ``Semantic`` -- taken from the record's ``name``, ``description`` and
    ``semantic_text`` keys. A field that is missing, None, or blank is left
    out entirely. The earlier version filtered on the already-labelled string
    (e.g. ``"Name: "``), which is never blank, so empty fields still produced
    label-only lines.

    Args:
        item: Mapping for one record.

    Returns:
        The labelled lines joined with newlines, or an empty string when none
        of the three fields has content.
    """
    labelled_fields = (
        ("Name", item.get("name")),
        ("Description", item.get("description")),
        ("Semantic", item.get("semantic_text")),
    )

    lines = []
    for label, value in labelled_fields:
        if value is None:
            continue
        # Test the value itself for content, not the labelled line.
        if str(value).strip():
            lines.append(f"{label}: {value}")
    return "\n".join(lines)

# Module-level embedding client that build_faiss_from_jsonl reads as a global.
# NOTE(review): same model name as the `embeddings` object created in an
# earlier cell; this rebinds that name.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large"
)

def build_faiss_from_jsonl(jsonl_path, save_path):
    """Embed the records of a JSONL file into a FAISS index and save it.

    Each record is turned into text with ``build_embedding_text`` and embedded
    with the module-level ``embeddings`` client. The full record is stored as
    that text's metadata so a search hit can be traced back to its source.

    Args:
        jsonl_path: Path of the JSON Lines file to index.
        save_path: Directory the index is written to via ``save_local``.

    Returns:
        The FAISS vector store that was built.

    Raises:
        ValueError: If the file yields no record with text to embed. This
            fails early with a clear message rather than passing an empty
            corpus to the vector store.
    """
    records = load_jsonl(jsonl_path)

    texts = []
    metadatas = []
    skipped = 0

    for item in records:
        embedding_text = build_embedding_text(item)
        if not embedding_text.strip():
            # Nothing to embed for this record; leave it out of the index.
            skipped += 1
            continue
        texts.append(embedding_text)
        metadatas.append(item)  # keep the whole record for traceability

    if skipped:
        print(f"[WARN] skipped {skipped} records with empty embedding text in {jsonl_path}")

    if not texts:
        raise ValueError(f"No embeddable records found in {jsonl_path}")

    db = FAISS.from_texts(
        texts=texts,
        metadatas=metadatas,
        embedding=embeddings
    )

    db.save_local(save_path)
    print(f"Saved FAISS DB to {save_path}, size={len(texts)}")

    return db

# Build and persist one FAISS index per knowledge-base file.
# NOTE(review): absolute, machine-specific paths are repeated in every call.

# Index built from layer1_modules.jsonl.
build_faiss_from_jsonl(
    jsonl_path="/Users/deng/Desktop/alpha-gpt/alpha-gpt/rag_fields/layer1_modules.jsonl",
    save_path="/Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_layer1_modules"
)

# Index built from layer2_fields.jsonl.
build_faiss_from_jsonl(
    jsonl_path="/Users/deng/Desktop/alpha-gpt/alpha-gpt/rag_fields/layer2_fields.jsonl",
    save_path="/Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_layer2_fields"
)

# Index built from op.jsonl. As the cell's last expression, this call's return
# value is what the notebook displays.
build_faiss_from_jsonl(
    jsonl_path="/Users/deng/Desktop/alpha-gpt/alpha-gpt/rag_fields/op.jsonl",
    save_path="/Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_operators"
)

Saved FAISS DB to /Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_layer1_modules, size=15
Saved FAISS DB to /Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_layer2_fields, size=460
Saved FAISS DB to /Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_operators, size=38


<langchain_community.vectorstores.faiss.FAISS at 0x10a0f6ca0>

In [15]:
# Reload the four persisted FAISS indexes with the same embedding client used
# to build them.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# serialized data from disk; keep it only for index directories this notebook
# wrote itself, never for files from an untrusted source.
layer1_db = FAISS.load_local(
    "/Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_layer1_modules",
    embeddings,
    allow_dangerous_deserialization=True
)

layer2_db = FAISS.load_local(
    "/Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_layer2_fields",
    embeddings,
    allow_dangerous_deserialization=True
)

op_db = FAISS.load_local(
    "/Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_operators",
    embeddings,
    allow_dangerous_deserialization=True
)
# Note the name: the store built in an earlier cell is `paper_db`, while this
# reloaded copy is `papers_db`.
papers_db = FAISS.load_local(
    "/Users/deng/Desktop/alpha-gpt/alpha-gpt/src/agent/database/faiss_alpha101_paper_db",
    embeddings,
    allow_dangerous_deserialization=True
)

In [16]:
# Retrieval check against the paper index: fetch the three nearest chunks for
# a natural-language query and preview each one.
query = "order cancellation information and short term price reversal"

docs = papers_db.similarity_search(query, k=3)

for d in docs:
    print("Alpha ID:", d.metadata["alpha_id"])
    print(d.page_content[:300])  # first 300 characters only
    print("=" * 80)

Alpha ID: 69
Alpha#69: ((rank(ts_max(delta(IndNeutralize(vwap, IndClass.industry), 2.72412), 
4.79344))^Ts_Rank(correlation(((close * 0.490655) + (vwap * (1 - 0.490655))), adv20, 4.92416), 
9.0615)) * -1)
Alpha ID: 58
Alpha#58: (-1 * Ts_Rank(decay_linear(correlation(IndNeutralize(vwap, IndClass.sector), volume, 
3.92795), 7.89291), 5.50322))
Alpha ID: 10
Alpha#10: rank(((0 < ts_min(delta(close, 1), 4)) ? delta(close, 1) : ((ts_max(delta(close, 1), 4) < 0) 
? delta(close, 1) : (-1 * delta(close, 1)))))


In [5]:
# Retrieval check against the layer-2 field index: fetch the three nearest
# records and print each one's name and description from its stored metadata.
query = "change rate of large buy order cancellations"

docs = layer2_db.similarity_search(query, k=3)

for d in docs:
    print(d.metadata["name"])
    print(d.metadata["description"])
    print("-" * 40)

CancelLine.CancelLargeBuyDuration_ChangeRate
Relative change rate of timing associated with large buy-side canceled orders compared to the previous 5-minute interval.
----------------------------------------
CancelLine.CancelLargeBuyVolume_ChangeRate
Relative change rate of large buy-side canceled order volume compared to the previous 5-minute interval.
----------------------------------------
CancelLine_vs_TradeLine.LargeBuyVolume_Ratio
Ratio of large buy-side volume between canceled orders and executed trades.
----------------------------------------


In [6]:
from openai import OpenAI

# Print the id of every model returned by the models listing endpoint.
# NOTE(review): the client is constructed without arguments; presumably it
# picks up credentials from the environment -- confirm.
client = OpenAI()

models = client.models.list()
for m in models.data:
    print(m.id)

gpt-4-0613
gpt-4
gpt-3.5-turbo
gpt-5.2-codex
gpt-4o-mini-tts-2025-12-15
gpt-realtime-mini-2025-12-15
gpt-audio-mini-2025-12-15
chatgpt-image-latest
davinci-002
babbage-002
gpt-3.5-turbo-instruct
gpt-3.5-turbo-instruct-0914
dall-e-3
dall-e-2
gpt-4-1106-preview
gpt-3.5-turbo-1106
tts-1-hd
tts-1-1106
tts-1-hd-1106
text-embedding-3-small
text-embedding-3-large
gpt-4-0125-preview
gpt-4-turbo-preview
gpt-3.5-turbo-0125
gpt-4-turbo
gpt-4-turbo-2024-04-09
gpt-4o
gpt-4o-2024-05-13
gpt-4o-mini-2024-07-18
gpt-4o-mini
gpt-4o-2024-08-06
chatgpt-4o-latest
gpt-4o-audio-preview
gpt-4o-realtime-preview
omni-moderation-latest
omni-moderation-2024-09-26
gpt-4o-realtime-preview-2024-12-17
gpt-4o-audio-preview-2024-12-17
gpt-4o-mini-realtime-preview-2024-12-17
gpt-4o-mini-audio-preview-2024-12-17
o1-2024-12-17
o1
gpt-4o-mini-realtime-preview
gpt-4o-mini-audio-preview
o3-mini
o3-mini-2025-01-31
gpt-4o-2024-11-20
gpt-4o-search-preview-2025-03-11
gpt-4o-search-preview
gpt-4o-mini-search-preview-2025-03-11
gpt