In [None]:
# Visualize Financial Market Data with matplotlib, Seaborn, and Plotly Dash
import numpy as np
import pandas as pf
from openbb import obb
import matplotlib as plt
import pandas as pd
# Ask OpenBB to return results as pandas DataFrames.
obb.user.preferences.output_type = 'dataframe'

# Benchmark (QQQ) first, then the asset (AAPL): price history from yfinance,
# converted to period-over-period returns of the close.
qqq = obb.equity.price.historical('QQQ', provider='yfinance')
qqq_returns = qqq['close'].pct_change()

aapl = obb.equity.price.historical('AAPL', provider='yfinance')
aapl_returns = aapl['close'].pct_change()

# Put the two return series side by side, naming the columns as they are joined.
asset_bench = pd.concat(
    [aapl_returns, qqq_returns],
    axis=1,
    keys=['AAPL', 'QQQ'],
)

asset_bench.plot.scatter(x='QQQ', y='AAPL', s=0.25)

In [None]:
from pandas.plotting import scatter_matrix, bootstrap_plot
# Grid of pairwise scatter plots for every column of asset_bench.
scatter_matrix(asset_bench)

In [None]:
# pct_change() leaves a NaN in the first row. Drop it so it cannot be drawn
# into the bootstrap samples and turn their mean/median/midrange into NaN.
bootstrap_plot(aapl_returns.dropna())

In [None]:
# Animating the US Treasury yield curve with Matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import animation
from openbb import obb
from IPython.display import HTML
import warnings
warnings.filterwarnings('ignore')

obb.user.preferences.output_type = 'dataframe'

maturities = ['3m', '6m', '1y', '2y', '3y', '5y', '7y', '10y', '30y']

# Treasury rates for 1985-2005 from the 'federal_reserve' provider. Rows with
# no values at all are removed, the 'month_1' and 'year_20' columns are
# dropped, and the remaining columns are renamed to `maturities`.
treasury_raw = obb.fixedincome.government.treasury_rates(
    start_date='1985-01-01',
    end_date='2005-01-01',
    provider='federal_reserve',
)
data = treasury_raw.dropna(how='all').drop(columns=['month_1', 'year_20'])
data.columns = maturities

# Flag the rows where the 3-month rate is above the 10-year rate.
data['inverted'] = data['3m'] > data['10y']

# Overall yield range: reported below and reused as the y-axis limits.
y_min = data[maturities].min().min()
y_max = data[maturities].max().max()

# Sanity-check the data.
print(f"数据形状: {data.shape}")
print(f"数据范围: {y_min:.2f} - {y_max:.2f}")

# Render animations as JavaScript HTML and close any figures left open.
plt.rcParams['animation.html'] = 'jshtml'
plt.close('all')

fig, ax = plt.subplots(figsize=(12, 8))
(line,) = ax.plot([], [], 'o-', linewidth=2, markersize=8, alpha=0.8)

# One x position per maturity, labelled with the maturity name.
tick_positions = range(len(maturities))
ax.set_xlim(0, len(maturities) - 1)
ax.set_ylim(y_min, y_max)
ax.set_xticks(tick_positions)
ax.set_xticklabels(maturities, rotation=45)

ax.set_xlabel('Time to Maturity', fontsize=12)
ax.set_ylabel('Yield (%)', fontsize=12)
ax.set_title('US Treasury Yield Curve', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)

def init_func():
    """Clear the curve's data and return the line artist in a 1-tuple."""
    line.set_data([], [])
    return (line,)

def animate(i):
    """Draw the yield curve stored at row ``i`` of ``data``.

    The line is coloured red when the row is flagged as inverted and blue
    otherwise, and the axes title is updated with the row's date. A row
    number at or past the end of ``data`` leaves the plot untouched.

    Returns:
        A one-element tuple holding the line artist.
    """
    if i < len(data):
        curve = data[maturities].iloc[i].values
        date_label = data.index[i].strftime('%Y-%m-%d')

        # Red marks an inverted curve; the opacity is the same in both cases.
        line.set_color('red' if data['inverted'].iloc[i] else 'blue')
        line.set_alpha(0.8)

        line.set_data(range(len(maturities)), curve)
        ax.set_title(f'US Treasury Yield Curve - {date_label}', fontsize=14, fontweight='bold')

    return (line,)

# Subsample the rows so the animation never has more than `max_frames` frames.
# The step uses ceiling division: flooring would allow almost twice as many
# frames (e.g. 999 rows -> step 1 -> 999 frames).
max_frames = 500
frame_step = max(1, -(-len(data) // max_frames))
frames = range(0, len(data), frame_step)

# Build the animation.
ani = animation.FuncAnimation(
    fig,
    animate,
    init_func=init_func,
    frames=frames,
    interval=50,
    blit=True,
    repeat=True,
    cache_frame_data=False
)

# Render the player first, then close the figure so the notebook does not
# also show a static copy of it underneath the animation.
player_html = ani.to_jshtml()
plt.close(fig)

# Show the animation.
HTML(player_html)




In [None]:
# Plotting options implied volatility surface with Matplotlib
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from openbb import obb
obb.user.preferences.output_type = 'dataframe'

# One name for the underlying, used for both the request and the plot title
# so the two cannot disagree.
symbol = 'TSLA'

chains = obb.derivatives.options.chains(
    symbol,
    provider='cboe'
)

calls = chains[chains['option_type'] == 'call']

# Keep contracts with fewer than 100 days to expiration and strikes above 100.
calls = calls[
    (calls.dte < 100)
    & (calls.strike > 100)
]
# pivot() needs unique (strike, dte) pairs, so every contract whose pair
# occurs more than once is discarded. The result is reassigned instead of
# using inplace=True, which would mutate a filtered slice of `chains`.
calls = calls.drop_duplicates(subset=['dte', 'strike'], keep=False)

# Implied volatility with strikes as rows and days to expiration as columns;
# expirations without any implied volatility are removed.
vol_surface = (
    calls
    .pivot(
        index='strike',
        columns='dte',
        values='implied_volatility'
    )
    .dropna(how='all', axis=1)
)

# meshgrid(x, y) returns the x grid first. Here x is days to expiration (the
# columns of vol_surface) and y is the strike (its index).
dte_grid, strike_grid = np.meshgrid(
    vol_surface.columns,  # x values
    vol_surface.index,  # y values
)
print(f"网格形状: {dte_grid.shape}, {strike_grid.shape}")
print(dte_grid[:5, :5])
print(strike_grid[:5, :5])

fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel("Days to Expiration")
ax.set_ylabel("Strike Price")
ax.set_zlabel("Implied Volatility")
ax.set_title(f"{symbol} Call Option Implied Volatility Surface")

ax.plot_surface(
    dte_grid,
    strike_grid,
    vol_surface.values,
    cmap='viridis'
)



In [None]:
# Visualizing statistical relationships with Seaborn
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from openbb import obb
obb.user.preferences.output_type = 'dataframe'

# Closing prices from 2020-01-01 onwards, reshaped to one column per symbol.
prices_long = obb.equity.price.historical(
    ['AAPL', 'SPY'],
    start_date='2020-01-01',
    provider='yfinance',
)
data = prices_long.pivot(columns='symbol', values='close')

# Period-over-period returns. reset_index() moves the index into a regular
# column, which the melt below refers to as 'date'.
returns = data.pct_change().dropna().reset_index()

# Long format: one row per (date, stock) pair, restricted to AAPL.
melted = returns.melt(
    id_vars=['date'],
    value_vars=['AAPL'],
    var_name='stock',
    value_name='returns',
)
print(melted.head())

# Assign every observation to its calendar month for the box plot.
melted['date'] = pd.to_datetime(melted['date'])
melted['month'] = melted['date'].dt.to_period('M')
print(melted.head())

g = sns.boxplot(data=melted, x='month', y='returns', hue='stock')

# Label the boxes with the months, rotated so they do not overlap.
month_labels = melted['month'].unique()
g.set_xticklabels(month_labels, rotation=90)



In [None]:
print(returns.head())

# `returns` already has a default integer index with 'date' as a regular
# column, so it is used as-is. Calling reset_index(inplace=True) here would
# add a spurious 'index' column on every run and eventually raise.
g = sns.jointplot(
    x='SPY',
    y='AAPL',
    data=returns,
    kind='reg',
    truncate=False
)


In [None]:
import requests
from io import StringIO

# Get Dow Jones Industrial Average components from Wikipedia
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(
    "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average",
    headers=headers,
    timeout=30,
)
response.raise_for_status()
dji_tables = pd.read_html(StringIO(response.text))

# Pick the components table by its 'Symbol' column rather than by position,
# because the number and order of tables on the page can change.
dji_components = next(
    (table for table in dji_tables if 'Symbol' in table.columns),
    None,
)
if dji_components is None:
    raise ValueError("No table with a 'Symbol' column found on the Wikipedia page")
dji_symbols = dji_components['Symbol'].tolist()

# Closing prices from 2024-01-01 onwards, one column per symbol.
dji_data = obb.equity.price.historical(
    dji_symbols,
    start_date='2024-01-01',
    provider='yfinance'
).pivot(columns='symbol', values='close')


In [None]:
# Returns per constituent, computed without forward-filling gaps. Any row that
# still contains a missing value is discarded.
dji_returns = dji_data.pct_change(fill_method=None).dropna()

corr = dji_returns.corr()

# Boolean mask covering the diagonal and everything above it, so only the
# lower triangle of the symmetric matrix is drawn.
mask = np.triu(np.ones(corr.shape, dtype=bool))

cmap = sns.diverging_palette(230, 20, as_cmap=True)
plt.rcParams['font.size'] = 8

heatmap_options = dict(
    mask=mask,
    cmap=cmap,
    vmin=-1,
    vmax=1,
    center=0,
    square=True,
    linewidths=0.5,
)
sns.heatmap(corr, **heatmap_options)

In [None]:
import datetime
import numpy as np
import pandas as pd
import dash
from dash import dcc, html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import plotly.io as pio
from sklearn.decomposition import PCA
from openbb import obb
obb.user.preferences.output_type = "dataframe"

pio.templates.default = "plotly"
app = dash.Dash(__name__, external_stylesheets=[
    dbc.themes.BOOTSTRAP
])

# Free-text input for a comma-separated list of ticker symbols.
ticker_field = [
    html.Label("Enter Ticker Symbols:"),
    dcc.Input(
        id="ticker-input",
        type="text",
        placeholder="Enter Tickers separated by commas (e.g. AAPL,MSFT)",
        style={"width": "50%"}
    ),
]

# Dropdown for the number of principal components (1-5, initially 3).
# Each option must use the key "label"; it was previously misspelled "lable".
components_field = [
    html.Label("Select Number of Components:"),
    dcc.Dropdown(
        id="component-dropdown",
        options=[{"label": i, "value": i} for i in range(1, 6)],
        value=3,
        style={"width": "50%"}
    ),
]
# Alias for the earlier misspelled name, in case anything still refers to it.
components_fiel = components_field

# Date range picker, initially covering the last three years. Plain dates
# (no time component) are passed, and the display format has single dashes.
date_picker_field = [
    html.Label("Select Date Range:"),
    dcc.DatePickerRange(
        id="date-picker",
        start_date=(datetime.datetime.now() - datetime.timedelta(days=365 * 3)).date(),
        end_date=datetime.datetime.now().date(),
        display_format="YYYY-MM-DD"
    ),
]

submit = [
    html.Button("Submit", id="submit-button")
]

In [None]:
# Creating an interactive PCA analytics dashboard with Plotly Dash
import datetime
import numpy as np
import pandas as pd
import dash
from dash import dcc, html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output, State
import plotly.graph_objs as go
import plotly.io as pio
from sklearn.decomposition import PCA
from openbb import obb  # 保留你原来的引用方式
obb.user.preferences.output_type = "dataframe"

# Default Plotly template.
pio.templates.default = "plotly"

# Dash app using the Bootstrap stylesheet.
bootstrap_css = [dbc.themes.BOOTSTRAP]
app = dash.Dash(__name__, external_stylesheets=bootstrap_css)

# --- UI components ---
half_width = {"width": "50%"}

# Free-text input for a comma-separated list of ticker symbols.
ticker_label = html.Label("Enter Ticker Symbols:")
ticker_input = dcc.Input(
    id="ticker-input",
    type="text",
    placeholder="Enter tickers separated by commas (e.g. AAPL,MSFT)",
    style=dict(half_width),
)
ticker_field = [ticker_label, ticker_input]

# Dropdown offering 1 to 5 principal components, initially 3.
component_options = []
for count in range(1, 6):
    component_options.append({"label": count, "value": count})

components_field = [
    html.Label("Select Number of Components:"),
    dcc.Dropdown(
        id="component-dropdown",
        options=component_options,
        value=3,
        style=dict(half_width),
    ),
]

# Date range picker, initially spanning the 3 * 365 days up to today.
today = datetime.datetime.now().date()
three_years_ago = today - datetime.timedelta(days=365 * 3)

date_picker_field = [
    html.Label("Select Date Range:"),
    dcc.DatePickerRange(
        id="date-picker",
        start_date=three_years_ago,
        end_date=today,
        display_format="YYYY-MM-DD",
    ),
]

submit_button = html.Button(
    "Submit",
    id="submit-button",
    n_clicks=0,
    className="btn btn-primary",
)
submit = [submit_button]

# Layout: a heading, one row per input control, then a row holding the three
# charts, each in a width-4 column.
control_rows = [
    dbc.Row([dbc.Col(control)])
    for control in (ticker_field, components_field, date_picker_field, submit)
]

chart_ids = ("bar-chart", "line-chart", "scatter-plot")
chart_row = dbc.Row(
    [dbc.Col(dcc.Graph(id=chart_id), width=4) for chart_id in chart_ids],
    className="mt-4",
)

app.layout = dbc.Container(
    [html.H1("PCA on Stock Returns"), *control_rows, chart_row],
    fluid=True,
)

# --- Callback ---
@app.callback(
    Output("bar-chart", "figure"),
    Output("line-chart", "figure"),
    Output("scatter-plot", "figure"),
    Input("submit-button", "n_clicks"),
    State("ticker-input", "value"),
    State("component-dropdown", "value"),
    State("date-picker", "start_date"),
    State("date-picker", "end_date"),
)
def update_graphs(n_clicks, tickers, n_components, start_date, end_date):
    """Run PCA on the selected tickers' returns and build the three charts.

    Args:
        n_clicks: Click count of the submit button; it only triggers the
            callback and is not otherwise used.
        tickers: Comma-separated ticker symbols typed by the user, or None.
        n_components: Requested number of principal components, or None when
            the dropdown has no selection (treated as 3, its initial value).
        start_date: Start of the date range; anything ``pd.to_datetime``
            accepts. Unparseable or missing dates fall back to the last
            three years.
        end_date: End of the date range, handled like ``start_date``.

    Returns:
        A tuple ``(bar_fig, line_fig, scatter_fig)``: explained variance per
        component, cumulative explained variance, and the loadings of every
        asset on the first two components. Three empty figures are returned
        when there is no ticker input, the download fails, or too little
        data is left to fit a PCA.
    """
    from sklearn.preprocessing import StandardScaler

    empty_fig = go.Figure()
    no_charts = (empty_fig, empty_fig, empty_fig)

    # Nothing to do until the user has typed at least one ticker.
    if not isinstance(tickers, str) or not tickers.strip():
        return no_charts

    # Normalise the symbols; drop blanks and repeats while keeping input order.
    tickers_list = list(dict.fromkeys(
        t.strip().upper() for t in tickers.split(",") if t.strip()
    ))
    if not tickers_list:
        return no_charts

    # A cleared dropdown delivers None, which min() below cannot compare.
    if n_components is None:
        n_components = 3

    # Parse the dates. pd.to_datetime(None) returns None, whose missing
    # .date() raises AttributeError, so a missing date takes the fallback too.
    try:
        start = pd.to_datetime(start_date).date()
        end = pd.to_datetime(end_date).date()
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Fall back to the default three-year window.
        end = datetime.datetime.now().date()
        start = end - datetime.timedelta(days=365 * 3)

    # Fetch prices through OpenBB. Provider and network failures are varied,
    # so any error is reported and answered with empty charts.
    try:
        raw = obb.equity.price.historical(
            tickers_list,
            start_date=str(start),
            end_date=str(end),
            provider="polygon"
        )
        is_frame = isinstance(raw, pd.DataFrame)
        if is_frame and {"symbol", "close"}.issubset(raw.columns):
            data = raw.pivot(columns="symbol", values="close")
        elif is_frame and "close" in raw.columns and len(tickers_list) == 1:
            # NOTE(review): a single-ticker result appears to come back
            # without a 'symbol' column - confirm for this provider. Use only
            # the close so PCA does not run over open/high/low/volume.
            data = raw[["close"]].rename(columns={"close": tickers_list[0]})
        else:
            data = raw
    except Exception as e:
        print("Error fetching data:", e)
        return no_charts

    # Simple returns. Symbols with no returns at all are dropped first, then
    # every date on which any remaining symbol is missing, because PCA
    # rejects NaN input.
    daily_returns = (
        data.pct_change(fill_method=None)
        .dropna(axis=1, how="all")
        .dropna()
    )
    # Shape is (n_samples, n_features); at least two samples are needed for
    # the variance to be defined.
    n_samples, n_features = daily_returns.shape
    if n_samples < 2 or n_features < 1:
        return no_charts

    # The component count cannot exceed the number of features or samples.
    max_components = min(n_components, n_features, n_samples)
    if max_components < 1:
        return no_charts

    # Standardise every return series, then fit the PCA.
    X_std = StandardScaler().fit_transform(daily_returns.values)
    pca = PCA(n_components=max_components)
    pca.fit(X_std)
    explained_var_ratio = pca.explained_variance_ratio_

    # Bar chart: explained variance ratio of each principal component.
    pcs = [f"PC{i+1}" for i in range(max_components)]
    bar_fig = go.Figure(
        data=[go.Bar(x=pcs, y=explained_var_ratio)],
        layout=go.Layout(title="Explained Variance by Component",
                         xaxis=dict(title="Principal Component"),
                         yaxis=dict(title="Explained Variance"))
    )

    # Line chart: cumulative explained variance.
    cumulative_var_ratio = np.cumsum(explained_var_ratio)
    line_fig = go.Figure(
        data=[go.Scatter(x=pcs, y=cumulative_var_ratio, mode="lines+markers")],
        layout=go.Layout(title="Cumulative Explained Variance",
                         xaxis=dict(title="Principal Component"),
                         yaxis=dict(title="Cumulative Explained Variance"))
    )

    # Factor exposures: the coefficients of each principal component in the
    # original asset space; pca.components_ is (n_components, n_features).
    factor_exposures = pd.DataFrame(
        pca.components_,
        index=[f"f{i+1}" for i in range(max_components)],
        columns=daily_returns.columns
    )

    # Scatter plot: each asset's exposure to the first two factors. With a
    # single component only a titled placeholder figure is returned.
    if max_components >= 2:
        x_vals = factor_exposures.loc["f1"].values
        y_vals = factor_exposures.loc["f2"].values
        labels = factor_exposures.columns.tolist()
        scatter_fig = go.Figure(
            data=[
                go.Scatter(
                    x=x_vals,
                    y=y_vals,
                    mode="markers+text",
                    text=labels,
                    textposition="top center",
                )
            ],
            layout=go.Layout(
                title="Scatter Plot of First Two Factor Exposures",
                xaxis=dict(title="Factor 1 Loading"),
                yaxis=dict(title="Factor 2 Loading")
            )
        )
    else:
        scatter_fig = go.Figure()
        scatter_fig.update_layout(title="Not enough components for 2D scatter")

    return bar_fig, line_fig, scatter_fig


# Start the Dash server (debug mode, port 8050) when this code runs as the
# main program.
if __name__ == "__main__":
    app.run(debug=True, port=8050)

# AAPL, TSLA, NVDA, MSFT, JPM, HIMS, BABA, NIO, TXN, INTC, PLTR
# RGTI, QBTS, QUBT, IONQ