In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime as dt
import numpy as np

**Metals**
- GC: Gold
- SI: Silver

**Agriculture**
- ZS: Soybean
- ZC: Corn

**Softs**
- SC: Sugar
- CC: Cocoa

**Energy**
- CL: Crude Oil
- NG: Natural Gas

**Interest**
- ED: Eurodollar
- SR1: Short-Term Interest Rate

**Equity**
- ES: E-mini S&P 500
- NQ: E-mini Nasdaq-100

**Bonds**
- ZT: 2-Year T-Note
- ZF: 5-Year T-Note
- ZN: 10-Year T-Note
- ZB: Treasury Bond Future
- UB: Ultra Treasury Bond Future

In [20]:
# df = pd.read_csv(f"data/{name}.csv")

# df["Date"] = pd.to_datetime(df["Date"])
# df = df.sort_values("Date").reset_index(drop=True)
# df.set_index("Date", inplace=True)
# df = df[df.index.year >= 2000]


# def get_name():
#     return f"{name}: {df.index.min().year}-{df.index.max().year}"

In [21]:
# Sampling frequency, written as the strftime directive letter used for bucketing:
#   "m" -> monthly, "U" -> weekly. Switch the value below to change frequency.
code = "m"

# stp: rolling window / step length in rows; pow_fctr: periods per year.
stp, pow_fctr = (21, 12) if code == "m" else (5, 52)

In [22]:
def get_annualized_return(series, periods_per_year=None):
    """Return the simple (non-compounded) annualized percentage return of a window.

    The window return is ``(last - first) / first``; it is scaled linearly by
    the number of periods per year and expressed in percent.

    Parameters
    ----------
    series : pandas.Series
        Prices for one window, in chronological order.
    periods_per_year : float, optional
        Number of such windows in a year. When omitted, the notebook-level
        ``pow_fctr`` is used (12 for monthly sampling, 52 for weekly), so the
        scaling follows the ``code`` switch instead of always assuming months.

    Returns
    -------
    float
        Annualized return in percent.
    """
    if periods_per_year is None:
        # Resolved at call time so changing the frequency cell takes effect
        # without redefining this function.
        periods_per_year = pow_fctr

    start = series.iloc[0]
    end = series.iloc[-1]
    window_return = (end - start) / start

    return window_return * periods_per_year * 100

In [23]:
def _get_df(name: str):
    """Load ``data/{name}.csv`` as a date-indexed frame restricted to 2000 onwards.

    The ``Date`` column is parsed to datetimes, rows are sorted by it, and it
    becomes the index; rows dated before the year 2000 are dropped.
    """
    frame = pd.read_csv(f"data/{name}.csv")
    frame = frame.assign(Date=pd.to_datetime(frame["Date"]))
    frame = frame.sort_values("Date").set_index("Date")
    return frame[frame.index.year >= 2000]


def get_rtr_vol(names: list[str]):
    """Build per-contract price, volatility and return series.

    For every name the result holds a dict with:

    * ``"df"``     - the frame returned by ``_get_df(name)``;
    * ``"sample"`` - last ``Close`` per calendar day, empty days dropped;
    * ``"vol"``    - 14-row rolling std of daily percentage changes, averaged
      over windows of ``stp`` rows taken every ``stp`` rows, scaled by
      ``sqrt(252)``;
    * ``"rtr"``    - ``get_annualized_return`` applied to windows of ``stp``
      rows of ``"sample"`` taken every ``stp`` rows.

    For both ``"vol"`` and ``"rtr"`` the NaNs are dropped and then the first
    remaining observation is discarded as well.
    """
    data_dict = {}

    for name in names:
        frame = _get_df(name)
        closes = frame["Close"].resample("D").last().dropna()

        daily_pct = closes.pct_change().dropna() * 100
        rolling_std = daily_pct.rolling(14, min_periods=2).std()
        stepped_vol = rolling_std.rolling(stp, step=stp).mean()
        annual_vol = (stepped_vol * np.sqrt(252)).dropna()[1:]

        stepped_windows = closes.rolling(stp, step=stp)
        annual_rtr = stepped_windows.apply(get_annualized_return).dropna()[1:]

        data_dict[name] = {
            "df": frame,
            "sample": closes,
            "vol": annual_vol,
            "rtr": annual_rtr,
        }

    return data_dict

In [24]:
from plotly.colors import n_colors
import plotly.graph_objects as go


# CSV stems (under data/) of the contracts included in the analysis.
contract_names = [
    "si",
    "gc",
    "zc",
    "zs",
    "cc",
    # "cl",  (currently disabled)
    "ng",
    "es",
    "nq",
    "zf",
    "zn",
]
contracts = get_rtr_vol(contract_names)


# One colour per contract, interpolated from blue to red.
num_contracts = len(contracts)
palette = n_colors(
    "rgb(0, 0, 255)",
    "rgb(255, 0, 0)",
    num_contracts,
    colortype="rgb",
)

# Contract name -> colour, in the same order as `contracts`.
color_map = dict(zip(contracts, palette))

In [25]:
# One line per contract showing its annualized return series.
return_traces = [
    go.Scatter(
        x=data["rtr"].index,
        y=data["rtr"],
        name=name,
        mode="lines",
        line=dict(color=color_map[name]),
    )
    for name, data in contracts.items()
]

fig = go.Figure(data=return_traces)
fig.update_layout(
    title="Return (Monthly Annualized) Over Time",
    xaxis=dict(title="Time"),
    yaxis=dict(title="Return"),
    legend=dict(x=0.5, y=1.2, orientation="h"),
)

fig.show()

In [26]:
# One line per contract showing its annualized volatility series.
vol_traces = [
    go.Scatter(
        x=data["vol"].index,
        y=data["vol"],
        name=name,
        mode="lines",
        line=dict(color=color_map[name]),
    )
    for name, data in contracts.items()
]

fig = go.Figure(data=vol_traces)
fig.update_layout(
    title="Volatility Over Time",
    xaxis=dict(title="Time"),
    yaxis=dict(title="Volatility"),
    legend=dict(x=0.5, y=1.2, orientation="h"),
)

fig.show()

In [27]:
# Volatility of volatility: rolling standard deviation of each contract's vol
# series. The window counts 14 observations of the already-stepped vol series
# (one point every `stp` rows), not 14 calendar days.
fig = go.Figure()

for name in contracts:
    _vol_vol = contracts[name]["vol"].rolling(14).std().dropna()
    fig.add_trace(
        go.Scatter(
            x=_vol_vol.index,
            y=_vol_vol,
            name=name,
            mode="lines",
            line=dict(color=color_map[name]),
        )
    )
fig.update_layout(
    # The plotted quantity is vol of vol, so label the axis accordingly.
    yaxis=dict(title="Vol of Vol"),
    xaxis=dict(title="Time"),
    title="Volatility of Vol (14-period rolling) Over Time",
    legend=dict(x=0.5, y=1.2, orientation="h"),
)

fig.show()

## Observation
* Vol of vol tells us about the clustering effect.
* A spike in vol of vol only tells us that vol changed, not the direction of the change.
* Plateaus tell us that vol is changing (not the direction).
* Troughs tell us that vol is staying where it is (clustering effect).
* Using the point above, we can also say that if vol is staying where it is, then a high (or low) vol period is followed by another high (or low) vol period. If this were not the case, that is, if high vol were not followed by high vol, then vol would keep changing rapidly and vol of vol would stay high (which is clearly not always the case).



As vol of vol only tells us the magnitude of the change in vol, not its direction, intuition says there should be no correlation between vol of vol and return.

In [28]:
# Return divided by volatility, per contract. The division is positional
# (on the raw values) and the result is plotted against the vol index.
# NOTE(review): assumes "rtr" and "vol" have the same length — confirm.
sharpe_traces = [
    go.Scatter(
        x=data["vol"].index,
        y=data["rtr"].values / data["vol"].values,
        name=name,
        mode="lines",
        line=dict(color=color_map[name]),
    )
    for name, data in contracts.items()
]

fig = go.Figure(data=sharpe_traces)
fig.update_layout(
    title="Sharpe Ratio Over Time",
    xaxis=dict(title="Time"),
    yaxis=dict(title="Sharpe Ratio"),
    legend=dict(x=0.5, y=1.2, orientation="h"),
)

fig.show()

In [29]:
def _next_period_key(ts):
    """Return the ``YYYY_PP`` key of the period that follows timestamp ``ts``.

    ``PP`` is the two-digit value of ``ts.strftime(f"%{code}")`` plus one.
    A monthly value of 13, or any value of 53, rolls over to period ``01`` of
    the next year.

    NOTE(review): with ``code == "U"`` the week number can itself be 53, and
    the first week of a year is ``00``, so the weekly rollover may not match
    an existing key — confirm before relying on weekly results.
    """
    period = int(ts.strftime(f"%{code}")) + 1
    if (period == 13 and code == "m") or period == 53:
        return f"{ts.year + 1}_01"
    return f"{ts.year}_{period:02d}"


def get_vol_qtiles(cd: dict):
    """Collect next-period volatility and return for each volatility quintile.

    Parameters
    ----------
    cd : dict
        Per-contract entry holding date-indexed series under ``"vol"`` and
        ``"rtr"``.

    Returns
    -------
    tuple[list, list]
        ``(next_vol, next_rtr)``. Each list has five series (lowest to highest
        volatility quintile). For quintile ``k`` they contain the ``"vol"`` /
        ``"rtr"`` observations whose ``YYYY_PP`` period key equals the period
        following an observation that fell into quintile ``k``.
    """
    vol, rtr = cd["vol"], cd["rtr"]

    # Quintile boundaries; buckets are (lo, hi] with open-ended extremes.
    q20, q40, q60, q80 = vol.quantile([0.2, 0.4, 0.6, 0.8]).values
    bucket_masks = [
        vol <= q20,
        (q20 < vol) & (vol <= q40),
        (q40 < vol) & (vol <= q60),
        (q60 < vol) & (vol <= q80),
        q80 < vol,
    ]

    # Period keys of every observation; computed once, reused for all buckets.
    period_fmt = f"%Y_%{code}"
    vol_keys = vol.index.strftime(period_fmt)
    rtr_keys = rtr.index.strftime(period_fmt)

    next_vol = []
    next_rtr = []
    for mask in bucket_masks:
        lookup = [_next_period_key(ts) for ts in vol[mask].index]
        next_vol.append(vol[vol_keys.isin(lookup)])
        next_rtr.append(rtr[rtr_keys.isin(lookup)])

    return next_vol, next_rtr

In [30]:
# Per contract: (next-period vol, next-period return) samples for each vol quintile.
qtiles = {name: get_vol_qtiles(data) for name, data in contracts.items()}

In [31]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Two side-by-side panels: mean next-period volatility (left) and mean
# next-period return (right) for each volatility quintile.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=(
        "Annualized Next Month Volatility",
        "Annualized Next Month Return",
    ),
)

# Labels for x-axis
labels = ["Q1", "Q2", "Q3", "Q4", "Q5"]

for name in contracts:
    next_vol, next_rtr = qtiles[name]
    # (subplot column, bar heights, show in legend). Only the left-hand bars
    # get a legend entry; the shared legendgroup toggles both panels together.
    panels = (
        (1, [series.mean() for series in next_vol], True),
        (2, [series.mean() for series in next_rtr], False),
    )
    for col, values, in_legend in panels:
        fig.add_trace(
            go.Bar(
                name=name.upper(),
                x=labels,
                y=values,
                marker_color=color_map[name],
                showlegend=in_legend,
                legendgroup=name,
            ),
            row=1,
            col=col,
        )

# Update layout
fig.update_layout(
    height=600,
    width=1400,
    title_text="Volatility and Returns by Quintile",
    barmode="group",
    bargap=0.15,
    bargroupgap=0.1,
    legend=dict(x=0.5, y=1.2, orientation="h"),
)

# Update y-axes labels
fig.update_yaxes(title_text="Annualized Vol", row=1, col=1)
fig.update_yaxes(title_text="Annualized Return", row=1, col=2)

# Update x-axes labels
fig.update_xaxes(title_text="Vol Quintile", row=1, col=1)
fig.update_xaxes(title_text="Vol Quintile", row=1, col=2)

# Show the plot
fig.show()

## TODO

- {R,R-1}x{V,V-1} scatter & KDE plot
- Vol of Vol Distribution

In [32]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import numpy as np

# Lags (in sampling periods) at which each correlation is evaluated.
MAX_LAG = 12
lags = list(range(1, MAX_LAG + 1))

# One entry per subplot: (row, col, lagged series key, current series key).
# The lagged series is truncated at the end and the current one at the start,
# so element k pairs the lagged value with the current value `lag` steps later.
panels = [
    (1, 1, "vol", "rtr"),  # R/V-i: return vs. earlier volatility
    (1, 2, "rtr", "vol"),  # R-i/V: volatility vs. earlier return
    (2, 1, "rtr", "rtr"),  # R/R-i: return autocorrelation
    (2, 2, "vol", "vol"),  # V/V-i: volatility autocorrelation
]

# Create 2x2 subplots
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=(
        "R/V-i Correlation",
        "R-i/V Correlation",
        "R/R-i Correlation",
        "V/V-i Correlation",
    ),
)

for name in contracts:
    for panel_no, (row, col, lagged_key, current_key) in enumerate(panels):
        lagged = contracts[name][lagged_key].values
        current = contracts[name][current_key].values
        corr = [np.corrcoef(lagged[:-lag], current[lag:])[0][1] for lag in lags]

        # Only the first panel contributes a legend entry per contract; the
        # shared legendgroup keeps all four traces toggling together.
        legend_kwargs = {} if panel_no == 0 else {"showlegend": False}
        fig.add_trace(
            go.Scatter(
                x=lags,
                y=corr,
                mode="lines+markers",
                name=name.upper(),
                line=dict(color=color_map[name]),
                legendgroup=name.upper(),
                **legend_kwargs,
            ),
            row=row,
            col=col,
        )

# Same axis titles and y-range on every subplot (no row/col selects all axes).
fig.update_xaxes(title_text="Lag")
fig.update_yaxes(title_text="Correlation Coefficient", range=[-1, 1])

# Update overall layout
fig.update_layout(
    height=800,
    width=1400,
    showlegend=True,
    title_text="Correlation Analysis",
    template="plotly_white",
    legend=dict(x=0.5, y=1.2, orientation="h"),
)

# Show the interactive plot
fig.show()


## Correlation between 
## a. $Return_t$ & $Var_t - Var_{t-1}$
## b. $Return_t$ & $Var_t$

In [33]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Per contract, two 2x2 correlation matrices:
#   [0] return(t) vs. change in squared vol, var(t) - var(t-1)
#   [1] return(t) vs. squared vol, var(t)
correlations = {}
for name, data in contracts.items():
    rtr = data["rtr"].values
    var = data["vol"].values ** 2
    correlations[name] = [
        # The first return is skipped because the first variance has no predecessor.
        np.corrcoef(rtr[1:], var[1:] - var[:-1]),
        np.corrcoef(rtr, var),
    ]

# Create bar chart
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=(
        "Correlation: Return(t) & [Var(t) - Var(t-1)]",
        "Correlation: Return(t) & Var(t)",
    ),
)

for name, (corr_dvar, corr_var) in correlations.items():
    label = name.upper()
    # (subplot column, correlation matrix, show in legend)
    for col, matrix, in_legend in ((1, corr_dvar, True), (2, corr_var, False)):
        fig.add_trace(
            go.Bar(
                x=[label],
                y=[matrix[0][1]],  # off-diagonal entry = the correlation itself
                name=label,
                marker_color=color_map[name],
                showlegend=in_legend,
                legendgroup=label,
            ),
            row=1,
            col=col,
        )

# Label the axes of BOTH subplots; layout-level xaxis_title / yaxis_title only
# reach the first subplot's axes.
fig.update_xaxes(title_text="Contract")
fig.update_yaxes(title_text="Correlation")

# Update layout
fig.update_layout(
    legend_title="Contracts",
    width=1400,  # Set the width of the entire figure
    height=600,
    legend=dict(x=0.5, y=1.2, orientation="h"),
)

fig.show()

In [34]:
# One bar per contract: correlation between the rolling std of vol and return.
vol_of_vol_bars = []
for name, data in contracts.items():
    vol_of_vol = data["vol"].rolling(14, min_periods=2).std().dropna()
    # The rolling std has no value for the first vol observation, so the
    # first return is skipped to pair the two series positionally.
    corr = np.corrcoef(vol_of_vol, data["rtr"][1:])[0][1]
    vol_of_vol_bars.append(
        go.Bar(
            name=name.upper(),
            x=[name],
            y=[corr],
            marker_color=color_map[name],
            showlegend=True,
        )
    )

fig = go.Figure(data=vol_of_vol_bars)

# Last expression of the cell: the returned figure is what gets displayed.
fig.update_layout(
    height=600,
    width=1400,
    title_text="Correlation: [Vol of Vol](t) and Returns(t)",
    barmode="group",
    bargap=0.15,
    bargroupgap=0.1,
    legend=dict(x=0.5, y=1.2, orientation="h"),
)

In [35]:
import pandas as pd
import plotly.graph_objects as go

# Tick labels for months 1..12, in calendar order.
MONTH_NAMES = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]

# One figure per contract: mean volatility by calendar month, one line per
# year, restricted to the last ten years present in the data.
for name, data in contracts.items():
    vol = data["vol"]
    monthly_vol = (
        vol.groupby([vol.index.year, vol.index.month])
        .mean()
        .rename_axis(["Year", "Month"])
        .reset_index()
    )

    fig = go.Figure()

    last_years = monthly_vol["Year"].unique()[-10:]
    for year in last_years:
        rows = monthly_vol[monthly_vol["Year"] == year]
        fig.add_trace(
            go.Scatter(
                x=rows["Month"],  # numeric month, mapped to names via the ticks
                y=rows.iloc[:, 2],  # third column holds the averaged vol values
                mode="lines",
                name=str(year),
            )
        )

    fig.update_xaxes(
        title="Month",
        tickmode="array",
        tickvals=list(range(1, 13)),
        ticktext=MONTH_NAMES,
    )

    fig.update_layout(
        title=f"Volatility by Month for {name.upper()} (past 10 years)",
        xaxis_title="Month",
        yaxis_title="Volatility",
        legend=dict(title="Year", itemclick="toggle", itemdoubleclick="toggle"),
        template="plotly_white",
    )

    fig.show()


---
## Analysis Ends
---

In [36]:
# NOTE(review): appears to be a deliberate ZeroDivisionError that halts "Run All"
# at this point, so the data-download cells below only run when executed by hand
# — confirm, and consider an explicit, descriptive stop instead.
1 / 0

ZeroDivisionError: division by zero

In [None]:
import yfinance as yf

# Contract root to download; also used as the CSV file stem when exporting.
fname = "cc"
# Build the futures symbol "<ROOT>=F" fully upper-cased (e.g. "CC=F").
# str.capitalize() lower-cases everything after the first character and would
# produce "Cc=f" instead.
dat = yf.Ticker(f"{fname}=F".upper())
_yf = dat.history(period="max")
# Keep only the closing prices.
newdf = _yf["Close"]
newdf

In [None]:
# Replace the index with its `.date` values so only the calendar date remains.
# This mutates `newdf` in place, so the cell is not idempotent.
# NOTE(review): presumably the index is a DatetimeIndex coming from yfinance — confirm.
newdf.index =  newdf.index.date
newdf

In [1804]:
# Flatten the series into a two-column (Date, Close) table and write it to
# data/<fname>.csv without the positional index.
newdf = newdf.reset_index().set_axis(["Date", "Close"], axis=1)
newdf.to_csv(f"data/{fname}.csv", index=False)