Description: This notebook tracks various notes, strategies, and implementations that may be used (jointly with others) to create an aggregated, weighted trading strategy for our team's CTC 2024 Case.

In [1]:
!git clone https://github.com/Cornell-Quant-Fund/CTC-2024-Derivatives.git

# Note to Recruiter:
# Due to updates in the CTC-2024-Derivatives data repository, some components of
# the code may not run as originally intended. However, the trading strategies
# are labeled and were functional and tested at the time of project completion.

Cloning into 'CTC-2024-Derivatives'...
remote: Enumerating objects: 137, done.[K
remote: Counting objects: 100% (69/69), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 137 (delta 60), reused 49 (delta 49), pack-reused 68 (from 1)[K
Receiving objects: 100% (137/137), 72.04 MiB | 13.62 MiB/s, done.
Resolving deltas: 100% (66/66), done.


In [None]:
#initial play-testing strat

import random
import pandas as pd
from datetime import datetime

class Strategy:
  """Play-testing strategy that emits randomly chosen, randomly sized orders."""

  def __init__(self) -> None:
    """Set starting capital and the date window, then load both CSV files."""
    self.capital : float = 100_000_000
    self.portfolio_value : float = 0

    self.start_date : datetime = datetime(2024, 1, 1)
    self.end_date : datetime = datetime(2024, 3, 30)

    self.options : pd.DataFrame = pd.read_csv("data/cleaned_options_data.csv")
    # "day" is the part of the ts_recv string that precedes the "T" separator.
    self.options["day"] = self.options["ts_recv"].apply(lambda x: x.split("T")[0])

    self.underlying = pd.read_csv("data/underlying_data_hour.csv")
    self.underlying.columns = self.underlying.columns.str.lower()

  def generate_orders(self, num_orders: int = 1000) -> pd.DataFrame:
    """Draw random option rows and build one random order per usable draw.

    Parameters:
    - num_orders: number of rows to draw (default 1000, the previous
      hard-coded value).

    Returns:
    - DataFrame with columns datetime, option_symbol, action, order_size.
      A buy is sized between 1 and the row's ask_sz_00, a sell between 1 and
      the row's bid_sz_00. Draws whose relevant size is below 1 are skipped,
      so the frame may hold fewer than num_orders rows.
    """
    orders = []

    for _ in range(num_orders):
      row = self.options.sample(n=1).iloc[0]
      action = random.choice(["B", "S"])

      # Buys are limited by the ask size, sells by the bid size.
      size_column = "ask_sz_00" if action == "B" else "bid_sz_00"
      available = int(row[size_column])
      if available < 1:
        # random.randint(1, 0) would raise ValueError; nothing can be traded here.
        continue

      order_size = random.randint(1, available)
      # The earlier check `a <= b or c` was true whenever c was non-zero;
      # compare against the side that was actually used.
      assert 1 <= order_size <= available

      orders.append({
        "datetime" : row["ts_recv"],
        "option_symbol" : row["symbol"],
        "action" : action,
        "order_size" : order_size
      })

    # Explicit columns keep the schema stable even when no order was produced.
    return pd.DataFrame(orders, columns=["datetime", "option_symbol", "action", "order_size"])

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy
import statsmodels
import pandas as pd
from datetime import datetime

class Strategy:
  """Skeleton strategy: loads the case data but does not produce orders yet."""

  def __init__(self) -> None:
    """Record capital and the date window, then read the two CSV inputs."""
    self.capital : float = 100_000_000
    self.portfolio_value : float = 0

    self.start_date : datetime = datetime(2024, 1, 1)
    self.end_date : datetime = datetime(2024, 3, 30)

    options_frame : pd.DataFrame = pd.read_csv("data/cleaned_options_data.csv")
    # Keep only the text before the "T" of each ts_recv value.
    options_frame["day"] = options_frame["ts_recv"].apply(lambda stamp: stamp.split("T")[0])
    self.options : pd.DataFrame = options_frame

    underlying_frame = pd.read_csv("data/underlying_data_hour.csv")
    underlying_frame.columns = underlying_frame.columns.str.lower()
    self.underlying = underlying_frame

  def generate_orders(self) -> pd.DataFrame:
    """Placeholder for the order logic; currently returns ``None``."""
    return None

In [None]:
# block to solve for current volatility based on inputs
# Fast Implied Volatility
import numpy as np
from scipy.stats import norm
# Shorthand alias for scipy.stats.norm.cdf (the standard normal CDF).
N = norm.cdf

def bs_call(S, K, T, r, vol):
    """Black-Scholes value of a call.

    S is the underlying price, K the strike, T the time to expiry, r the
    rate and vol the volatility (T, r and vol must share one time unit).
    """
    vol_sqrt_t = vol * np.sqrt(T)
    drift = (r + 0.5*vol**2)*T
    d1 = (np.log(S/K) + drift) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    spot_leg = S * norm.cdf(d1)
    strike_leg = np.exp(-r * T) * K * norm.cdf(d2)
    return spot_leg - strike_leg

def bs_vega(S, K, T, r, sigma):
    """Derivative of the Black-Scholes price with respect to sigma.

    Takes the same arguments as the pricing function: underlying price S,
    strike K, time to expiry T, rate r and volatility sigma.
    """
    sqrt_t = np.sqrt(T)
    numerator = np.log(S / K) + (r + 0.5 * sigma ** 2) * T
    d1 = numerator / (sigma * sqrt_t)
    density = norm.pdf(d1)
    return S * density * sqrt_t

def find_vol(target_value, S, K, T, r, *args):
    """Solve for the volatility at which bs_call reproduces target_value.

    Runs Newton's method from sigma = 0.5, using bs_vega as the derivative.

    Parameters:
    - target_value: observed call price to match.
    - S, K, T, r: passed straight through to bs_call / bs_vega.
    - *args: accepted for call-site compatibility; not used.

    Returns:
    - The sigma whose price is within PRECISION of target_value, or the last
      iterate when the search stops early (iteration cap reached, or vega too
      small to take a meaningful step).
    """
    MAX_ITERATIONS = 200
    PRECISION = 1.0e-5
    MIN_VEGA = 1.0e-8   # below this the Newton step diff/vega blows up to inf/NaN
    MIN_SIGMA = 1.0e-6  # keep the iterate strictly positive
    sigma = 0.5
    for _ in range(MAX_ITERATIONS):
        price = bs_call(S, K, T, r, sigma)
        diff = target_value - price  # our root
        if abs(diff) < PRECISION:
            return sigma
        vega = bs_vega(S, K, T, r, sigma)
        if not np.isfinite(vega) or vega < MIN_VEGA:
            # Flat price curve: dividing would yield inf/NaN, so stop here.
            break
        sigma = max(sigma + diff / vega, MIN_SIGMA)  # f(x) / f'(x)
    return sigma  # value wasn't found, return best guess so far

In [None]:
# Covered calls Protective puts (Hedging Strategy)
import pandas as pd
import random
from datetime import datetime, timedelta

class Strategy:
  """Hedging play-test that emits covered-call and protective-put order pairs."""

  def __init__(self) -> None:
    """Set starting capital and the date window, then load both CSV files."""
    self.capital : float = 100_000_000
    self.portfolio_value : float = 0
    self.start_date : datetime = datetime(2024, 1, 1)
    self.end_date : datetime = datetime(2024, 3, 30)

    self.options : pd.DataFrame = pd.read_csv("data/cleaned_options_data.csv")
    # "day" is the part of the ts_recv string that precedes the "T" separator.
    self.options["day"] = self.options["ts_recv"].apply(lambda x: x.split("T")[0])

    self.underlying = pd.read_csv("data/underlying_data_hour.csv")
    self.underlying.columns = self.underlying.columns.str.lower()

  def generate_orders(self) -> pd.DataFrame:
    """Build five covered-call pairs and five protective-put pairs.

    Returns:
    - DataFrame with columns datetime, option_symbol, action, order_size and
      one row per individual order (20 rows in total).
    """
    order_data = []

    for _ in range(5):
      # Each helper returns a list of two order rows. extend() keeps one row per
      # order; append() nested the pair and broke the 4-column DataFrame below.
      order_data.extend(self.covered_call())
      order_data.extend(self.protective_put())

    order_df = pd.DataFrame(order_data, columns=["datetime", "option_symbol", "action", "order_size"])
    return order_df

  def covered_call(self):
    """Pick a random call row and return [buy_order, sell_order] for it.

    Both rows carry the same random timestamp and the same option symbol; the
    buy has size 1, the sell a random size from 1 to 10.
    """
    # NOTE(review): other cells in this notebook read the contract id from a
    # "symbol" column; confirm the data really has "type" and "option_symbol".
    call_option = self.options[self.options["type"] == "call"].sample().iloc[0]
    order_datetime = self.random_order_time(call_option['day'])
    option_symbol = call_option['option_symbol']

    buy_order = [order_datetime, option_symbol, "B", 1]
    sell_order = [order_datetime, option_symbol, "S", random.randint(1, 10)]

    return [buy_order, sell_order]

  def protective_put(self):
    """Pick a random put row and return two buy orders for it.

    Both rows carry the same random timestamp and the same option symbol; the
    first has size 1, the second a random size from 1 to 10.
    """
    put_option = self.options[self.options["type"] == "put"].sample().iloc[0]
    order_datetime = self.random_order_time(put_option['day'])
    option_symbol = put_option['option_symbol']

    # NOTE(review): the "underlying" leg is written with the option's symbol,
    # so it is in fact a second order on the put - confirm this is intended.
    buy_order_underlying = [order_datetime, option_symbol, "B", 1]
    buy_order_put = [order_datetime, option_symbol, "B", random.randint(1, 10)]

    return [buy_order_underlying, buy_order_put]

  def random_order_time(self, day_str):
    """Return a random time of day on day_str ('%Y-%m-%d') as ISO text plus 'Z'."""
    day = datetime.strptime(day_str, '%Y-%m-%d')
    random_time = day + timedelta(
      hours=random.randint(0, 23),
      minutes=random.randint(0, 59),
      seconds=random.randint(0, 59),
      microseconds=random.randint(0, 999999)
    )
    return random_time.isoformat() + "Z"

# arbitrage
import pandas as pd
from datetime import datetime

class Strategy:
  """Arbitrage screen: selects quotes whose bid price exceeds the ask price."""

  def __init__(self) -> None:
    """Set starting capital and the date window, then load both CSV files."""
    self.capital : float = 100_000_000
    self.portfolio_value : float = 0

    self.start_date : datetime = datetime(2024, 1, 1)
    self.end_date : datetime = datetime(2024, 3, 30)

    self.options : pd.DataFrame = pd.read_csv("data/cleaned_options_data.csv")
    # Force numeric prices so the bid/ask comparison below is not done on text.
    self.options["bid_px_00"] = self.options["bid_px_00"].astype(float)
    self.options["ask_px_00"] = self.options["ask_px_00"].astype(float)
    self.options["day"] = self.options["ts_recv"].apply(lambda x: x.split("T")[0])

    self.underlying = pd.read_csv("data/underlying_data_hour.csv")
    self.underlying.columns = self.underlying.columns.str.lower()

  def generate_orders(self) -> pd.DataFrame:
    """
    ask price - lowest price someone is willing to sell for
    bid price - max price a buyer is willing to buy for

    whenever ask < bid, arbitrage opportunity to buy

    Returns the crossed rows without instrument_id, bid_px_00, ask_px_00 and
    bid_sz_00, with ask_sz_00 renamed to order_size and symbol renamed to
    option_symbol.
    """
    crossed = self.options["bid_px_00"] > self.options["ask_px_00"]
    orders_df: pd.DataFrame = self.options[crossed].drop(
      columns=["instrument_id","bid_px_00","ask_px_00","bid_sz_00"]
    )
    # rename() returns a new frame; the result was previously discarded, so the
    # columns were never actually renamed.
    orders_df = orders_df.rename(columns={"ask_sz_00": "order_size","symbol":"option_symbol"})
    # NOTE(review): the other strategies in this notebook emit "datetime" and
    # "action" columns; this frame still carries "ts_recv"/"day" and no action.
    return orders_df

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy
import statsmodels
import pandas as pd
from scipy.stats import norm
from datetime import datetime

class Strategy:
    """Delta-hedging strategy that buys calls or puts to offset portfolio delta."""

    def __init__(self, risk_free_rate=0.03, volatility=0.2) -> None:
        """Store the pricing inputs and load the options quotes.

        Parameters:
        - risk_free_rate: rate passed to the Black-Scholes delta.
        - volatility: constant volatility passed to the Black-Scholes delta.
        """
        self.capital: float = 1_000_000
        self.portfolio: float = 0
        self.current_delta: float = 0  # Track current portfolio delta for hedging
        self.risk_free_rate = risk_free_rate
        self.volatility = volatility  # Assumed constant volatility for the underlying - NME Education as specified by Marie

        # Assuming options data is preloaded in the backtester
        self.options: pd.DataFrame = pd.read_csv("data/cleaned_options_data.csv")
        self.options["day"] = pd.to_datetime(self.options["ts_recv"]).dt.date

    @staticmethod
    def _option_type(symbol):
        """Return 'C', 'P' or None for an option symbol.

        A leading "SPX" is removed first: it contains a "P", which would
        otherwise make every SPX symbol look like a put.
        """
        contract_code = symbol[3:] if symbol.startswith("SPX") else symbol
        if "C" in contract_code:
            return 'C'
        if "P" in contract_code:
            return 'P'
        return None

    def black_scholes_delta(self, S, K, T, r, sigma, option_type='C'):
        """
        Calculate Black-Scholes delta for call or put option.

        Returns the call delta for option_type 'C' and the call delta minus one
        for 'P'. An unknown option_type, or any exception, prints a message and
        returns 0. For T <= 0 the step-function limit is used (1, 0.5 or 0 for
        S above, equal to or below K) instead of dividing by sqrt(T).
        """
        if option_type not in ('C', 'P'):
            # Previously fell through and returned None, which broke the
            # arithmetic on the caller's side.
            print(f"Error calculating Black-Scholes delta: unknown option type {option_type!r}")
            return 0
        try:
            if T <= 0:
                # sigma * sqrt(T) is zero (or NaN for T < 0); numpy would hand
                # back NaN rather than raise, so handle the limit explicitly.
                if S > K:
                    call_delta = 1.0
                elif S == K:
                    call_delta = 0.5
                else:
                    call_delta = 0.0
            else:
                d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
                call_delta = norm.cdf(d1)
            if option_type == 'C':
                return call_delta  # Call delta
            return call_delta - 1  # Put delta
        except Exception as e:
            print(f"Error calculating Black-Scholes delta: {e}")
            return 0

    def calculate_delta(self, option_row, underlying_price, current_date):
        """
        Calculate delta for each option using the Black-Scholes formula.

        Reads "strike", "expiration_date" ('%Y-%m-%d') and "symbol" from
        option_row; time to expiry is the day count to expiration divided by
        365. Returns 0 (after printing a message) when the symbol carries no
        call/put marker or when anything raises.
        """
        try:
            strike_price = float(option_row["strike"])
            expiration_date = datetime.strptime(option_row["expiration_date"], "%Y-%m-%d").date()
            T = (expiration_date - current_date).days / 365.0  # Time to expiration in years

            option_type = self._option_type(option_row["symbol"])
            if option_type is None:
                print(f"Error in calculate_delta: no call/put marker in symbol {option_row['symbol']!r}")
                return 0
            return self.black_scholes_delta(S=underlying_price, K=strike_price, T=T, r=self.risk_free_rate, sigma=self.volatility, option_type=option_type)
        except Exception as e:
            print(f"Error in calculate_delta: {e}")
            return 0

    def generate_orders(self, underlying_prices) -> pd.DataFrame:
        """
        Generate orders based on delta-neutral strategy without setting an arbitrary limit on the number of orders.
        Orders will be placed when the delta imbalance requires it.

        Parameters:
        - underlying_prices: mapping (anything with .items()) from a day, as
          stored in the "day" column, to the underlying price for that day.

        Returns:
        - DataFrame of orders with datetime, option_symbol, action, order_size.
        """
        orders = []

        # NOTE(review): current_delta starts at 0 and is only changed inside the
        # two branches below, so no order is produced unless a caller sets a
        # non-zero delta first. Confirm how the initial exposure should be seeded.
        for current_date, price in underlying_prices.items():
            available_options = self.options[self.options["day"] == current_date]

            if available_options.empty:
                continue

            # Strip the "SPX" root before looking for the C/P marker; the "P" in
            # "SPX" previously made the put filter match every row, calls included.
            contract_codes = available_options["symbol"].str.replace(r'^SPX', '', regex=True)
            call_options = available_options[contract_codes.str.contains("C")]
            put_options = available_options[contract_codes.str.contains("P")]

            if call_options.empty or put_options.empty:
                continue

            # NOTE(review): these are simply the first call and first put of the
            # day; no moneyness check is applied despite the "atm_" names.
            atm_call = call_options.iloc[0]
            atm_put = put_options.iloc[0]

            call_delta = self.calculate_delta(atm_call, price, current_date)
            put_delta = self.calculate_delta(atm_put, price, current_date)

            # Delta-neutral strategy logic: Hedge by buying/selling to offset delta imbalance
            if self.current_delta < 0:
                # Hedge by buying call options (at most 5, capped by ask size)
                order_size = min(int(atm_call["ask_sz_00"]), 5)
                orders.append({
                    "datetime": atm_call["ts_recv"],
                    "option_symbol": atm_call["symbol"],
                    "action": "B",
                    "order_size": order_size
                })
                self.current_delta += call_delta * order_size

            # Deliberately a second `if`: it also fires when the call purchase
            # above pushed the delta positive.
            if self.current_delta > 0:
                # Hedge by buying put options (at most 5, capped by ask size)
                order_size = min(int(atm_put["ask_sz_00"]), 5)
                orders.append({
                    "datetime": atm_put["ts_recv"],
                    "option_symbol": atm_put["symbol"],
                    "action": "B",
                    "order_size": order_size
                })
                self.current_delta += put_delta * order_size

        return pd.DataFrame(orders)

# Section 2: implementation


In [None]:
# Function for calculating volatility:
# using log returns, using rolling window to calculate, annualizing vol (adjusted for 3 months), handling outliers, and using a dynamic window size
import numpy as np
import pandas as pd

def calculate_volatility(underlying_data: pd.DataFrame, window: int = 24,
                         periods_in_horizon: float = 409.5) -> pd.Series:
    """
    Calculate rolling volatility based on hourly underlying price data.

    Parameters:
    - underlying_data: DataFrame with 'date', 'close' and 'volume' columns.
      It is not modified; all work happens on a copy.
    - window: Rolling window size in rows (default 24).
    - periods_in_horizon: number of periods the per-period volatility is scaled
      to, via sqrt(periods_in_horizon). The default 409.5 is the previously
      hard-coded "3 months of trading hours" figure.

    Returns:
    - A pandas Series of rolling standard deviations of log returns, scaled by
      sqrt(periods_in_horizon). The first window - 1 entries are NaN.
    """
    # Copy first: assigning columns on the caller's frame (or on a filtered
    # slice of it) changed the input and triggered chained-assignment warnings.
    data = underlying_data.copy()
    data['date'] = pd.to_datetime(data['date'])
    data = data[data['volume'] > 1000].copy()

    # Log returns between consecutive rows that survived the volume filter.
    data['log_return'] = np.log(data['close'] / data['close'].shift(1))
    data = data.dropna(subset=['log_return'])

    # Drop returns at or beyond 3 standard deviations from the mean.
    mean_return = data['log_return'].mean()
    std_return = data['log_return'].std()
    within_band = ((data['log_return'] > (mean_return - 3 * std_return)) &
                   (data['log_return'] < (mean_return + 3 * std_return)))
    returns = data.loc[within_band, 'log_return']

    # Rolling standard deviation (volatility), scaled to the chosen horizon.
    rolling_volatility = returns.rolling(window=window).std()
    return rolling_volatility * np.sqrt(periods_in_horizon)


In [None]:
# Signal generating code block- input is a dataframe with our transformed data, output is signal to be multiplied by desired capital input
import pandas as pd
import numpy as np

def market_neutral_weights(df: pd.DataFrame) -> pd.DataFrame:
    """
    Implements a market-neutral strategy by ranking options, selecting the top 3 for long positions and the bottom 3 for short positions.
    Outputs the weights for each option, normalized so that the absolute sum of all weights (longs and shorts) equals 1.

    Within each side the weight is the signal divided by the side's signal
    sum; the short side is then negated. A side whose signals sum to zero is
    left at weight 0 instead of producing NaN/inf.

    Parameters:
    - df: DataFrame with DateTimeIndex and options as columns, where values represent the signals.

    Returns:
    - weights_df: DataFrame of the same shape as input with float weights for each option, where the sum of absolute weights per row is 1 (or 0 when no weight could be assigned).
    """
    def _side_weights(signals: pd.Series) -> pd.Series:
        # Share of each signal in its side's total; zeros when the total is zero.
        total = signals.sum()
        if total == 0:
            return signals * 0.0
        return signals / total

    weights = []

    for _, row in df.iterrows():
        ranks = row.rank(method='average')
        top_3 = ranks.nlargest(3).index
        bottom_3 = ranks.nsmallest(3).index

        # Float zeros: writing fractional weights into an integer Series is
        # deprecated in pandas 2.x and rejected by later versions.
        weight_row = pd.Series(0.0, index=row.index)
        weight_row.loc[top_3] = _side_weights(row[top_3])          # longs
        weight_row.loc[bottom_3] = -_side_weights(row[bottom_3])   # shorts are negative

        gross = weight_row.abs().sum()
        if gross > 0:
            weight_row = weight_row / gross
        weights.append(weight_row)

    weights_df = pd.DataFrame(weights, index=df.index)
    return weights_df

# Example usage:
# Assuming df is a DataFrame with DateTimeIndex and columns as possible options
# weights_df = market_neutral_weights(df)
# in order to hedge, use -1*signal and multiply by 1/10 of capital. Can adjust later for improved accuracy or stats.

**Below is the official product function that outputs our orders in CSV format.**

In [None]:
# divides options data into puts and calls to treat separately

def separate_options_data(options):
    """Split an options frame into puts and calls based on the 'symbol' column.

    A leading "SPX" is removed before looking for the "P"/"C" marker, because
    the "P" in "SPX" would otherwise classify every row as a put.

    Parameters:
    - options: DataFrame with a string 'symbol' column. It is not modified.

    Returns:
    - (put_options, call_options): row subsets of `options` with the same
      columns. Rows whose symbol is missing land in neither subset.
    """
    # Keep the stripped symbols in a local Series rather than adding (and later
    # dropping) a helper column on the caller's frame.
    contract_codes = options['symbol'].str.replace(r'^SPX', '', regex=True)

    # na=False keeps the masks strictly boolean when a symbol is missing.
    put_options = options[contract_codes.str.contains('P', na=False)]
    call_options = options[contract_codes.str.contains('C', na=False)]

    return put_options, call_options

In [None]:
def calculate_volatility(self, price_data: pd.DataFrame, window : int = 24, column: str = 'close',
                         periods_in_horizon: float = 409.5) -> pd.Series:
    """
    Calculate rolling volatility based on any input price data (can be underlying, options, etc.).
    By default, it calculates based on the 'close' column.

    Parameters:
    - self: unused; the signature is written so the function can be attached
      to a class as a method.
    - price_data: DataFrame with a 'date' column and the price `column`; an
      optional 'volume' column is used to drop rows with volume <= 1000.
      The frame is not modified; all work happens on a copy.
    - window: rolling window size in rows (default 24).
    - column: name of the price column (default 'close').
    - periods_in_horizon: number of periods the per-period volatility is scaled
      to, via sqrt(periods_in_horizon). Default 409.5 is the previously
      hard-coded value.

    Returns:
    - Series of rolling standard deviations of log returns, scaled by
      sqrt(periods_in_horizon). The first window - 1 entries are NaN.
    """
    # Copy first: assigning columns on the caller's frame (or on a filtered
    # slice of it) changed the input and triggered chained-assignment warnings.
    data = price_data.copy()
    data['date'] = pd.to_datetime(data['date'])
    if 'volume' in data.columns:
        data = data[data['volume'] > 1000].copy()
    data['log_return'] = np.log(data[column] / data[column].shift(1))
    data = data.dropna(subset=['log_return'])

    # Remove outliers beyond 3 standard deviations to avoid skewing volatility
    mean_return = data['log_return'].mean()
    std_return = data['log_return'].std()
    within_band = ((data['log_return'] > (mean_return - 3 * std_return)) &
                   (data['log_return'] < (mean_return + 3 * std_return)))
    returns = data.loc[within_band, 'log_return']

    # Rolling standard deviation (volatility), scaled to the chosen horizon.
    rolling_volatility = returns.rolling(window=window).std()
    return rolling_volatility * np.sqrt(periods_in_horizon)

def calculate_delta(self, option_row, underlying_price, current_date):
    """
    Calculate delta for each option using the Black-Scholes formula.

    Parameters:
    - self: object providing black_scholes_delta(), risk_free_rate and volatility.
    - option_row: mapping/row with "strike", "expiration_date" ('%Y-%m-%d')
      and "symbol".
    - underlying_price: price of the underlying passed on as S.
    - current_date: date subtracted from the expiration date.

    Returns:
    - Whatever self.black_scholes_delta returns for the detected option type,
      or 0 (after printing a message) when the symbol has no call/put marker
      or when anything raises.
    """
    try:
        strike_price = float(option_row["strike"])
        expiration_date = datetime.strptime(option_row["expiration_date"], "%Y-%m-%d").date()
        T = (expiration_date - current_date).days / 365.0  # Time to expiration in years

        # Drop a leading "SPX" before looking for the marker: its "P" made every
        # SPX symbol without a "C" look like a put.
        symbol = option_row["symbol"]
        contract_code = symbol[3:] if symbol.startswith("SPX") else symbol
        if "C" in contract_code:
            option_type = 'C'
        elif "P" in contract_code:
            option_type = 'P'
        else:
            # Previously this case fell through and returned None.
            print(f"Error in calculate_delta: no call/put marker in symbol {symbol!r}")
            return 0

        # Use Black-Scholes to calculate delta
        return self.black_scholes_delta(S=underlying_price, K=strike_price, T=T, r=self.risk_free_rate, sigma=self.volatility, option_type=option_type)
    except Exception as e:
        print(f"Error in calculate_delta: {e}")
        return 0

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy
import statsmodels
import pandas as pd
from datetime import datetime

class Strategy:
  """Random-order strategy reading the case data from the cloned repository."""

  def __init__(self) -> None:
    """Set starting capital and the date window, then load both data files."""
    self.capital : float = 100_000_000
    self.portfolio_value : float = 0

    self.start_date : datetime = datetime(2024, 1, 1)
    self.end_date : datetime = datetime(2024, 3, 30)

    self.options : pd.DataFrame = pd.read_csv("/content/CTC-2024-Derivatives/data/cleaned_options_data.zip")
    # "day" is the part of the ts_recv string that precedes the "T" separator.
    self.options["day"] = self.options["ts_recv"].apply(lambda x: x.split("T")[0])

    self.underlying = pd.read_csv("/content/CTC-2024-Derivatives/data/underlying_data_hour.csv")
    self.underlying.columns = self.underlying.columns.str.lower()

  def generate_orders(self, num_orders: int = 1000) -> pd.DataFrame:
    """Draw random option rows and build one random order per usable draw.

    Parameters:
    - num_orders: number of rows to draw (default 1000, the previous
      hard-coded value).

    Returns:
    - DataFrame with columns datetime, option_symbol, action, order_size.
      A buy is sized between 1 and the row's ask_sz_00, a sell between 1 and
      the row's bid_sz_00. Draws whose relevant size is below 1 are skipped,
      so the frame may hold fewer than num_orders rows.
    """
    # This cell's import block does not import `random`; import it here so the
    # method does not depend on an earlier cell having been run.
    import random

    orders = []

    for _ in range(num_orders):
      row = self.options.sample(n=1).iloc[0]
      action = random.choice(["B", "S"])

      # Buys are limited by the ask size, sells by the bid size.
      size_column = "ask_sz_00" if action == "B" else "bid_sz_00"
      available = int(row[size_column])
      if available < 1:
        # random.randint(1, 0) would raise ValueError; nothing can be traded here.
        continue

      order_size = random.randint(1, available)
      # The earlier check `a <= b or c` was true whenever c was non-zero;
      # compare against the side that was actually used.
      assert 1 <= order_size <= available

      orders.append({
        "datetime" : row["ts_recv"],
        "option_symbol" : row["symbol"],
        "action" : action,
        "order_size" : order_size
      })

    # Explicit columns keep the schema stable even when no order was produced.
    return pd.DataFrame(orders, columns=["datetime", "option_symbol", "action", "order_size"])