# Testing and Evaluating Options Pricing Models

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

## Load the Libs we need

In [2]:
# import Lib
import pandas as pd
import datetime as dt
import pytz
import os
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import scipy.stats as si
import math
import networkx as nx

# import module
from datetime import datetime, timezone
from datetime import date, time
from math import trunc
from dateutil.parser import parse

## Introduction to Testing and Evaluating Trading Models

In [9]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Mock data
past_prices = np.array([100, 105, 103, 108, 107]).reshape(-1, 1)
option_prices = np.array([5, 4.5, 5.2, 4.8, 4.7])

# Split the data into training and testing subsets
X_train, X_test, y_train, y_test = train_test_split(past_prices, option_prices, test_size=0.2, random_state=42)

# Train a linear regression model
model = LinearRegression().fit(X_train, y_train)

# Evaluate the model's performance using Mean Absolute Error (MAE)
predicted = model.predict(X_test)
mae = mean_absolute_error(y_test, predicted)

print(f"Mean Absolute Error: {mae:.2f}")


Mean Absolute Error: 0.40


## Performance Metrics for Trading Models

In [13]:
import numpy as np

# Define the Sharpe Ratio function
def sharpe_ratio(returns, risk_free_rate=0.01):
    excess_returns = returns - risk_free_rate
    return np.mean(excess_returns) / np.std(excess_returns)

# Define the Sortino Ratio function
def sortino_ratio(returns, risk_free_rate=0.01, target_return=0):
    excess_returns = returns - risk_free_rate
    downside_std = np.std(np.clip(excess_returns - target_return, None, 0))
    return np.mean(excess_returns) / downside_std

# Define the Maximum Drawdown function
def max_drawdown(returns):
    cumulative_returns = np.cumprod(1 + returns)
    max_return = np.fmax.accumulate(cumulative_returns)
    return np.min(cumulative_returns / max_return - 1)

# Sample returns data
returns = np.array([0.08, 0.12, -0.05, 0.10, -0.03, 0.15, 0.07, -0.06, 0.04, -0.10])

# Print the metrics
print(f"Sharpe Ratio: {sharpe_ratio(returns):.2f}")
print(f"Sortino Ratio: {sortino_ratio(returns):.2f}")
print(f"Maximum Drawdown: {max_drawdown(returns):.2f}")



Sharpe Ratio: 0.27
Sortino Ratio: 0.58
Maximum Drawdown: -0.12


## Backtesting Techniques

In [14]:
new_past_prices = np.array([106, 104, 107, 109, 108]).reshape(-1, 1)

predicted_option_prices = model.predict(new_past_prices)
print(predicted_option_prices)



[4.86097561 4.94634146 4.81829268 4.73292683 4.77560976]


In [15]:
def rolling_window_backtest(data, window_size):
    signals = []
    for start in range(0, len(data) - window_size):
        end = start + window_size
        window_data = data[start:end]
        moving_avg = np.mean(window_data)
        if window_data[-1] > moving_avg:
            signals.append('buy')
        else:
            signals.append('sell')
    return signals

data = np.array([100, 102, 99, 105, 104, 103, 108, 107])
signals = rolling_window_backtest(data, window_size=3)
print(signals)


['sell', 'buy', 'buy', 'sell', 'buy']


## Model Validation Approaches

In [19]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Generate a larger mock dataset
np.random.seed(42)  # for reproducibility

# Creating a sequence of 500 numbers as past prices
past_prices = np.linspace(50, 150, 500) 

# Generating option prices with a linear relationship + some random noise
option_prices = 0.1 * past_prices + 5 + np.random.normal(0, 2, 500)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(past_prices.reshape(-1, 1), option_prices, test_size=0.2, random_state=42)

# Train a linear regression model
model = LinearRegression().fit(X_train, y_train)

# Predict using the test set
predictions = model.predict(X_test)

# Calculate R^2 score
accuracy = r2_score(y_test, predictions)

print(f"Model R^2 Score: {accuracy:.2f}")


Model R^2 Score: 0.73


In [21]:
from sklearn.model_selection import cross_val_score

# Reshape the data to ensure it's in the expected 2D format
X = past_prices.reshape(-1, 1)
y = option_prices

# Perform cross-validation
scores = cross_val_score(LinearRegression(), X, y, cv=5)
print(f"Cross-validation Scores: {scores}")
print(f"Average Score: {np.mean(scores):.2f}")


Cross-validation Scores: [0.07126362 0.08633118 0.01805444 0.0756227  0.00986164]
Average Score: 0.05


In [22]:
# Example: Out-of-sample testing with a new dataset.

# Generate new data points that the model hasn't seen and ensure it's reshaped
new_data = np.array([110, 111, 113, 112, 115, 116, 118, 117, 119, 120]).reshape(-1, 1)

# Use the model (which was trained on past_prices) to predict option prices for the new data
predicted_prices = model.predict(new_data)
print(predicted_prices)




[16.02148275 16.12297715 16.32596595 16.22447155 16.52895475 16.63044916
 16.83343796 16.73194356 16.93493236 17.03642676]


## Addressing Overfitting and Improving Model Performance

In [23]:
# Example: We'll use Lasso regression, a form of linear regression that includes a penalty term which can help in reducing overfitting.

from sklearn.linear_model import Lasso

lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)
predictions = lasso_model.predict(X_test)

accuracy = lasso_model.score(X_test, y_test)
print(f"Lasso Model Accuracy: {accuracy:.2f}")


Lasso Model Accuracy: 0.73


In [24]:
# Example: Using Ridge regression to reduce overfitting.
from sklearn.linear_model import Ridge

ridge_model = Ridge(alpha=0.5).fit(X_train, y_train)
ridge_predictions = ridge_model.predict(X_test)

accuracy = ridge_model.score(X_test, y_test)
print(f"Ridge Model Accuracy: {accuracy:.2f}")


Ridge Model Accuracy: 0.73


In [25]:
# Example: Using Random Forests, which are less prone to overfitting compared to simple linear models.

from sklearn.ensemble import RandomForestRegressor

rf_model = RandomForestRegressor(n_estimators=100).fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)

accuracy = rf_model.score(X_test, y_test)
print(f"Random Forest Model Accuracy: {accuracy:.2f}")




Random Forest Model Accuracy: 0.65
