<a href="https://colab.research.google.com/github/The-H4CKER/deep-option-pricer/blob/main/notebooks/model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Training

## Setup

In [14]:
import torch
import torch.nn as nn
import pandas as pd
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [26]:
# SQLAlchemy URL of the SQLite database holding the options data
# (relative to the notebook's working directory).
DB_PATH = "sqlite:///data/options_data.db"
engine = create_engine(DB_PATH)

# Read the whole "options" table into a DataFrame.
# On failure, report the error and re-raise: every later cell depends on `df`,
# so continuing would only surface as a confusing NameError (or silently reuse
# a stale `df` left in the kernel from a previous run).
try:
    df = pd.read_sql_table("options", engine)
except Exception as e:
    print(f"Error loading database: {e}")
    raise
else:
    print("Database loaded successfully!")
    print(f"Loaded {len(df)} records.")

Database loaded successfully!
Loaded 2097 records.


## Data Preparation

In [27]:
# Tunable constants for this stage.
RISK_FREE_RATE = 0.05  # risk-free rate assigned to every row
TEST_SIZE = 0.2        # fraction of rows held out for testing (80-20 split)
RANDOM_STATE = 42      # fixed seed so the train/test split is reproducible

df['r'] = RISK_FREE_RATE  # define risk-free rate
# NOTE(review): 'r' is constant across all rows, so it presumably carries no
# information once standardised — confirm it is wanted as a model feature.

# One-hot encode the 'option_type' column. With drop_first=True the first
# category is dropped and 'option_type' is replaced by 'option_type_put',
# which is 1.0 for puts and 0.0 otherwise ('put' -> 1, 'call' -> 0).
# The guard keeps the cell re-runnable: after the first run 'option_type' no
# longer exists in `df`, and encoding again would raise a KeyError.
if 'option_type' in df.columns:
    df = pd.get_dummies(df, columns=['option_type'], drop_first=True, dtype=float)

features = ['stock_price', 'strike_price', 'dte', 'r', 'implied_volatility', 'option_type_put']
target = 'market_price'

# Explicitly define model's inputs (X) and output (y)
X = df[features].values
y = df[target].values.reshape(-1, 1)  # column vector of shape (n_samples, 1)

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Perform standard 80-20 split on data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

# Scale input features to a similar scale (mean=0, std=1)
scaler = StandardScaler()

# The scaler is fitted on the training data only, then applied unchanged to
# the test data, so no test-set statistics leak into training.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert NumPy arrays into float32 PyTorch tensors.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)


X shape: (2097, 6)
y shape: (2097, 1)
X_train shape: (1677, 6)
X_test shape: (420, 6)
