# Python for Finance: An Introduction

In [None]:
# Let's start with an easter egg in Python
import this

# 1. Data Types, Data Structures, and Operators

## Data Types

In [None]:
# Strings can be delimited with double or single quotes.
x = "String"

x = 'Also a String'

# x = "Not working'  # mismatched quotes raise SyntaxError


# Integers: no statement terminator is needed; `x = 2;` is also valid and means the same.
x = 2
x = int(2)

# Floating-point numbers
x = 1.5
x = float(1.5)

# Convert between types explicitly with str(), int(), float()


x = 10
b = "15"

# x += b  # raises TypeError: an int and a str cannot be added without an explicit conversion

z = True is False  # booleans: `is` compares identity, so z is False

"""Operators"""
x += int(b)  # same as x = x + int(b) -> 25
print(x)

b += str(x)  # same as b = b + str(x); strings are concatenated -> "1525"
print(b)

#     print(b)  # an unexpected leading indent like this one raises IndentationError

## Data Structures

In [None]:
# Lists: ordered, mutable sequences
list() == []  # evaluates to True (both are empty lists); the result is discarded here

courses = ["Derivatives 1", "Derivatives 3", "Modern Methods in Empirical Asset Pricing"]

# Indexing and slicing
print("1st: " + courses[0])  # indices start at 0
print("Last: " + courses[-1])  # negative indices count from the end
print("All: " + str(courses[:]))  # [:] selects every element; str() is needed to concatenate a list to a string
print("1st to 2nd: " + str(courses[0:2]))  # the stop index of a slice is exclusive

In [None]:
# Strings support the same indexing and slicing syntax as lists.

model_name = "CAPM Model"
print(model_name[0:4])  # characters at positions 0..3 -> "CAPM" (0-indexed, stop index exclusive)

In [None]:
# Lists are mutable: assigning to an index replaces that element in place.
# `courses` is the list created in an earlier cell.
courses[1] = "Derivatives 2"

print(courses)

In [None]:
# Tuples
# Indexed like lists, but immutable: elements cannot be reassigned after creation.

tpl = ("I am", 4)  # a tuple is ordered and immutable (a list is ordered and mutable)
print(tpl[0])

# Reassigning an element fails:
# tpl[0] = "You are"  # raises TypeError

In [None]:
# Dictionaries
x = 100


def my_function() -> None:
    """Print a fixed message; stored in the dictionary below as an example value."""
    print("I am a function")


dct = {  # a dictionary stores key-value pairs
    "Key1": "Value1",
    2:      str,  # keys need not be strings, and values can be any object, even a type
    5:      x,  # stores the current value of x (100)
    "Key2": {},  # another (empty) dictionary
    "Key4": my_function,  # the function object itself; it is not called here
}

print(dct.keys())  # keys() is an ordinary (instance) method of dict, not a 'classmethod'
print(dct.values())  # values() is another instance method

In [None]:
# I am a comment


"""
I'm
a block (multi-line) comment
"""

In [None]:
# print() writes its argument, followed by a newline, to standard output.
print("I print to stdout (the console).")

# 2. Functions and Classes

In [None]:
# Functions are objects: they can be bound to other names and passed as arguments.


def outer(function):
    """Call the callable passed in as ``function`` with no arguments."""
    function()  # the parentheses are what trigger the call


def inner():
    """Print a fixed message; takes no arguments."""
    print("I am function inner()!")


# Bind the function object to a second name; without parentheses nothing is called.
myfunc = inner

# Pass the function object to outer(), which then calls it.
outer(myfunc)

In [None]:
# classes


class PutOption:
    """A put option described by its strike and premium."""

    def __init__(self, strike: float, premium: float) -> None:
        """Constructor: store strike and premium as instance attributes."""
        self.strike = strike
        self.premium = premium

    def payoff(self, stock_price: float) -> float:
        """Return the payoff of the put option at maturity for the given stock price.

        This is an instance method: it reads ``self.strike``. The result is
        ``strike - stock_price`` when that difference is not negative, and 0
        otherwise. The premium is not deducted.
        """
        intrinsic = self.strike - stock_price
        return 0 if intrinsic < 0 else intrinsic


# Create an instance of PutOption and query it.
put = PutOption(strike=40, premium=1)
print(f"Put option strike: {put.strike}, premium: {put.premium}")
print(f"Put option payoff at stock price 42: {put.payoff(42)}")
print(f"Put option payoff at stock price 38: {put.payoff(38)}")

# 3. Conditional Statements

In [None]:
premium = 1
stock_price = 42
strike = 40
payoff = max(stock_price - strike, 0)  # Question: which instrument has this payoff at maturity?
profit = payoff - premium  # payoff net of the premium

# Branches are tested top to bottom; only the first one whose condition is true runs.
if profit == 0:
    print("Nothing, really.")

elif profit < 0:
    print("That went wrong.")

elif 0 <= profit < 16000:  # chained comparison: 0 <= profit and profit < 16000
    print("I spend my Holidays on Hawaii.")

elif 16000 <= profit < 100000:
    print("Retirement is coming earlier!")

# This branch can never run: profit == 1 is already caught by 0 <= profit < 16000 above.
elif profit == 1:
    pass

# Everything not matched above, here: profit >= 100000
else:
    print("I am rich!")

# 4. Loops: for and while

In [None]:
# A for-loop iterates over the elements of a sequence (here a tuple); p takes each value in turn.
for p in (.5, 1.2, 3):
    print(f"The put option's price is: {p}\n")  # the "\n" adds an empty line after each message

In [None]:
"""while-loop"""

counter = 10

# The body repeats for as long as the condition holds (here: 10 times).
while counter > 0:
    print(f"Oh man, still {counter - 1} iterations to go ...")
    counter -= 1  # moves towards the exit condition; without this line the loop would never end
    # break  # 'break' would leave the loop immediately, regardless of the condition


# 5. Errors, Traceback, and Exceptions

In [None]:
# SyntaxError
# print("Hello World'  # Raises SyntaxError

# KeyError
# d = {}
# print(d["non_existing_key"])  # Raises KeyError

# IndexError
# lst = []
# print(lst[1])  # Raises IndexError

# TypeError
# x = 5 + "10"  # Raises TypeError

# ValueError
# int("Not a number")  # Raises ValueError

# NameError
# print(undefined_variable)  # Raises NameError

In [None]:
y = 20
x = 0

try:
    z = y / x  # raises ZeroDivisionError because x is 0

except ZeroDivisionError as e:  # handle one specific, expected error
    print("You tried to divide by zero!")

except TypeError as e:
    raise e  # re-raise: the error is not handled here but passed on

except Exception as e:  # catch-all for every other exception; generally bad practice
    pass  # 'do nothing': silently ignoring an error hides bugs; bad practice

else:
    print("No error occurred!")  # runs only if the try block raised nothing

finally:
    print("This is always executed.")  # runs whether or not an exception occurred

# 6. Packages and Modules


In [None]:
import requests  # importing a package
import numpy as np  # assigning an alias to a package
import pandas as pd
from matplotlib import pyplot as plt  # importing only a submodule and assigning an alias

## Loading Data with 'pandas' and 'requests'

In [None]:
# using the packages

# Read a CSV file into a pandas DataFrame; parse_dates asks pandas to parse the 'time' column as dates.
# The path is relative to the working directory of the notebook.
# Documentation: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
coin_prices = pd.read_csv("data/coin_prices.csv", parse_dates=['time'])
print(coin_prices.head())  # the first rows of the resulting pd.DataFrame
print(coin_prices.dtypes)  # data type of each column

In [None]:
# Read CSV data directly from a web address (Google Drive), published by the authors of https://www.openassetpricing.com/
# Public folder: https://drive.google.com/drive/folders/1o74QkDfUfeaDmdE73nJvMQ5vDB5RY-7Y (the file id below is for "Size.csv")
os_ap = pd.read_csv(
    'https://drive.google.com/uc?id=1Jp0YRCu8KIVae_3-GGscxgY1lB50WMu-',
    index_col='date',  # make column 'date' the index
    delimiter=',',  # default value
    skiprows=0,  # default value
    usecols=range(1, 5 + 1)  # columns at positions 1 to 5; position 0 is skipped. NOTE(review): presumably 'date' is one of them - confirm
)
print(os_ap.head())

In [None]:
# Query the CoinMetrics community API for the price history of one option contract.
# Market details: https://coverage.coinmetrics.io/markets-v2/deribit-BTC-11OCT25-114000-C-option
params = {
    'markets':     ['deribit-BTC-11OCT25-114000-C-option'],
    'pretty':      True,
    'start_time':  '2020-01-01',
    'paging_from': 'start',
}
http_response = requests.get(
    'https://community-api.coinmetrics.io/v4/timeseries/market-contract-prices',
    params=params,
    timeout=30,  # requests has no default timeout; without one a stalled server hangs the cell
)
# Fail with a clear HTTPError on a 4xx/5xx answer instead of a KeyError on 'data' below.
http_response.raise_for_status()
response = http_response.json()  # parsed JSON body; the records are under the 'data' key
call_prices = pd.DataFrame(response['data'])
print(call_prices.head())
print(call_prices.dtypes)  # dtypes as delivered by the API, before conversion

cols = ['mark_price', 'index_price', 'settlement_price_estimated']
date_cols = ['time', 'database_time', 'exchange_time']
call_prices[cols] = call_prices[cols].apply(pd.to_numeric)  # convert to numeric type
call_prices[date_cols] = call_prices[date_cols].apply(pd.to_datetime)  # convert to datetime objects
print(call_prices.dtypes)  # dtypes after conversion

## Simple matrix operations with 'numpy'

In quantitative finance, efficient numerical computation is essential. Whether we’re pricing derivatives, estimating risk, or backtesting trading strategies, we often deal with large datasets — prices, returns, or factor exposures — that must be processed quickly. This is where NumPy becomes indispensable.

NumPy provides the ndarray, a powerful data structure for numerical operations. Unlike standard Python lists, NumPy arrays are homogeneous and vectorized, meaning operations are executed in compiled C code rather than interpreted Python loops. This leads to massive performance gains, often orders of magnitude faster.


In [None]:
# A one-dimensional array: the price vector of a single asset (four observations).
prices = np.array([100, 102, 101, 105])

In [None]:
# random data for simulations
# Seed NumPy's global random generator first, so that "Restart Kernel -> Run All" reproduces the
# same numbers. This also makes the np.random.* calls that run afterwards reproducible.
np.random.seed(42)
returns = np.random.normal(0, 0.01, size=1000)  # 1000 draws from a normal distribution (mean 0, std 0.01)

In [None]:
# Arrays can represent vectors, matrices, or higher-dimensional data. For instance, daily prices for 5 assets can be stored as a 2D matrix:
# np.random.rand draws from a uniform distribution on [0, 1); multiplying by 100 scales the samples to [0, 100)
price_matrix = np.random.rand(252, 5) * 100  # 252 days (rows), 5 assets (columns)
print(price_matrix)

In [None]:
# Daily simple returns without a loop: each row is divided element-wise by the previous row, minus 1.
# The result has one row fewer than price_matrix. Note that this rebinds the name `returns`.
returns = price_matrix[1:] / price_matrix[:-1] - 1
print(returns)

In [None]:
# Vectorised array arithmetic (no explicit Python loops) is the foundation for portfolio-level
# computations such as covariance matrices, betas, or portfolio variance:
weights = np.full(5, 0.2)  # equal weights, one per asset
cov_matrix = np.cov(returns, rowvar=False)  # each column of `returns` is treated as one variable
portfolio_var = weights @ cov_matrix @ weights  # quadratic form w' C w
print(f'portfolio variance: {portfolio_var}')

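Find the replicating portfolio weights $w$ for a target payoff vector $y$, given the matrix of state-contingent asset payoffs $A$ (rows: states, columns: assets) and the asset prices $p$:<br>
$w = A^{-1} y$ such that<br>
$A w = y$.<br>
The cost of the replicating portfolio is then $p^\top w$.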

In [None]:
# Example: 3 states, 3 assets
A = np.array(
    [
        [1.05, 0.0, 0.0],  # payoffs of the 3 assets in state 1
        [1.05, 1.0, 0.5],  # payoffs of the 3 assets in state 2
        [1.05, 0.5, 1.0],  # payoffs of the 3 assets in state 3
    ]
)  # shape (states, assets) = (3, 3)

p = np.array([1, 0.90, 0.95])  # asset prices
y = np.array([1.05, 1.3, 1.2])  # desired payoff in each of the 3 states

# 1) Exact replication (if A is square and invertible)
if A.shape[0] == A.shape[1] and np.linalg.matrix_rank(A) == A.shape[0]:
    # Solve the linear system A w = y directly instead of forming the explicit inverse:
    # same solution, but fewer operations and better numerical accuracy.
    w_exact = np.linalg.solve(A, y)
    cost_exact = p @ w_exact  # price of the replicating portfolio
    print("Exact replication weights:", w_exact)
    print("Cost (exact):", cost_exact)
else:
    print("Exact replication not available (A not square/full-rank).")

# 2) Least-squares via the pseudoinverse: the minimum-norm w that minimises ||A w - y||.
#    This also works when A is not square or not full rank (e.g. more states than assets).
w_pinv = np.linalg.pinv(A) @ y
residual = A @ w_pinv - y  # zero (up to rounding) when y can be replicated exactly
cost_pinv = p @ w_pinv
print("\nPseudoinverse (least-squares) weights:", w_pinv)
print("Residual (A w - y):", residual)
print("Cost (p @ w):", cost_pinv)

Some more methods and operations with numpy:

In [None]:
# Two 2x2 integer matrices with identical entries (this rebinds the names A and b).
A = np.array([[1, 2], [3, 4]])
b = np.array([[1, 2], [3, 4]])

# Constructors for common matrices; the results are neither assigned nor printed here.
np.zeros((2, 3))  # 2 rows, 3 columns, filled with zeros
np.identity(3)  # 3x3 identity matrix
np.diag([1, 2, 3])  # diagonal matrix with 1,2,3 on the diagonal

print(A + b)  # matrix addition (element-wise)
print(A * b)  # element-wise multiplication, NOT the matrix product
print(A @ b)  # matrix multiplication
print(np.linalg.inv(A))  # matrix inversion
print(np.linalg.det(A))  # determinant

## Visualizations with 'matplotlib'

In [None]:
# Use 'time' as the index (it becomes the x-axis) and plot the 'mark_price' column as a line chart.
call_prices.set_index('time')['mark_price'].plot(title='Call Option Prices over Time')

In [None]:
# Plot multiple time series
coins = ['btc', 'eth', 'ltc', 'xrp']
# Reshape from long to wide format: one row per 'time', one column per 'asset', filled with
# 'ReferenceRateUSD'; then keep only the selected coins, in the order given above.
cprice = coin_prices.pivot(
        index='time', values='ReferenceRateUSD', columns='asset'
    )[coins]
lines = plt.plot(cprice)  # draws one line per column of cprice
plt.title('Cryptocurrency Prices over Time')
plt.ylabel('USD Price')
plt.legend(coins)  # labels are matched to the lines by position, i.e. by column order
plt.grid()

In [None]:
# Create a Figure with a 2x2 grid of subplots; axs is a 2x2 array of Axes, indexed as axs[row, col].
fig, axs = plt.subplots(ncols=2, nrows=2, figsize=(10, 6))

In [None]:
# access and fill the subplots ...


There are many options that can be explored! Consider the examples in the gallery: https://matplotlib.org/stable/gallery/index.html