Installation of fireducks library

In [7]:
!pip install fireducks

Collecting fireducks
  Downloading fireducks-1.2.5-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting firefw==1.2.5 (from fireducks)
  Downloading firefw-1.2.5-py3-none-any.whl.metadata (818 bytes)
Collecting pyarrow<19.1,>=19.0 (from fireducks)
  Downloading pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Downloading fireducks-1.2.5-cp311-cp311-manylinux_2_28_x86_64.whl (7.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.3/7.3 MB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading firefw-1.2.5-py3-none-any.whl (12 kB)
Downloading pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl (42.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.1/42.1 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyarrow, firefw, fireducks
  Attempting uninstall: pyarrow
    Found existing installation: pyarrow 18.1.0
    Uninstalling pyarrow-18.1.0:
      Successfully uninstalled pya

Read the csv file and create two dfs

In [3]:
import pandas as pd
import fireducks.pandas as fpd  # Hypothetical Fireducks-Pandas integration
import numpy as np
import time

# Load data
df = pd.read_csv("stocks.csv")

# Calculate daily returns
df["daily_return"] = df.groupby("symbol")["price"].pct_change()

# Filter NaN (first day)
df_clean = df.dropna()

# Preview
print(df_clean.head())
df = df_clean
df_fireducks = fpd.DataFrame(df)

  symbol        date  price  daily_return
1   MSFT  Feb 1 2000  36.35     -0.086913
2   MSFT  Mar 1 2000  43.22      0.188996
3   MSFT  Apr 1 2000  28.37     -0.343591
4   MSFT  May 1 2000  25.45     -0.102926
5   MSFT  Jun 1 2000  32.54      0.278585


Pandas

In [4]:
start = time.time()
df["daily_return"] = df.groupby("symbol")["price"].pct_change()
pandas_time = time.time() - start
print(f"Pandas time: {pandas_time:.4f} seconds")

Pandas time: 0.0050 seconds


Fireducks

In [5]:
start = time.time()
df_fireducks["daily_return"] = df_fireducks.groupby("symbol")["price"].pct_change()
fireducks_time = time.time() - start
print(f"Fireducks time: {fireducks_time:.4f} seconds")

Fireducks time: 0.0100 seconds


Pandas

In [14]:
start = time.time()
df["cumulative_return"] = (
    df.groupby("symbol")["daily_return"]
    .apply(lambda x: (1 + x).cumprod() - 1)
    .reset_index(level=0, drop=True) # Reset index to match original DataFrame
)
pandas_time_cumulative = time.time() - start
print(f"Pandas time: {pandas_time_cumulative:.4f} seconds")

Pandas time: 0.0073 seconds


Fireducks

In [15]:
start = time.time()
# Hypothetical optimized Fireducks method
df_fireducks["cumulative_return"] = (
    df_fireducks.groupby("symbol")["daily_return"]
    .apply(lambda x: (1 + x).cumprod() - 1)
    .reset_index(level=0, drop=True) # Reset
)
fireducks_time_cumulative = time.time() - start
print(f"Fireducks time: {fireducks_time_cumulative:.4f} seconds")

Fireducks time: 0.0510 seconds


Pandas

In [16]:
start = time.time()
df["30d_volatility"] = (
    df.groupby("symbol")["daily_return"]
    .rolling(window=30, min_periods=15)
    .std()
    .reset_index(level=0, drop=True)
)
pandas_time = time.time() - start
print(f"Pandas rolling volatility time: {pandas_time:.2f} seconds")

Pandas rolling volatility time: 0.02 seconds


Fireducks

In [18]:
start = time.time()
df_fireducks["30d_volatility"] = (
    df_fireducks.groupby("symbol")["daily_return"]
    .rolling(window=30, min_periods=15)
    .std()
    .reset_index(level=0, drop=True)
)
fireducks_time = time.time() - start
print(f"Fireducks rolling volatility time: {fireducks_time:.2f} seconds")

Fireducks rolling volatility time: 0.01 seconds


Pandas

In [20]:
from scipy.optimize import minimize # Import the minimize function

returns = df.pivot(index="date", columns="symbol", values="daily_return")
cov_matrix = returns.cov().values
expected_returns = returns.mean().values

# Optimization function (Pandas + NumPy)
def objective(weights):
    port_return = np.dot(weights, expected_returns) * 252
    port_vol = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights))) * np.sqrt(252)
    return -port_return / port_vol  # Minimize negative Sharpe ratio

start = time.time()
result = minimize(
    objective,
    x0=np.ones(len(returns.columns)) / len(returns.columns),  # Equal weights, use returns.columns for len(symbols)
    bounds=[(0, 1)] * len(returns.columns), # Use returns.columns for len(symbols)
    constraints={"type": "eq", "fun": lambda w: np.sum(w) - 1}
)
pandas_time = time.time() - start
print(f"Pandas optimization time: {pandas_time:.2f} seconds")

Pandas optimization time: 0.04 seconds


Fireducks

In [22]:
start = time.time()

# Step 1: Compute covariance matrix using DuckDB
returns = df_fireducks.pivot(index="date", columns="symbol", values="daily_return")
cov_matrix = returns.cov().values
expected_returns = returns.mean().values

# Optimization function (Pandas + NumPy)
def objective(weights):
    port_return = np.dot(weights, expected_returns) * 252
    port_vol = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights))) * np.sqrt(252)
    return -port_return / port_vol  # Minimize negative Sharpe ratio

start = time.time()
result = minimize(
    objective,
    x0=np.ones(len(returns.columns)) / len(returns.columns),  # Equal weights, use returns.columns for len(symbols)
    bounds=[(0, 1)] * len(returns.columns), # Use returns.columns for len(symbols)
    constraints={"type": "eq", "fun": lambda w: np.sum(w) - 1}
)
fireducks_time = time.time() - start
print(f"Fireducks optimization time: {fireducks_time:.2f} seconds")

Fireducks optimization time: 0.01 seconds
