# GW2 Flipping Optimizer - Fill Probability Model

This notebook fits a per-item Weibull model of sell-order fill times and uses it to estimate the probability that a listing sells within a given time horizon.

In [None]:
import json
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from lifelines import WeibullFitter

# If running locally, adjust paths as needed
# If running in Colab, you'll need to upload your data folder

## 1. Load Transaction History

In [None]:
# Sell-side history is the only input the sell fill-probability models need.
sell_data_path = 'data/sell_orders/sell_history_2024-12-15.csv'  # adjust date as needed
sells = pd.read_csv(sell_data_path)

# Two-line summary of what was loaded, followed by a preview of the first rows.
print(
    f"Loaded {len(sells)} sell transactions",
    f"Unique items: {sells['item_id'].nunique()}",
    sep="\n",
)
sells.head()

## 2. Exploratory Data Analysis

In [None]:
# Per item name: total quantity sold plus mean/median/std of fill time,
# ranked by total quantity, 20 busiest items shown.
(
    sells
    .groupby('item_name')
    .agg({
        'quantity': 'sum',
        'time_to_fill_hours': ['mean', 'median', 'std'],
    })
    .sort_values(by=('quantity', 'sum'), ascending=False)
    .head(20)
)

In [None]:
# Distribution of fill times
# `plt` comes from the notebook's imports cell, so the dependency is visible
# up front and this cell can be re-run on its own.
fill_hours = sells['time_to_fill_hours']

# Explicit Figure/Axes instead of the pyplot state machine, so the plot does
# not depend on whichever figure happens to be "current".
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(fill_hours, bins=50, edgecolor='black')
ax.set_xlabel('Time to Fill (hours)')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Sell Order Fill Times')
plt.show()

print(f"Median fill time: {fill_hours.median():.2f} hours")
print(f"Mean fill time: {fill_hours.mean():.2f} hours")

## 3. Weibull Distribution Theory

The Weibull distribution is commonly used in survival analysis. For our use case:
- **Event**: Item sells ("failure" in survival analysis terms)
- **Duration**: Time from listing to sale
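- **Censoring**: Items not yet sold. The current data contains only completed sales, so every observation is treated as an observed event and unsold listings are ignored; this likely makes the estimated fill probabilities optimistic (we'll handle this later with more data)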

The Weibull PDF:
$$f(t) = \frac{\rho}{\lambda} \left( \frac{t}{\lambda} \right)^{\rho-1} e^{-\left( \frac{t}{\lambda} \right)^\rho}$$

The Weibull CDF (what we need for fill probability):
$$F(t) = 1 - e^{-\left( \frac{t}{\lambda} \right)^\rho}$$

Where:
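- $\lambda$ (lambda) is the **scale parameter** - the characteristic fill time, in the same units as $t$ (hours here): $F(\lambda) = 1 - e^{-1} \approx 63.2\%$ of listings have sold by $t = \lambda$
- $\rho$ (rho) is the **shape parameter** - determines whether the sell rate (the hazard, i.e. "failure rate") increases or decreases with time since listing
  - $\rho < 1$: Sell rate decreases over time (a listing is most likely to sell soon after it is posted, and less likely the longer it sits)
  - $\rho = 1$: Constant sell rate (exponential distribution)
  - $\rho > 1$: Sell rate increases over time (a listing becomes more likely to sell the longer it has been up)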

For our optimizer, we use $F(t)$ to calculate: **P(item sells within t hours)**

## 4. Fit Weibull Models Per Item

In [None]:
def fit_item_models(df, min_observations=3, min_duration_hours=1e-3):
    """
    Fit a Weibull distribution to the sell fill times of each item.

    Every row is treated as an observed (uncensored) sale. Only finite,
    non-negative fill times are used, and the ``min_observations`` threshold
    is applied to that usable count rather than to the raw row count, so rows
    with missing or invalid fill times cannot push an item over the threshold.

    Args:
        df: DataFrame with columns [item_id, item_name, time_to_fill_hours]
        min_observations: Minimum number of usable sales required to fit a model
        min_duration_hours: Floor applied to fill times for the fit only.
            lifelines' parametric fitters reject non-positive durations, so
            instant fills (0 hours) are raised to this value instead of
            aborting the whole run or being discarded.

    Returns:
        Dictionary mapping item_id (int) -> dict with keys item_name, lambda_,
        rho_, n_observations, median_fill_hours, mean_fill_hours and
        std_fill_hours. The summary statistics are computed on the usable
        fill times before the floor is applied.
    """
    item_distributions = {}

    # Group by item_id (not item_name to avoid duplicate issues)
    for item_id, group in df.groupby('item_id'):
        raw_durations = group['time_to_fill_hours']
        # NaN compares False on both sides, so this drops missing values as
        # well as negative and infinite ones.
        durations = raw_durations[(raw_durations >= 0) & (raw_durations < np.inf)]

        # Threshold on what will actually be fitted, not on the raw rows
        if len(durations) < min_observations:
            continue

        item_name = group['item_name'].iloc[0]

        # Fit Weibull; zeros are lifted to the floor because the fitter
        # requires strictly positive durations.
        wf = WeibullFitter()
        wf.fit(durations.clip(lower=min_duration_hours))

        item_distributions[int(item_id)] = {
            'item_name': item_name,
            'lambda_': float(wf.lambda_),  # scale parameter
            'rho_': float(wf.rho_),        # shape parameter
            'n_observations': len(durations),
            'median_fill_hours': float(durations.median()),
            'mean_fill_hours': float(durations.mean()),
            'std_fill_hours': float(durations.std())
        }

    return item_distributions

In [None]:
# Fit one Weibull model per item that has enough sales
print("Fitting Weibull models per item...")
models = fit_item_models(sells, min_observations=3)
print(f"Fitted models for {len(models)} items")

# Persist the fitted parameters as JSON for later use
os.makedirs('data', exist_ok=True)
with open('data/item_fill_models.json', 'w') as models_file:
    json.dump(models, models_file, indent=2)
print("Models saved to data/item_fill_models.json")

## 5. Calculate Fill Probabilities

In [None]:
def calculate_fill_probability(lambda_, rho_, time_horizon_days):
    """
    Calculate P(item fills within time_horizon_days) using Weibull CDF.

    F(t) = 1 - exp(-(t/lambda)^rho)   for t >= 0, and 0 for t < 0

    Args:
        lambda_: Scale parameter from Weibull fit (in hours; expected > 0)
        rho_: Shape parameter from Weibull fit (expected > 0)
        time_horizon_days: Time horizon in DAYS (converted to hours internally).
            May be a scalar or a NumPy array; negative horizons are treated
            as 0, i.e. they yield probability 0.

    Returns:
        Probability between 0 and 1 (elementwise if an array was passed)
    """
    # Clamp first: a negative base raised to a fractional rho would give
    # NaN (NumPy) or a complex number (plain Python floats).
    time_horizon_hours = np.maximum(time_horizon_days, 0) * 24
    # -expm1(-x) equals 1 - exp(-x) but keeps precision when x is tiny,
    # where the naive form rounds to exactly 0.
    return -np.expm1(-((time_horizon_hours / lambda_) ** rho_))

In [None]:
# Example: What's the probability each item fills within different time horizons?
time_horizons = [1, 3, 7]  # days

# "Top" here means the items with the most recorded sales. Slicing the dict
# directly would only return the first ten entries in insertion order.
top_items = sorted(
    models.values(),
    key=lambda model: model['n_observations'],
    reverse=True,
)[:10]

print("\nFill probabilities for top 10 items:\n")
# One 8-wide column per horizon, so editing `time_horizons` keeps the header
# and the rows in sync.
horizon_header = " ".join(f"{f'{days}d':<8}" for days in time_horizons)
print(f"{'Item':<30} {'N':<5} {'Rho':<6} {horizon_header}")
print("-" * 75)

for model in top_items:
    probs = [calculate_fill_probability(model['lambda_'], model['rho_'], days)
             for days in time_horizons]
    prob_cells = " ".join(f"{prob:<8.1%}" for prob in probs)

    print(f"{model['item_name'][:30]:<30} "
          f"{model['n_observations']:<5} "
          f"{model['rho_']:<6.2f} "
          f"{prob_cells}")

## 6. Prepare Data for Optimizer

Now we need to:
1. Take the fill probability models (what we just built)
2. Join them with current market prices (from GW2 API)
3. Calculate margins and filter candidates

In [None]:
# TODO: Fetch current market prices from GW2 API
# For now, using dummy data structure to show the concept

def prepare_optimizer_input(models, market_data,
                            min_margin=0.05,
                            min_fill_prob=0.9,
                            time_horizon_days=7):
    """
    Prepare candidate items for LP optimizer.

    An item becomes a candidate when it has both a fitted model and market
    data, a positive buy price, a margin of at least ``min_margin`` and a
    fill probability of at least ``min_fill_prob`` within the horizon.

    Args:
        models: Dictionary of item_id -> Weibull parameters
        market_data: Dictionary of item_id -> {buy_price, sell_price}
        min_margin: Minimum profit margin (e.g., 0.05 = 5%)
        min_fill_prob: Minimum fill probability (e.g., 0.9 = 90%)
        time_horizon_days: Time horizon in DAYS

    Returns:
        DataFrame with columns: item_id, item_name, buy_price, sell_price,
                                margin_pct, fill_probability,
                                expected_profit_per_item, lambda_, rho_
        sorted by expected_profit_per_item (descending). When no item passes
        the filters an empty DataFrame with these columns is returned.
    """
    columns = [
        'item_id', 'item_name', 'buy_price', 'sell_price', 'margin_pct',
        'fill_probability', 'expected_profit_per_item', 'lambda_', 'rho_',
    ]
    candidates = []

    for item_id, model in models.items():
        # Get market data for this item
        if item_id not in market_data:
            continue

        buy_price = market_data[item_id]['buy_price']   # copper
        sell_price = market_data[item_id]['sell_price'] # copper

        # A zero buy price would divide by zero below; presumably it means
        # there is no usable buy quote for the item (verify against the
        # price source), so the item is skipped.
        if buy_price <= 0:
            continue

        # NOTE(review): the margin is computed on raw prices; confirm whether
        # trading-post fees still need to be deducted from sell_price.
        margin = (sell_price - buy_price) / buy_price
        if margin < min_margin:
            continue

        fill_prob = calculate_fill_probability(
            model['lambda_'],
            model['rho_'],
            time_horizon_days
        )
        if fill_prob < min_fill_prob:
            continue

        # Calculate expected profit (accounting for fill probability)
        expected_profit = (sell_price - buy_price) * fill_prob

        candidates.append({
            'item_id': item_id,
            'item_name': model['item_name'],
            'buy_price': buy_price,
            'sell_price': sell_price,
            'margin_pct': margin * 100,
            'fill_probability': fill_prob,
            'expected_profit_per_item': expected_profit,
            'lambda_': model['lambda_'],
            'rho_': model['rho_']
        })

    # Passing the columns explicitly keeps the sort key present even when the
    # candidate list is empty (a column-less frame would raise KeyError).
    result = pd.DataFrame(candidates, columns=columns)
    return result.sort_values('expected_profit_per_item', ascending=False)

In [None]:
# Example with dummy market data
# TODO: Replace with actual API call to /v2/commerce/prices

# Give the first five modelled items the same placeholder quote.
dummy_market_data = {
    item_id: {'buy_price': 1000, 'sell_price': 1150}  # dummy prices
    for item_id in list(models)[:5]
}

# Prepare candidates
candidates = prepare_optimizer_input(
    models,
    dummy_market_data,
    min_margin=0.05,
    min_fill_prob=0.9,
    time_horizon_days=7,
)

print(f"\nFound {len(candidates)} candidate items for optimization")
print("\nTop candidates:")
candidates.head(10)

## 7. Next Steps

- [ ] Fetch real market prices from GW2 API
- [ ] Build LP optimizer that takes these candidates and budget constraints
- [ ] Account for current positions (items already bought/listed)
- [ ] Build buy fill probability models (separate from sell fills)
- [ ] Integrate into Flask GUI