# Probability of Profit Model

Two calculations:

1. Calculate the probability of a stock moving x% from a random price point on its historical price graph.
2. Calculate the historical track record of a particular trader at picking buy points that allow for an x% move in the future.

In [1]:
import pandas as pd
import os

def read_polymarket_csv(csv_filename, data_dir="data", date_columns=("trade_dttm", "trade_dt")):
    """
    Read a Polymarket CSV export and return it as a pandas DataFrame.

    Every column named in `date_columns` that is present in the file is parsed
    to datetime. The original implementation only looked for `trade_dttm`, so
    price files whose date column is `trade_dt` were returned with unparsed
    dates and no "Date range" line was printed.

    Example usage:
    `$ df = read_polymarket_csv('polymarket_trades_0xd218e4_202505301025.csv')`

    Args:
        csv_filename (str): Name of the CSV file (e.g., 'polymarket_trades_0xd218e4_202505301025.csv')
        data_dir (str): Directory that contains the CSV file. Defaults to 'data',
            resolved relative to the current working directory.
        date_columns (iterable of str): Column names to convert to datetime when present.

    Returns:
        pandas.DataFrame or None: The loaded data, or None when the file is
        missing or cannot be read/parsed (an error message is printed instead
        of raising, so callers must check for None).
    """
    try:
        # Construct full path to the CSV file in the data directory
        file_path = os.path.join(data_dir, csv_filename)

        # Check if file exists
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Read the CSV file
        df = pd.read_csv(file_path)

        # Convert every known date column that this file actually has
        present_date_columns = [col for col in date_columns if col in df.columns]
        for col in present_date_columns:
            df[col] = pd.to_datetime(df[col])

        print(f"Successfully loaded {len(df)} trades from {csv_filename}")
        print(f"Columns: {list(df.columns)}")

        # Report the covered period using the first date column found
        if present_date_columns:
            range_col = present_date_columns[0]
            print(f"Date range: {df[range_col].min()} to {df[range_col].max()}")

        return df

    except FileNotFoundError as e:
        print(f"Error: {e}")
        return None
    except Exception as e:
        # Deliberate best-effort: report any read/parse problem and signal failure with None
        print(f"Error reading CSV file: {e}")
        return None


Successfully loaded 312 trades from poly_market_prices_10018_202505310659.csv
Columns: ['trade_dt', 'token_id', 'event_id', 'open_price', 'high_price', 'low_price', 'close_price', 'question', 'insert_time']


Unnamed: 0,trade_dt,token_id,event_id,open_price,high_price,low_price,close_price,question,insert_time
0,2024-04-03 00:00:00,1104070407288410746535798118573957347424626792...,10018,0.41,0.41,0.41,0.41,Sam Brown,2025-05-31 13:01:08
1,2024-04-03 00:00:00,6790784570260669878717452560602682254123712571...,10018,0.594059,0.594059,0.594059,0.594059,Jacky Rosen,2025-05-31 13:01:08
2,2024-04-04 00:00:00,1104070407288410746535798118573957347424626792...,10018,0.4,0.4,0.4,0.4,Sam Brown,2025-05-31 13:01:08
3,2024-04-04 00:00:00,6790784570260669878717452560602682254123712571...,10018,0.6,0.6,0.6,0.6,Jacky Rosen,2025-05-31 13:01:08
4,2024-04-05 00:00:00,6790784570260669878717452560602682254123712571...,10018,0.6,0.6,0.59,0.6,Jacky Rosen,2025-05-31 13:01:08


In [18]:
import pandas as pd
import numpy as np

def create_buy_results(df, target_gain_pct, price_column, date_column):
    """
    Evaluate every row (except the last) as a hypothetical buy point.

    Rows are sorted by `date_column`; for each buy point the function checks
    whether any later price reaches `buy_price * (1 + target_gain_pct)`.

    Args:
        df (pd.DataFrame): Price data containing `price_column` and `date_column`.
        target_gain_pct (float): Target gain as a fraction (0.15 = 15%).
        price_column (str): Name of the price column.
        date_column (str): Name of the date column (used for ordering and day counts).

    Returns:
        pd.DataFrame: One row per buy point with columns buy_index, buy_date,
        buy_price, target_price, max_future_price, gain_achieved, max_gain_pct
        and days_to_target (missing when the target was never reached).
        The columns are always present, even when `df` has fewer than two rows
        and therefore yields no buy points.
    """
    result_columns = [
        'buy_index', 'buy_date', 'buy_price', 'target_price',
        'max_future_price', 'gain_achieved', 'max_gain_pct', 'days_to_target',
    ]

    # Sort by date to ensure chronological order; the fresh 0..n-1 index makes
    # index labels and positions interchangeable below.
    df_sorted = df.sort_values(date_column).reset_index(drop=True)
    prices = df_sorted[price_column]
    dates = df_sorted[date_column]

    buy_results = []

    for i in range(len(df_sorted) - 1):  # Don't include last point (no future data)
        buy_price = prices.iloc[i]
        buy_date = dates.iloc[i]

        # Price that must be reached for the requested gain
        target_price = buy_price * (1 + target_gain_pct)

        # All prices strictly after this buy point (never empty inside this loop)
        future_prices = prices.iloc[i + 1:]
        max_future_price = future_prices.max()
        gain_achieved = max_future_price >= target_price

        # Best gain actually available after the buy; guarded against division by zero
        max_gain_pct = (max_future_price - buy_price) / buy_price if buy_price > 0 else 0

        # Days until the target was first reached (if ever)
        days_to_target = None
        if gain_achieved:
            # idxmax on a boolean Series returns the label of the first True
            first_hit_idx = (future_prices >= target_price).idxmax()
            first_target_hit = dates.loc[first_hit_idx]
            days_to_target = (pd.to_datetime(first_target_hit) - pd.to_datetime(buy_date)).days

        buy_results.append({
            'buy_index': i,
            'buy_date': buy_date,
            'buy_price': buy_price,
            'target_price': target_price,
            'max_future_price': max_future_price,
            'gain_achieved': gain_achieved,
            'max_gain_pct': max_gain_pct,
            'days_to_target': days_to_target
        })

    # Explicit columns keep the schema stable when there are no buy points
    return pd.DataFrame(buy_results, columns=result_columns)

In [23]:
# Scratch cell: load one price file, keep a single outcome's rows and
# exercise create_buy_results on them.
# Alternative input file, currently disabled:
#df = read_polymarket_csv('poly_market_prices_10018_202505310659.csv')

# read_polymarket_csv returns None on any read error, in which case the
# filter on the next line fails.
df = read_polymarket_csv('poly_market_prices_16403_202505310709.csv')
data = df[df['question'] == 'Chargers vs. Texans - Chargers'].copy()

# Disabled call to the full probability analysis:
#calculate_gain_probability(data)

# The result is neither assigned nor the last expression of the cell,
# so it is computed and discarded.
create_buy_results(data, 0.15, 'close_price', 'trade_dt')


# NOTE(review): this sorts by price rather than by date and the result is only
# used for the discarded range() below — looks like leftover debugging; confirm.
df_sorted = data.sort_values('close_price').reset_index(drop=True)
    
range(len(df_sorted) - 1)

# Last expression: the cell displays the full, unfiltered frame.
df

Successfully loaded 14 trades from poly_market_prices_16403_202505310709.csv
Columns: ['trade_dt', 'token_id', 'event_id', 'open_price', 'high_price', 'low_price', 'close_price', 'question', 'insert_time']


Unnamed: 0,trade_dt,token_id,event_id,open_price,high_price,low_price,close_price,question,insert_time
0,2025-01-06 00:00:00,2979083389690051197257811567629280221509010255...,16403,0.41,0.95,0.04,0.4,Chargers vs. Texans - Texans,2025-05-31 14:01:01
1,2025-01-06 00:00:00,9512366616502553113701155174432964511959850750...,16403,0.59,0.724255,0.03,0.6,Chargers vs. Texans - Chargers,2025-05-31 14:01:01
2,2025-01-07 00:00:00,2979083389690051197257811567629280221509010255...,16403,0.41,0.42,0.39,0.4,Chargers vs. Texans - Texans,2025-05-31 14:01:01
3,2025-01-07 00:00:00,9512366616502553113701155174432964511959850750...,16403,0.59,0.61,0.58,0.6,Chargers vs. Texans - Chargers,2025-05-31 14:01:01
4,2025-01-08 00:00:00,2979083389690051197257811567629280221509010255...,16403,0.4,0.42,0.4,0.4,Chargers vs. Texans - Texans,2025-05-31 14:01:01
5,2025-01-08 00:00:00,9512366616502553113701155174432964511959850750...,16403,0.6,0.6,0.58,0.59,Chargers vs. Texans - Chargers,2025-05-31 14:01:01
6,2025-01-09 00:00:00,2979083389690051197257811567629280221509010255...,16403,0.4,0.42,0.4,0.41,Chargers vs. Texans - Texans,2025-05-31 14:01:01
7,2025-01-09 00:00:00,9512366616502553113701155174432964511959850750...,16403,0.6,0.6,0.58,0.59,Chargers vs. Texans - Chargers,2025-05-31 14:01:01
8,2025-01-10 00:00:00,2979083389690051197257811567629280221509010255...,16403,0.4,0.42,0.4,0.4,Chargers vs. Texans - Texans,2025-05-31 14:01:01
9,2025-01-10 00:00:00,9512366616502553113701155174432964511959850750...,16403,0.6,0.6,0.58,0.6,Chargers vs. Texans - Chargers,2025-05-31 14:01:01


In [15]:
def _success_by_price_level(results_df):
    """Summarise gain_achieved (count / sum / mean) per buy-price bucket."""
    level_labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High']
    buy_prices = results_df['buy_price']

    if buy_prices.nunique() < 2:
        # Quantile bins are meaningless with a single distinct price:
        # report one bucket keyed by that price instead.
        grouping = buy_prices
    else:
        try:
            grouping = pd.qcut(buy_prices, q=5, labels=level_labels)
        except ValueError:
            # Repeated prices can make quantile edges coincide, and the five fixed
            # labels then no longer match the number of bins. Fall back to
            # however many distinct quantile intervals exist.
            grouping = pd.qcut(buy_prices, q=5, duplicates='drop')

    return results_df.groupby(grouping, observed=False)['gain_achieved'].agg(['count', 'sum', 'mean'])


def calculate_gain_probability(df, target_gain_pct=0.15, price_column='close_price', date_column='trade_dt'):
    """
    Calculate the probability that a price goes up by target_gain_pct from any random buy point.

    Args:
        df (pd.DataFrame): DataFrame with price data
        target_gain_pct (float): Target gain percentage (0.15 = 15%)
        price_column (str): Name of the price column
        date_column (str): Name of the date column

    Returns:
        dict: Results with keys
            'probability'            share of buy points that later reached the target (0 if none exist)
            'total_buy_points'       number of buy points evaluated
            'successful_points'      number of buy points that reached the target
            'avg_max_gain'           mean of the best gain available after each buy point
            'median_max_gain'        median of the same
            'avg_days_to_target'     mean days to first target hit, or None without successes
            'success_by_price_level' count/sum/mean of success per buy-price bucket; named
                                     quintiles when possible, otherwise interval or
                                     single-price buckets
            'detailed_results'       the per-buy-point DataFrame from create_buy_results
    """
    results_df = create_buy_results(df, target_gain_pct, price_column, date_column)

    total_buy_points = len(results_df)

    # No buy points (fewer than two price rows): return a well-formed, empty result
    # instead of failing on missing columns or on quantile binning.
    if total_buy_points == 0:
        return {
            'probability': 0,
            'total_buy_points': 0,
            'successful_points': 0,
            'avg_max_gain': float('nan'),
            'median_max_gain': float('nan'),
            'avg_days_to_target': None,
            'success_by_price_level': pd.DataFrame(columns=['count', 'sum', 'mean']),
            'detailed_results': results_df
        }

    # Calculate overall probability
    successful_points = results_df['gain_achieved'].sum()
    probability = successful_points / total_buy_points

    # Additional statistics
    avg_max_gain = results_df['max_gain_pct'].mean()
    median_max_gain = results_df['max_gain_pct'].median()

    # For successful cases, average time to target
    successful_cases = results_df[results_df['gain_achieved'].astype(bool)]
    avg_days_to_target = successful_cases['days_to_target'].mean() if len(successful_cases) > 0 else None

    # Price level analysis
    success_by_price_level = _success_by_price_level(results_df)

    return {
        'probability': probability,
        'total_buy_points': total_buy_points,
        'successful_points': successful_points,
        'avg_max_gain': avg_max_gain,
        'median_max_gain': median_max_gain,
        'avg_days_to_target': avg_days_to_target,
        'success_by_price_level': success_by_price_level,
        'detailed_results': results_df
    }

def analyze_multiple_targets(df, target_gains=(0.05, 0.10, 0.15, 0.20, 0.25), price_column='close_price', date_column='trade_dt'):
    """
    Analyze probability for multiple target gain percentages.

    Args:
        df (pd.DataFrame): Price data
        target_gains (iterable of float): Target gain percentages to analyze
            (fractions, 0.15 = 15%). The default is an immutable tuple so it
            cannot be altered between calls.
        price_column (str): Name of price column
        date_column (str): Name of date column, forwarded to calculate_gain_probability

    Returns:
        pd.DataFrame: One row per target with columns 'target_gain' and
        'probability' (both formatted as percentage strings) and
        'avg_days_to_target'.
    """
    results = []

    for target in target_gains:
        analysis = calculate_gain_probability(
            df,
            target_gain_pct=target,
            price_column=price_column,
            date_column=date_column,
        )
        results.append({
            'target_gain': f"{target:.1%}",
            'probability': f"{analysis['probability']:.1%}",
            'avg_days_to_target': analysis['avg_days_to_target']
        })

    return pd.DataFrame(results)

def print_analysis_summary(df, target_gain_pct=0.15, price_column='close_price', date_column='trade_dt'):
    """
    Print a comprehensive analysis of gain probabilities.

    Prints the single-target analysis for `target_gain_pct`, followed by the
    default multi-target table computed on the same price and date columns.

    Args:
        df (pd.DataFrame): Price data
        target_gain_pct (float): Target gain percentage (0.15 = 15%)
        price_column (str): Name of the price column
        date_column (str): Name of the date column

    Returns:
        None. All results are written to stdout.
    """
    analysis = calculate_gain_probability(df, target_gain_pct, price_column, date_column)

    print(f"=== {target_gain_pct:.1%} Gain Probability Analysis ===")
    print()
    print(f"Overall Probability: {analysis['probability']:.1%}")
    print(f"Total Buy Points Analyzed: {analysis['total_buy_points']:,}")
    print(f"Successful Points: {analysis['successful_points']:,}")
    print()
    print(f"Average Max Gain Achieved: {analysis['avg_max_gain']:.1%}")
    print(f"Median Max Gain Achieved: {analysis['median_max_gain']:.1%}")

    # Explicit missing-value check: an average of 0.0 days is a real result and
    # must be printed, while None/NaN means there is nothing to report.
    avg_days_to_target = analysis['avg_days_to_target']
    if avg_days_to_target is not None and pd.notna(avg_days_to_target):
        print(f"Average Days to Reach Target: {avg_days_to_target:.1f}")

    print()
    print("Success Rate by Price Level:")
    print(analysis['success_by_price_level'])

    # Multiple target analysis, on the same columns as the summary above
    print(f"\n=== Multiple Target Analysis ===")
    multi_analysis = analyze_multiple_targets(df, price_column=price_column, date_column=date_column)
    print(multi_analysis)

# Example usage for your data.
# The block below is a bare string literal, not executed code: nothing in it runs,
# and as the last expression of the cell it is echoed back as the cell output.
# Note that print_analysis_summary has no return statement, so the `results`
# assignments shown in the example would bind None.
"""
# Assuming your DataFrame is called 'df' and has columns 'trade_dt' and 'close_price'

# For Sam Brown token specifically
sam_brown_data = df[df['question'] == 'Sam Brown'].copy()

# Calculate 15% gain probability
results = print_analysis_summary(sam_brown_data, target_gain_pct=0.15, price_column='close_price')

# For Jacky Rosen token
jacky_rosen_data = df[df['question'] == 'Jacky Rosen'].copy()
results = print_analysis_summary(jacky_rosen_data, target_gain_pct=0.15, price_column='close_price')

# Compare different gain targets
multi_results = analyze_multiple_targets(sam_brown_data, target_gains=[0.05, 0.10, 0.15, 0.20, 0.30])
print(multi_results)
"""

"\n# Assuming your DataFrame is called 'df' and has columns 'trade_dt' and 'close_price'\n\n# For Sam Brown token specifically\nsam_brown_data = df[df['question'] == 'Sam Brown'].copy()\n\n# Calculate 15% gain probability\nresults = print_analysis_summary(sam_brown_data, target_gain_pct=0.15, price_column='close_price')\n\n# For Jacky Rosen token\njacky_rosen_data = df[df['question'] == 'Jacky Rosen'].copy()\nresults = print_analysis_summary(jacky_rosen_data, target_gain_pct=0.15, price_column='close_price')\n\n# Compare different gain targets\nmulti_results = analyze_multiple_targets(sam_brown_data, target_gains=[0.05, 0.10, 0.15, 0.20, 0.30])\nprint(multi_results)\n"

In [17]:
# Load one price file, keep a single outcome's rows and show the per-buy-point
# results for a 15% target gain on the closing price.
# Alternative input file, currently disabled:
#df = read_polymarket_csv('poly_market_prices_10018_202505310659.csv')

# read_polymarket_csv returns None on any read error, in which case the
# filter on the next line fails.
df = read_polymarket_csv('poly_market_prices_16403_202505310709.csv')
data = df[df['question'] == 'Chargers vs. Texans - Chargers'].copy()

# Disabled call to the full probability analysis:
#calculate_gain_probability(data)

# Last expression of the cell: the returned DataFrame is displayed as the output.
create_buy_results(data, 0.15, 'close_price', 'trade_dt')

Successfully loaded 14 trades from poly_market_prices_16403_202505310709.csv
Columns: ['trade_dt', 'token_id', 'event_id', 'open_price', 'high_price', 'low_price', 'close_price', 'question', 'insert_time']


Unnamed: 0,buy_index,buy_date,buy_price,target_price,max_future_price,gain_achieved,max_gain_pct,days_to_target
0,0,2025-01-06 00:00:00,0.6,0.69,0.6,False,1.850372e-16,
1,1,2025-01-07 00:00:00,0.6,0.69,0.6,False,1.850372e-16,
2,2,2025-01-08 00:00:00,0.59,0.6785,0.6,False,0.01694915,
3,3,2025-01-09 00:00:00,0.59,0.6785,0.6,False,0.01694915,
4,4,2025-01-10 00:00:00,0.6,0.69,0.32,False,-0.4666667,
5,5,2025-01-11 00:00:00,0.32,0.368,0.001,False,-0.996875,
