In [156]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import itertools

# Load data
price_data = pd.read_csv('basket_orderbook_data_all.csv', delimiter=';').fillna(0)
trade_data = pd.read_csv('basket_trade_data_all.csv', delimiter=';').fillna(0)
price_data['smart_price'] = (price_data['ask_price_1'] + price_data['bid_price_1']) / 2

# Names of the traders involved
traders = ['Remy', 'Vladimir', 'Vinnie', 'Rhianna']

# Generate all possible pair combinations of traders
combinations = list(itertools.combinations(traders, 2))

# Classify trades by combination and role
trade_types = {}
for trader1, trader2 in combinations:
    # Buyer-Seller combinations
    bs = trade_data[(trade_data['buyer'] == trader1) & (trade_data['seller'] == trader2)]
    sb = trade_data[(trade_data['buyer'] == trader2) & (trade_data['seller'] == trader1)]
    trade_types[f'{trader1} buys from {trader2}'] = bs
    trade_types[f'{trader2} buys from {trader1}'] = sb

def analyze_price_impact(trades, future_steps):
    results = []
    for _, trade in trades.iterrows():
        timestamp = trade['timestamp']
        buy_price = trade['price']
        price_changes = {}
        for step in future_steps:
            future_time = timestamp + step
            if future_time in price_data['timestamp'].values:
                future_price = price_data.loc[price_data['timestamp'] == future_time, 'smart_price'].values[0]
                price_change = (future_price - buy_price) / buy_price * 100
            else:
                price_change = np.nan  # Handle cases where the future timestamp isn't available
            price_changes[step] = price_change
        results.append(price_changes)

    result_df = pd.DataFrame(results)
    # Remove rows where all entries are NaN (no future data available)
    result_df = result_df.dropna(how='all')
    # Exclude zero price changes
    result_df = result_df[result_df != 0]
    return result_df.mean()

# Analyzing price changes for each trader combination
future_steps = [1, 5, 10, 25, 50, 100, 250, 500, 2000]
impact_results = {}
for trade_type, trades in trade_types.items():
    avg_changes = analyze_price_impact(trades, future_steps)
    trade_count = len(trades)
    impact_results[trade_type] = {
        'Trade Count': trade_count,
        'Average Change': avg_changes
    }

# Formatting and displaying the results
for trade_type, data in impact_results.items():
    print(f"Trade Type: {trade_type}")
    print(f"Trade Count: {data['Trade Count']}")
    print("Average Changes:")
    if not data['Average Change'].empty:
        for step, value in data['Average Change'].items():
            print(f"  {step} steps: {value:.2f}%")
    else:
        print("  No sufficient data for price change analysis.")
    print("\n")


Trade Type: Remy buys from Vladimir
Trade Count: 0
Average Changes:
  No sufficient data for price change analysis.


Trade Type: Vladimir buys from Remy
Trade Count: 0
Average Changes:
  No sufficient data for price change analysis.


Trade Type: Remy buys from Vinnie
Trade Count: 0
Average Changes:
  No sufficient data for price change analysis.


Trade Type: Vinnie buys from Remy
Trade Count: 0
Average Changes:
  No sufficient data for price change analysis.


Trade Type: Remy buys from Rhianna
Trade Count: 0
Average Changes:
  No sufficient data for price change analysis.


Trade Type: Rhianna buys from Remy
Trade Count: 0
Average Changes:
  No sufficient data for price change analysis.


Trade Type: Vladimir buys from Vinnie
Trade Count: 28
Average Changes:
  1 steps: -0.00%
  5 steps: 0.00%
  10 steps: 0.01%
  25 steps: 0.01%
  50 steps: 0.01%
  100 steps: 0.03%
  250 steps: -0.02%
  500 steps: -0.04%
  2000 steps: -0.16%


Trade Type: Vinnie buys from Vladimir
Trade Count: 0
Ave

In [155]:
import pandas as pd

# Load and preprocess data
price_data = pd.read_csv('basket_orderbook_data_all.csv', delimiter=';').fillna(0)
trade_data = pd.read_csv('basket_trade_data_all.csv', delimiter=';').fillna(0)

# Calculate the smart price
price_data['smart_price'] = (
    (((price_data['bid_price_1'] * price_data['bid_volume_1'] + 
       price_data['bid_price_2'] * price_data['bid_volume_2'] + 
       price_data['bid_price_3'] * price_data['bid_volume_3']) / 
      (price_data['bid_volume_1'] + price_data['bid_volume_2'] + price_data['bid_volume_3'])) *
     (price_data['ask_volume_1'] + price_data['ask_volume_2'] + price_data['ask_volume_3']) +
     ((price_data['ask_price_1'] * price_data['ask_volume_1'] + 
       price_data['ask_price_2'] * price_data['ask_volume_2'] + 
       price_data['ask_price_3'] * price_data['ask_volume_3']) / 
      (price_data['ask_volume_1'] + price_data['ask_volume_2'] + price_data['ask_volume_3'])) *
     (price_data['bid_volume_1'] + price_data['bid_volume_2'] + price_data['bid_volume_3'])
    ) / 
    (price_data['ask_volume_1'] + price_data['ask_volume_2'] + price_data['ask_volume_3'] +
     price_data['bid_volume_1'] + price_data['bid_volume_2'] + price_data['bid_volume_3'])
)

# Prepare trading data
traders = pd.DataFrame({
    'timestamp': trade_data['timestamp'].tolist() + trade_data['timestamp'].tolist(),
    'name': trade_data['buyer'].tolist() + trade_data['seller'].tolist(),
    'quantity': trade_data['quantity'].tolist() + [-x for x in trade_data['quantity'].tolist()],
    'price': trade_data['price'].tolist() + trade_data['price'].tolist(),
    'value': (trade_data['quantity'] * trade_data['price']).tolist() + [-x * y for x, y in zip(trade_data['quantity'], trade_data['price'])]
})

# Calculate cumulative position
traders['cumulative_position'] = traders.groupby('name')['quantity'].cumsum()

# Merge cumulative position and smart price
merged_data = pd.merge(traders, price_data[['timestamp', 'smart_price']], on='timestamp', how='left')

# Calculate trader's position signal: +1 for long, -1 for short
merged_data['position_signal'] = merged_data['cumulative_position'].apply(lambda x: 1 if x > 0 else -1)

# Calculate price movement signal: +1 if price went up, -1 if price went down
merged_data['price_movement_signal'] = merged_data['smart_price'].diff().fillna(0).apply(lambda x: 1 if x > 0 else -1)

# Calculate correlation for each trader
correlation_results = merged_data.groupby('name').apply(
    lambda df: df[['cumulative_position', 'smart_price']].corr().iloc[0, 1]
)
discrete_correlation_results = merged_data.groupby('name').apply(
    lambda df: df[['position_signal', 'smart_price']].corr().iloc[0, 1]
)

# Summarize buys, sells, and calculate PnL
traders_summary = traders.groupby('name').agg(
    total_quantity=('quantity', 'sum'),
    absolute_quantity=('quantity', lambda x: x.abs().sum()),
    total_value=('value', 'sum')
)

# Calculate the net position (quantity)
traders_summary['net_position'] = traders_summary['total_quantity']

# Get the final smart price to close positions
final_smart_price = price_data['smart_price'].iloc[-1]

# Calculate PnL
traders_summary['pnl'] = -traders_summary['total_value'] + (traders_summary['net_position'] * final_smart_price)

# Calculate PnL per unit volume traded
traders_summary['pnl_per_unit_volume'] = traders_summary['pnl'] / traders_summary['absolute_quantity']

# Add correlation data
traders_summary['position_price_correlation'] = correlation_results
traders_summary['discrete_position_price_correlation'] = discrete_correlation_results

# Print the results
print(traders_summary[['pnl', 'pnl_per_unit_volume', 'position_price_correlation', 'discrete_position_price_correlation']])


                pnl  pnl_per_unit_volume  position_price_correlation  \
name                                                                   
Rhianna   889336.50           325.764286                    0.395222   
Ruby       60431.25            21.777027                    0.070660   
Vinnie   -109747.75           -62.964859                   -0.139629   
Vladimir -840020.00          -220.362015                   -0.332196   

          discrete_position_price_correlation  
name                                           
Rhianna                              0.570294  
Ruby                                 0.318224  
Vinnie                                    NaN  
Vladimir                                  NaN  
