In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta

In [3]:
!pip install openpyxl



In [29]:
# Read the OHLC bars and the scraped tweets from their Excel workbooks.
ohlc_workbook = "/Users/myronkneip/nitter_scraper/Final/OHLC_data.xlsx"
tweets_workbook = "/Users/myronkneip/nitter_scraper/Final/Testing_data.xlsx"

market_df = pd.read_excel(ohlc_workbook, engine="openpyxl")
tweets_df = pd.read_excel(tweets_workbook, engine="openpyxl")

In [30]:
# Only the last bare expression of a cell is rendered, so show both previews
# explicitly instead of silently discarding the market one.
display(market_df.head())
display(tweets_df.head())

Unnamed: 0,tweet_id,Timestamp,Tweet,Twitter_acc
0,16065,2025-07-12 23:50:00,Federal Judge Orders Trump Admin To Halt Immig...,zerohedge_tweets
1,16066,2025-07-12 23:20:00,The cover up of Jeffrey Epstein is worse than ...,zerohedge_tweets
2,16067,2025-07-12 23:15:00,The Complete Q2 Earnings Playbook zerohedge.co...,zerohedge_tweets
3,16068,2025-07-12 23:03:00,BREAKING: Alex Soros has funneled $24 mil to t...,zerohedge_tweets
4,16069,2025-07-12 22:58:00,"""If there was ANYTHING in there that could hav...",zerohedge_tweets


In [31]:
# Convert the timestamp columns of both tables to pandas datetimes.
bar_dates = pd.to_datetime(market_df["Dates"])
market_df["Dates"] = bar_dates

# Tweets are parsed as UTC first and then converted to tz-naive values.
tweet_times_utc = pd.to_datetime(tweets_df["Timestamp"], utc=True)
tweets_df["Timestamp"] = tweet_times_utc.dt.tz_convert(None)

In [32]:
# Mid_Price is the row-wise average of the four mid OHLC quotes.
mid_quote_columns = ["Mid_Open", "Mid_High", "Mid_Low", "Mid_Close"]
mid_quotes = market_df[mid_quote_columns]
market_df["Mid_Price"] = mid_quotes.mean(axis="columns")

In [33]:
# Set Dates as index for fast access.
# Guarded so the cell can be re-run: once "Dates" has been moved into the index
# a second set_index("Dates") would raise KeyError. A genuinely missing column
# still raises, because the guard only looks at the index name.
if market_df.index.name != "Dates":
    market_df = market_df.set_index("Dates")

In [34]:
# Look-ahead windows used when measuring market impact after a tweet.
# Insertion order is kept, and it determines the order of the result columns.
time_horizons = dict(
    [
        # minutes
        ("MI_1min", timedelta(minutes=1)),
        ("MI_5min", timedelta(minutes=5)),
        ("MI_15min", timedelta(minutes=15)),
        ("MI_30min", timedelta(minutes=30)),
        # hours
        ("MI_1h", timedelta(hours=1)),
        ("MI_2h", timedelta(hours=2)),
        ("MI_4h", timedelta(hours=4)),
        ("MI_8h", timedelta(hours=8)),
        ("MI_12h", timedelta(hours=12)),
        # days
        ("MI_1d", timedelta(days=1)),
        ("MI_2d", timedelta(days=2)),
        ("MI_3d", timedelta(days=3)),
        ("MI_4d", timedelta(days=4)),
    ]
)

In [35]:
# Build one record per tweet with the execution price and, for every horizon in
# `time_horizons`, the move of Mid_Open / Mid_Close relative to that price in
# basis points. Tweets with no bar at or before their timestamp are skipped.
results = []

# The bar timestamps do not change inside the loop, so fetch the index once.
bar_times = market_df.index

# (result-column suffix, market_df column) pairs evaluated at every horizon.
price_columns = (("MidOpen", "Mid_Open"), ("MidClose", "Mid_Close"))

for _, row in tweets_df.iterrows():
    tweet_time = row["Timestamp"]

    # Latest bar at or before the tweet; NaT means no such bar exists.
    base_time = bar_times[bar_times <= tweet_time].max()
    if pd.isna(base_time):
        continue

    # base_time equals tweet_time exactly when a bar exists at the tweet's own
    # timestamp. Otherwise the execution price comes from the last earlier bar
    # and the tweet is flagged as delayed.
    delayed_flag = bool(base_time != tweet_time)
    execution_price = market_df.loc[base_time, "Mid_Open"]

    impact_row = {
        "tweet_ID": row["tweet_id"],
        "Timestamp": tweet_time,
        "Twitter_acc": row["Twitter_acc"],
        "Tweet": row["Tweet"],
        "Mid_Open_Execution": execution_price,
        "Execution_Delayed": delayed_flag,
    }

    # The anchor does not depend on the horizon, so compute it once per tweet
    # instead of once per horizon. For delayed tweets the horizons start at the
    # first bar strictly after the tweet; if there is none this is NaT and every
    # horizon below resolves to NaN.
    if delayed_flag:
        anchor_time = bar_times[bar_times > tweet_time].min()
    else:
        anchor_time = tweet_time

    for label, delta in time_horizons.items():
        # First bar at or after the horizon target (NaT when data runs out).
        target_time = anchor_time + delta
        future_time = bar_times[bar_times >= target_time].min()

        for suffix, column in price_columns:
            key = f"{label}_{suffix}"

            if pd.isna(future_time):
                impact_row[key] = np.nan
                continue

            try:
                future_price = market_df.loc[future_time, column]
            except KeyError:
                impact_row[key] = np.nan
            else:
                # Relative move versus the execution price, in basis points.
                impact_row[key] = ((future_price - execution_price) / execution_price) * 10000

    results.append(impact_row)

In [36]:
# Pivot spread_df to wide format
spread_wide = spread_df.pivot(index="tweet_ID", columns="Horizon", values="SpreadChange_bps")

# Flatten column names like "MI_1min" → "MI_1min_SpreadChange_bps"
spread_wide.columns = [f"{col}_SpreadChange_bps" for col in spread_wide.columns]

# Reset index so tweet_ID becomes a column
spread_wide.reset_index(inplace=True)

# Merge into impact_df
impact_df = pd.merge(impact_df, spread_wide, on="tweet_ID", how="right")

# Optional: move all SpreadChange columns to the end
spread_cols = [col for col in impact_df.columns if col.endswith("_SpreadChange_bps")]
other_cols = [col for col in impact_df.columns if col not in spread_cols]
impact_df = impact_df[other_cols + spread_cols]

In [36]:
# Save to Excel
impact_df = pd.DataFrame(results)
impact_df.to_excel("tweet_market_impact_new.xlsx", index=False)

In [42]:
# Show which columns the tweets table currently carries.
tweet_columns = tweets_df.columns
print(tweet_columns)

Index(['Timestamp', 'Tweet', 'Twitter_acc', 'tweet_id'], dtype='object')


In [2]:
import numpy as np
import plotly.graph_objects as go

# Plot a hedge-fund fee schedule (flat management fee plus a performance fee
# charged on returns above a hurdle) as a function of the fund return.

# Define the range of returns (fractions, i.e. -10% .. +20%)
returns = np.linspace(-0.1, 0.2, 500)

# Fee structure parameters (fractions of AUM / of excess return)
management_fee = 0.02
hurdle_rate = 0.02
performance_rate = 0.20

# Calculate fees: the performance fee applies only to the return above the hurdle
performance_fee = np.where(returns > hurdle_rate, performance_rate * (returns - hurdle_rate), 0)
total_fees = management_fee + performance_fee

# Labels are derived from the parameters so they cannot drift out of sync
# when a rate above is changed.
management_label = f"Management Fee ({management_fee * 100:g}%)"
hurdle_label = f"Hurdle Rate ({hurdle_rate * 100:g}%)"

# (legend name, fee as a fraction, line style) for each curve, in draw order
fee_curves = [
    (management_label, np.full_like(returns, management_fee), dict(dash='dash', color='orange')),
    ('Performance Fee', performance_fee, dict(color='blue')),
    ('Total Fees', total_fees, dict(color='red')),
]

# Create interactive plot; both axes are shown in percent
fig = go.Figure()
returns_pct = returns * 100

for curve_name, fee_fraction, line_style in fee_curves:
    fig.add_trace(go.Scatter(
        x=returns_pct,
        y=fee_fraction * 100,
        mode='lines',
        name=curve_name,
        line=line_style
    ))

# Mark the return level above which the performance fee starts to accrue
fig.add_vline(
    x=hurdle_rate * 100,
    line=dict(color='gray', dash='dot'),
    annotation_text=hurdle_label,
    annotation_position='top right'
)

fig.update_layout(
    title="Interactive Hedge Fund Fee Structure",
    xaxis_title="Fund Return (%)",
    yaxis_title="Fees (% of AUM)",
    legend_title="Fee Type",
    template="plotly_white"
)

fig.show()