In [1]:
# Setup & Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
# Ensure the directories that later cells write into exist
# (exist_ok=True makes re-running this cell harmless).
for output_dir in ("csv_files", "outputs"):
    os.makedirs(output_dir, exist_ok=True)

In [3]:
# Locations of the two input CSVs (Google Drive "uc?id=" links); they are
# read with pd.read_csv further down.
_GDRIVE_DOWNLOAD_PREFIX = "https://drive.google.com/uc?id="
trader_data_url = _GDRIVE_DOWNLOAD_PREFIX + "1IAfLZwu6rJzyWKgBToqwSmmVYU6VbjVs"
sentiment_data_url = _GDRIVE_DOWNLOAD_PREFIX + "1PgQC0tO8XN-wqkNyghWc_-mnrYv_nhSf"

In [4]:
# Read both CSV sources into DataFrames (trader data first, then sentiment),
# then report the dimensions of each as a quick sanity check.
trader_df, sentiment_df = (
    pd.read_csv(source_url) for source_url in (trader_data_url, sentiment_data_url)
)

for label, frame in (("Trader", trader_df), ("Sentiment", sentiment_df)):
    print(f"{label} Data Shape:", frame.shape)

Trader Data Shape: (211224, 16)
Sentiment Data Shape: (2644, 4)


In [5]:
# Parse the date columns into datetimes. errors='coerce' means any value that
# cannot be parsed becomes NaT instead of raising.
sentiment_df['date'] = sentiment_df['date'].pipe(pd.to_datetime, errors='coerce')
trader_df['Timestamp IST'] = trader_df['Timestamp IST'].pipe(
    pd.to_datetime,
    format='%d-%m-%Y %H:%M',  # day-month-year, 24-hour hours:minutes
    errors='coerce',
)

In [6]:
# Derive the calendar day of each trade as a midnight-normalized datetime so it
# can be used as the join key against the sentiment `date` column.
# `.dt.normalize()` stays in datetime64 throughout (no round trip through
# object-dtype Python `date` values); NaT timestamps remain NaT.
trader_df['trade_date'] = trader_df['Timestamp IST'].dt.normalize()

In [7]:
# Normalize the sentiment labels: trim surrounding whitespace and title-case
# them, then fold the two "Extreme" variants into their base label.
# Any other label is left unchanged by the replace.
extreme_to_base = {
    'Extreme Fear': 'Fear',
    'Extreme Greed': 'Greed',
}
sentiment_df['classification'] = (
    sentiment_df['classification']
    .str.strip()
    .str.title()
    .replace(extreme_to_base)
)

In [8]:
# Merge datasets: attach the sentiment label of the trade's calendar day to
# every trade (left join, so trades without a matching day are kept with a
# missing classification).
#
# Both date columns were parsed with errors='coerce', and pandas matches null
# join keys against each other. Sentiment rows whose date is NaT are therefore
# excluded from the lookup so that trades with an unparseable timestamp do not
# pick up an arbitrary label.
sentiment_lookup = sentiment_df.loc[
    sentiment_df['date'].notna(), ['date', 'classification']
]

merged_df = pd.merge(
    trader_df,
    sentiment_lookup,
    left_on='trade_date',
    right_on='date',
    how='left',
    # Fail loudly if a date appears more than once on the sentiment side;
    # otherwise the join would silently duplicate trade rows.
    validate='many_to_one',
)

print("Merged Shape:", merged_df.shape)


Merged Shape: (211224, 19)


In [9]:
# Persist the merged trades + sentiment table (row index is not written)
merged_csv_path = "csv_files/merged_trader_sentiment.csv"
merged_df.to_csv(merged_csv_path, index=False)

In [10]:
# Exploratory Data Analysis (EDA)
# Apply seaborn's "whitegrid" style to the figures created from here on.
# `set_theme` is the preferred entry point; `sns.set` is only an alias for it.
sns.set_theme(style="whitegrid")

In [11]:
# 1. Distribution of Sentiment Days
# Count of sentiment_df rows per classification label, written to a PNG.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(
    data=sentiment_df,
    x="classification",
    hue="classification",
    palette="coolwarm",
    legend=False,
    ax=ax,
)
ax.set_title("Distribution of Fear vs Greed Days")
fig.savefig("outputs/sentiment_distribution.png")
plt.close(fig)  # the figure is only saved to disk, not displayed inline

In [12]:
# 2. Average Profitability by Sentiment
# One bar per classification label showing the aggregated "Closed PnL" of the
# merged trades; error bars are switched off.
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=merged_df,
    x="classification",
    y="Closed PnL",
    hue="classification",
    palette="coolwarm",
    legend=False,
    errorbar=None,
    ax=ax,
)
ax.set_title("Average Closed PnL by Sentiment")
fig.savefig("outputs/profitability_by_sentiment.png")
plt.close(fig)  # the figure is only saved to disk, not displayed inline


In [None]:
# 3. Trade Size (USD) by Sentiment
# Box plot of the "Size USD" column per classification label. This shows trade
# size, not leverage: no leverage column is used here.
plt.figure(figsize=(6,4))
sns.boxplot(x="classification", y="Size USD", data=merged_df, palette="coolwarm", hue="classification", legend=False)
plt.title("Trade Size (USD) by Sentiment")
plt.savefig("outputs/trade_size_by_sentiment.png")
plt.close()

In [14]:
# 4. Win Rate by Sentiment
# A trade counts as a win when its "Closed PnL" is strictly positive; the win
# rate is the share of such rows within each classification label.
# NOTE(review): rows with a PnL of exactly 0 count as non-wins here; confirm
# that is the intended definition.
merged_df['profitable'] = merged_df['Closed PnL'].gt(0)
win_rate = merged_df.groupby('classification', as_index=False)['profitable'].mean()

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=win_rate,
    x="classification",
    y="profitable",
    hue="classification",
    palette="coolwarm",
    legend=False,
    ax=ax,
)
ax.set_title("Win Rate by Sentiment")
ax.set_ylabel("Win Rate")  # replaces the raw column name on the y axis
fig.savefig("outputs/win_rate_by_sentiment.png")
plt.close(fig)  # the figure is only saved to disk, not displayed inline


In [15]:
# Write the cleaned input tables next to the merged file (row index omitted),
# then print a completion message.
for frame, csv_path in (
    (sentiment_df, "csv_files/processed_sentiment.csv"),
    (trader_df, "csv_files/processed_trader_data.csv"),
):
    frame.to_csv(csv_path, index=False)

print("✅ Data processing & EDA complete! Check 'csv_files/' and 'outputs/' folders.")

✅ Data processing & EDA complete! Check 'csv_files/' and 'outputs/' folders.
