In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix

# Lay out the deliverable folder: a root directory plus one sub-folder for
# CSV artefacts and one for generated figures. exist_ok=True makes the cell
# safe to re-run.
project_root = "ds_P_Dhanush_Kumar"
os.makedirs(project_root, exist_ok=True)
for subfolder in ("csv_files", "outputs"):
    os.makedirs(f"{project_root}/{subfolder}", exist_ok=True)


In [None]:
# Both raw inputs are read from the current working directory
# (the original workflow assumes they were uploaded manually in Colab).
fear_greed = pd.read_csv("fear_greed_index.csv")
historical = pd.read_csv("historical_data.csv")

# Store an unmodified copy of each input inside the project folder.
for csv_name, raw_frame in (
    ("fear_greed_index.csv", fear_greed),
    ("historical_data.csv", historical),
):
    raw_frame.to_csv(f"{project_root}/csv_files/{csv_name}", index=False)


In [None]:
# Sentiment rows: 'timestamp' is parsed as Unix seconds and reduced to a
# calendar date so it can serve as the join key.
sentiment_moments = pd.to_datetime(fear_greed['timestamp'], unit='s')
fear_greed['date'] = sentiment_moments.dt.date

# Trade rows: 'Timestamp' is divided by 1000 before being parsed as seconds
# (presumably the column holds epoch milliseconds -- confirm against the data).
trade_moments = pd.to_datetime(historical['Timestamp'] / 1000, unit='s')
historical['date'] = trade_moments.dt.date

# Keep the sentiment class as a plain string label.
fear_greed['sentiment'] = fear_greed['classification'].astype(str)


In [None]:
# Signed encoding shared by both columns: +1 for a buy, -1 for a sell.
# Labels are upper-cased and stripped before mapping so 'BUY', 'Buy' and
# ' buy ' are treated alike; anything else (including missing values) maps
# to NaN.
signed_side = {'BUY': 1, 'SELL': -1}

historical['side_binary'] = (
    historical['Side'].astype(str).str.strip().str.upper().map(signed_side)
)

# Use the 'Direction' column when the export has one; otherwise fall back to
# the executed side.
if 'Direction' in historical.columns:
    historical['direction_binary'] = (
        historical['Direction'].astype(str).str.strip().str.upper().map(signed_side)
    )
else:
    historical['direction_binary'] = historical['side_binary']

# Hedge mismatch flag: 1 only when BOTH encodings are known and disagree.
# A bare `!=` would also flag every row with an unmapped value, because
# NaN != NaN evaluates to True; those rows are left at 0 instead.
both_known = historical['side_binary'].notna() & historical['direction_binary'].notna()
sides_differ = historical['side_binary'] != historical['direction_binary']
historical['hedge_flag'] = (both_known & sides_differ).astype(int)

# Short alias for the realised PnL column used by the later aggregation.
historical['pnl'] = historical['Closed PnL']


In [None]:
# Collapse the trade-level frame to one row per calendar date. Named
# aggregation yields the final flat column names directly, in this order.
trades_daily = (
    historical
    .groupby('date')
    .agg(
        pnl_sum=('pnl', 'sum'),
        pnl_mean=('pnl', 'mean'),
        side_bias=('side_binary', 'mean'),
        direction_bias=('direction_binary', 'mean'),
        hedge_rate=('hedge_flag', 'mean'),
        usd_volume=('Size USD', 'sum'),
        avg_price=('Execution Price', 'mean'),
        trade_count=('Trade ID', 'count'),  # counts non-null trade ids
    )
    .reset_index()  # bring 'date' back as an ordinary column
)


In [None]:
# Reduce the sentiment table to a single label per date before joining.
# Without this, a date that appears more than once in fear_greed would fan
# out the matching trading day into several rows and be counted repeatedly
# in every later plot and in the model. When duplicates exist, the row that
# appears last in the file is kept.
daily_sentiment = fear_greed[['date', 'sentiment']].drop_duplicates(subset='date', keep='last')

# Attach the sentiment label to each trading day, then discard days that have
# no sentiment reading. A fresh frame is built (no inplace mutation) so the
# cell gives the same result every time it is re-run.
merged = (
    trades_daily
    .merge(daily_sentiment, on='date', how='left')
    .dropna(subset=['sentiment'])
    .reset_index(drop=True)
)

# Persist the joined daily table next to the raw inputs.
merged.to_csv(f"{project_root}/csv_files/merged_sentiment_trades.csv", index=False)


In [None]:
# Apply the seaborn "whitegrid" theme; it stays in effect for later figures.
sns.set(style="whitegrid")

# Box plot: spread of total daily PnL within each sentiment class.
fig, ax = plt.subplots(figsize=(7, 4))
sns.boxplot(data=merged, x='sentiment', y='pnl_sum', ax=ax)
ax.set_title('PnL vs Sentiment')
plt.setp(ax.get_xticklabels(), rotation=30)  # tilt category labels for legibility
fig.savefig(f"{project_root}/outputs/pnl_vs_sentiment_box.png", bbox_inches='tight')
plt.show()
plt.close()


In [None]:
# Show the column names available on the merged daily table.
list(merged.columns)


In [None]:
# Daily USD volume is reused, unchanged, as the risk-exposure proxy.
merged['risk_proxy'] = merged['usd_volume']

# Violin plot: distribution of the risk proxy within each sentiment class.
fig, ax = plt.subplots(figsize=(7, 4))
sns.violinplot(data=merged, x='sentiment', y='risk_proxy', ax=ax)
ax.set_title('Risk Exposure (USD Volume) vs Sentiment')
plt.setp(ax.get_xticklabels(), rotation=30)
ax.grid(True, ls='--', alpha=0.4)
fig.savefig(f"{project_root}/outputs/risk_vs_sentiment.png", bbox_inches='tight')
plt.show()


In [None]:
# Box plot: spread of daily traded USD volume within each sentiment class.
fig, ax = plt.subplots(figsize=(7, 4))
sns.boxplot(data=merged, x='sentiment', y='usd_volume', ax=ax)
ax.set_title('Market Participation (USD Volume) vs Sentiment')
plt.setp(ax.get_xticklabels(), rotation=30)
ax.grid(True, ls='--', alpha=0.4)
fig.savefig(f"{project_root}/outputs/volume_vs_sentiment_box.png", bbox_inches='tight')
plt.show()


In [None]:
# Binary outcome per trading day: 1 when the day's total PnL is positive.
merged['win_flag'] = (merged['pnl_sum'] > 0).astype(int)

# Bar height is the mean of win_flag per sentiment class, i.e. the share of
# days that closed with positive PnL.
plt.figure(figsize=(7,4))
sns.barplot(x='sentiment', y='win_flag', data=merged, estimator=np.mean)
plt.title('Win-Rate vs Sentiment')
plt.ylim(0,1)
# The plotted values are fractions in [0, 1], not percentages, so the axis
# label states that explicitly.
plt.ylabel('Win Rate (fraction of profitable days)')
plt.xticks(rotation=30)
plt.grid(True, ls='--', alpha=0.4)
plt.savefig(f"{project_root}/outputs/winrate_vs_sentiment.png", bbox_inches='tight')
plt.show()
plt.close()  # release the figure once it has been saved and shown


In [None]:
# Bar plot: mean daily direction bias within each sentiment class.
fig, ax = plt.subplots(figsize=(7, 4))
sns.barplot(data=merged, x='sentiment', y='direction_bias', estimator=np.mean, ax=ax)
ax.set_title('Directional Bias vs Sentiment')
ax.set_ylabel('Bullish (+1) / Bearish (-1)')
plt.setp(ax.get_xticklabels(), rotation=30)
ax.grid(True, ls='--', alpha=0.4)
fig.savefig(f"{project_root}/outputs/directional_bias_vs_sentiment.png", bbox_inches='tight')
plt.show()


In [None]:
# Target: 1 when the day's total PnL is positive, else 0.
merged['profit_positive'] = (merged['pnl_sum'] > 0).astype(int)

# Encode sentiment categories as integers.
# NOTE(review): LabelEncoder assigns codes in sorted label order, which is not
# necessarily a meaningful sentiment scale -- consider an explicit ordinal map.
le = LabelEncoder()
merged['sentiment_enc'] = le.fit_transform(merged['sentiment'])

X = merged[['sentiment_enc', 'usd_volume', 'hedge_rate', 'side_bias', 'direction_bias', 'trade_count']]
y = merged['profit_positive']

# shuffle=False keeps row order, so the final 20% of rows form the test set
# (a chronological hold-out provided `merged` is ordered by date).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

clf = RandomForestClassifier(n_estimators=200, random_state=42)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, pred))
print("F1 Score:", f1_score(y_test, pred))

# ROC AUC measures how well the model ranks samples, so it needs a continuous
# score (the predicted probability of class 1), not the hard 0/1 predictions.
# It is undefined when either split contains a single class, which an
# unshuffled tail split can produce; report that instead of raising.
if y_test.nunique() < 2 or len(clf.classes_) < 2:
    print("AUC: undefined (only one class present in the train or test split)")
else:
    positive_col = list(clf.classes_).index(1)
    pred_proba = clf.predict_proba(X_test)[:, positive_col]
    print("AUC:", roc_auc_score(y_test, pred_proba))


In [None]:
# Pair each fitted importance score with its feature name.
feat_imp = pd.Series(clf.feature_importances_, index=X.columns)

# Horizontal bars, sorted ascending so the largest bar is drawn last (top).
fig, ax = plt.subplots(figsize=(6, 4))
feat_imp.sort_values().plot.barh(ax=ax)
ax.set_title("Feature Importance")
fig.savefig(f"{project_root}/outputs/feature_importance.png", bbox_inches='tight')
plt.show()
plt.close()


In [None]:
# Write a minimal README into the project root.
# The title contains a non-ASCII em dash, so the encoding is pinned to UTF-8;
# otherwise open() falls back to the platform's locale encoding and the file
# may be written in a legacy code page (or fail) on some systems.
with open(f"{project_root}/README.md", "w", encoding="utf-8") as f:
    f.write("# Data Science Assignment — Web3 Trading\n")
    f.write("This repository contains analysis of trader behavior vs sentiment.\n\n")
    f.write("## Folder Structure\n")
    f.write("Same as assignment instructions.\n")


In [None]:
# Reminder only: this notebook does not create the PDF report itself.
report_notice = "PDF report generated manually — attach ds_report.pdf to folder."
print(report_notice)
