In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the project
# folder used by the following cells is reachable as an ordinary path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Root of the project on the mounted Drive; the CSV and output folders are
# direct children of it.
BASE_PATH = "/content/drive/MyDrive/ds_shubham"
CSV_PATH, OUTPUT_PATH = (
    os.path.join(BASE_PATH, child) for child in ("csv_files", "outputs")
)

# exist_ok=True makes this safe to re-run once the folders exist.
for folder in (CSV_PATH, OUTPUT_PATH):
    os.makedirs(folder, exist_ok=True)

# Echo the resolved locations so the layout is visible in the cell output.
for label, location in (
    ("Project Folder Created At:", BASE_PATH),
    ("CSV Folder:", CSV_PATH),
    ("Outputs Folder:", OUTPUT_PATH),
):
    print(label, location)

Project Folder Created At: /content/drive/MyDrive/ds_shubham
CSV Folder: /content/drive/MyDrive/ds_shubham/csv_files
Outputs Folder: /content/drive/MyDrive/ds_shubham/outputs


In [None]:
# Upload files from the local machine into the Colab session.
# NOTE(review): the move step that follows reads both CSVs from /content/, so
# the uploads are presumably written there — confirm.
from google.colab import files
files.upload()

Output hidden; open in https://colab.research.google.com to view.

In [None]:
import shutil

# Move the uploaded CSVs from the Colab scratch directory into the project's
# CSV folder on Drive.
#
# The step is written to be re-runnable: after the first run the source files
# are gone and the destination files exist, which made the original
# shutil.move(src, <directory>) calls fail on every subsequent execution.
for csv_name in (
    "fear_greed_index_DataScience.csv",
    "historical_data_DataScience.csv",
):
    uploaded_file = os.path.join("/content", csv_name)
    project_file = os.path.join(CSV_PATH, csv_name)

    if os.path.exists(uploaded_file):
        # Give shutil.move the full destination *file* path: with a directory
        # destination it raises shutil.Error when a same-named file is already
        # there, whereas a file destination lets a fresh upload replace it.
        shutil.move(uploaded_file, project_file)
    elif not os.path.exists(project_file):
        # Neither a fresh upload nor a previously moved copy: fail loudly here
        # rather than later when the CSV is read.
        raise FileNotFoundError(
            f"{csv_name} was not found in /content or in {CSV_PATH}; "
            "upload it before running this cell."
        )
    # Otherwise the file was already moved by an earlier run; nothing to do.

print("✅ CSV files moved into project folder:")
print(os.listdir(CSV_PATH))

✅ CSV files moved into project folder:
['fear_greed_index_DataScience.csv', 'historical_data_DataScience.csv']


In [None]:
# Show the contents of the CSV folder as this cell's output.
os.listdir(CSV_PATH)

['fear_greed_index_DataScience.csv', 'historical_data_DataScience.csv']

In [None]:
import pandas as pd

# Build the input paths from CSV_PATH (defined in the setup cell) instead of
# repeating the absolute Drive path, so the folder is configured in one place.
trades_path = os.path.join(CSV_PATH, "historical_data_DataScience.csv")
sentiment_path = os.path.join(CSV_PATH, "fear_greed_index_DataScience.csv")

# low_memory=False makes pandas read each file in one pass rather than in
# chunks, avoiding mixed-dtype columns.
trades = pd.read_csv(trades_path, low_memory=False)
sentiment = pd.read_csv(sentiment_path, low_memory=False)

print("✅ Trades Dataset Shape:", trades.shape)
print("✅ Sentiment Dataset Shape:", sentiment.shape)

# Show each preview with display() (available as a builtin inside the notebook
# kernel). Ending the cell with a tuple of DataFrames falls back to the
# plain-text repr, which wraps wide tables and is hard to read.
display(trades.head())
display(sentiment.head())

✅ Trades Dataset Shape: (211224, 16)
✅ Sentiment Dataset Shape: (2644, 4)


(                                      Account  Coin  Execution Price  \
 0  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9769   
 1  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9800   
 2  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9855   
 3  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9874   
 4  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9894   
 
    Size Tokens  Size USD Side     Timestamp IST  Start Position Direction  \
 0       986.87   7872.16  BUY  02-12-2024 22:50        0.000000       Buy   
 1        16.00    127.68  BUY  02-12-2024 22:50      986.524596       Buy   
 2       144.09   1150.63  BUY  02-12-2024 22:50     1002.518996       Buy   
 3       142.98   1142.04  BUY  02-12-2024 22:50     1146.558564       Buy   
 4         8.73     69.75  BUY  02-12-2024 22:50     1289.488521       Buy   
 
    Closed PnL                                   Transaction Hash     Order ID  \
 0      

In [None]:
# Parse the trade timestamps as day-first strings (e.g. "02-12-2024 22:50").
# errors='coerce' turns anything unparseable into NaT instead of raising.
trade_stamps = pd.to_datetime(trades['Timestamp IST'], dayfirst=True, errors='coerce')
trades['Timestamp IST'] = trade_stamps
# Calendar day of each trade; this is the key used to join with sentiment.
trades['date'] = trade_stamps.dt.date

# Reduce the sentiment dates to plain calendar days in the same way.
sentiment_stamps = pd.to_datetime(sentiment['date'], errors='coerce')
sentiment['date'] = sentiment_stamps.dt.date

# Report the span covered by each table.
for label, frame in (
    ("✅ Trades Date Range:", trades),
    ("✅ Sentiment Date Range:", sentiment),
):
    print(label, frame['date'].min(), "to", frame['date'].max())

✅ Trades Date Range: 2023-05-01 to 2025-05-01
✅ Sentiment Date Range: 2018-02-01 to 2025-05-02


In [None]:
# Attach the sentiment row for each calendar day to every trade made on that
# day. how="inner" keeps only trades whose date exists in both tables.
merged = trades.merge(sentiment, how="inner", on="date")

print("✅ Merged Dataset Shape:", merged.shape)
# Quick look at the join key next to the sentiment label it picked up.
merged.loc[:, ['date', 'classification']].head()

✅ Merged Dataset Shape: (211218, 20)


Unnamed: 0,date,classification
0,2024-12-02,Extreme Greed
1,2024-12-02,Extreme Greed
2,2024-12-02,Extreme Greed
3,2024-12-02,Extreme Greed
4,2024-12-02,Extreme Greed


In [None]:
# Persist the merged trades + sentiment table (row index omitted).
clean_path = "/content/drive/MyDrive/ds_shubham/csv_files/trades_clean.csv"
merged.to_csv(path_or_buf=clean_path, index=False)

# Message and path go on separate lines.
print("✅ Cleaned dataset saved at:", clean_path, sep="\n")

✅ Cleaned dataset saved at:
/content/drive/MyDrive/ds_shubham/csv_files/trades_clean.csv


In [None]:
import numpy as np

# Create daily summary: one row per (date, sentiment label) pair.
trades_by_day = merged.groupby(["date", "classification"])
daily_summary = trades_by_day.agg(
    # Number of trades with a non-null Closed PnL.
    trade_count=pd.NamedAgg(column="Closed PnL", aggfunc="count"),
    # Share of trades whose Closed PnL is strictly positive.
    win_rate=pd.NamedAgg(column="Closed PnL", aggfunc=lambda pnl: pnl.gt(0).mean()),
    avg_closed_pnl=pd.NamedAgg(column="Closed PnL", aggfunc="mean"),
    avg_size=pd.NamedAgg(column="Size USD", aggfunc="mean"),
).reset_index()

daily_summary.head()

Unnamed: 0,date,classification,trade_count,win_rate,avg_closed_pnl,avg_size
0,2023-05-01,Greed,3,0.0,0.0,159.0
1,2023-12-05,Extreme Greed,9,0.0,0.0,5556.203333
2,2023-12-14,Greed,11,0.363636,-18.675885,10291.213636
3,2023-12-15,Greed,2,0.0,-12.316017,5304.975
4,2023-12-16,Greed,3,0.0,0.0,5116.256667


In [None]:
# Persist the per-day summary table (row index omitted).
daily_path = "/content/drive/MyDrive/ds_shubham/csv_files/daily_summary.csv"
daily_summary.to_csv(path_or_buf=daily_path, index=False)

# Message and path go on separate lines.
print("✅ Daily summary saved at:", daily_path, sep="\n")

✅ Daily summary saved at:
/content/drive/MyDrive/ds_shubham/csv_files/daily_summary.csv


In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd

# Folder that receives every figure saved by the plotting cells; created if it
# does not exist yet.
# NOTE(review): this repeats the value set in the setup cell, presumably so the
# plotting section can be run on its own — confirm.
OUTPUT_PATH = "/content/drive/MyDrive/ds_shubham/outputs"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Reload the per-day summary written earlier. parse_dates restores the 'date'
# column as real datetimes; without it the column comes back as plain strings
# and the per-day line charts get an evenly spaced categorical x-axis instead
# of a proper time axis (gaps between trading days would be hidden).
daily_summary = pd.read_csv(
    "/content/drive/MyDrive/ds_shubham/csv_files/daily_summary.csv",
    parse_dates=["date"],
)

In [None]:
# Line chart: total number of trades on each date, saved to the outputs folder.
fig, ax = plt.subplots()
trades_per_day = daily_summary.groupby("date")["trade_count"].sum()
trades_per_day.plot(ax=ax)
ax.set_title("Trades Per Day")
ax.set_xlabel("Date")
ax.set_ylabel("Trade Count")
fig.savefig(f"{OUTPUT_PATH}/trade_count_per_day.png")
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

print("✅ Saved:", f"{OUTPUT_PATH}/trade_count_per_day.png")

✅ Saved: /content/drive/MyDrive/ds_shubham/outputs/trade_count_per_day.png


In [None]:
# Line chart: mean win rate on each date, saved to the outputs folder.
fig, ax = plt.subplots()
win_rate_per_day = daily_summary.groupby("date")["win_rate"].mean()
win_rate_per_day.plot(ax=ax)
ax.set_title("Win Rate Per Day")
ax.set_xlabel("Date")
ax.set_ylabel("Win Rate")
fig.savefig(f"{OUTPUT_PATH}/win_rate_per_day.png")
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

print("✅ Saved:", f"{OUTPUT_PATH}/win_rate_per_day.png")

✅ Saved: /content/drive/MyDrive/ds_shubham/outputs/win_rate_per_day.png


In [None]:
# Line chart: mean closed PnL on each date, saved to the outputs folder.
fig, ax = plt.subplots()
pnl_per_day = daily_summary.groupby("date")["avg_closed_pnl"].mean()
pnl_per_day.plot(ax=ax)
ax.set_title("Average Closed PnL Per Day")
ax.set_xlabel("Date")
ax.set_ylabel("Average PnL")
fig.savefig(f"{OUTPUT_PATH}/avg_closed_pnl_per_day.png")
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

print("✅ Saved:", f"{OUTPUT_PATH}/avg_closed_pnl_per_day.png")

✅ Saved: /content/drive/MyDrive/ds_shubham/outputs/avg_closed_pnl_per_day.png


In [None]:
# Line chart: mean trade size (USD) on each date, saved to the outputs folder.
fig, ax = plt.subplots()
size_per_day = daily_summary.groupby("date")["avg_size"].mean()
size_per_day.plot(ax=ax)
ax.set_title("Average Trade Size Per Day")
ax.set_xlabel("Date")
ax.set_ylabel("Average USD Size")
fig.savefig(f"{OUTPUT_PATH}/avg_trade_size_per_day.png")
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

print("✅ Saved:", f"{OUTPUT_PATH}/avg_trade_size_per_day.png")

✅ Saved: /content/drive/MyDrive/ds_shubham/outputs/avg_trade_size_per_day.png


In [None]:
# Box plot: distribution of the daily average PnL for each sentiment label.
fig, ax = plt.subplots()

# Split the daily rows by sentiment once, then pull out labels and values in
# matching order.
rows_by_sentiment = list(daily_summary.groupby("classification"))
categories = [label for label, _ in rows_by_sentiment]
grouped = [rows["avg_closed_pnl"] for _, rows in rows_by_sentiment]

ax.boxplot(grouped, tick_labels=categories)
ax.set_title("Sentiment vs Average Daily PnL")
ax.set_xlabel("Market Sentiment")
ax.set_ylabel("Average Daily PnL")

fig.savefig(f"{OUTPUT_PATH}/sentiment_vs_avg_pnl.png")
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

print("✅ Saved:", f"{OUTPUT_PATH}/sentiment_vs_avg_pnl.png")

✅ Saved: /content/drive/MyDrive/ds_shubham/outputs/sentiment_vs_avg_pnl.png


In [None]:
# List the merged table's column names to see which fields are available.
merged.columns

Index(['Account', 'Coin', 'Execution Price', 'Size Tokens', 'Size USD', 'Side',
       'Timestamp IST', 'Start Position', 'Direction', 'Closed PnL',
       'Transaction Hash', 'Order ID', 'Crossed', 'Fee', 'Trade ID',
       'Timestamp', 'date', 'timestamp', 'value', 'classification'],
      dtype='object')

In [None]:
# Risk Proxy Analysis Using Trade Size
# Summarise the per-day average trade size within each sentiment label.
size_by_sentiment = daily_summary.groupby("classification")
risk_proxy_summary = size_by_sentiment.agg(
    avg_trade_size=pd.NamedAgg(column="avg_size", aggfunc="mean"),
    median_trade_size=pd.NamedAgg(column="avg_size", aggfunc="median"),
    max_trade_size=pd.NamedAgg(column="avg_size", aggfunc="max"),
).reset_index()

risk_proxy_summary

Unnamed: 0,classification,avg_trade_size,median_trade_size,max_trade_size
0,Extreme Fear,4091.804366,4236.12751,9405.505196
1,Extreme Greed,4410.524482,3418.98171,19021.28
2,Fear,6524.294937,4186.965333,26512.383826
3,Greed,6735.295813,5304.975,28583.181429
4,Neutral,7157.527121,6000.242,34988.58


In [None]:
# Persist the trade-size risk proxy table (row index omitted).
risk_path = "/content/drive/MyDrive/ds_shubham/csv_files/risk_proxy_summary.csv"
risk_proxy_summary.to_csv(path_or_buf=risk_path, index=False)

# Message and path go on separate lines.
print("✅ Risk proxy summary saved at:", risk_path, sep="\n")

✅ Risk proxy summary saved at:
/content/drive/MyDrive/ds_shubham/csv_files/risk_proxy_summary.csv


In [None]:
# Box plot: distribution of the daily average trade size for each sentiment
# label, with trade size standing in for risk exposure.
fig, ax = plt.subplots()

# Split the daily rows by sentiment once, then pull out labels and values in
# matching order.
rows_by_sentiment = list(daily_summary.groupby("classification"))
categories = [label for label, _ in rows_by_sentiment]
grouped = [rows["avg_size"] for _, rows in rows_by_sentiment]

ax.boxplot(grouped, tick_labels=categories)
ax.set_title("Sentiment vs Risk Exposure (Trade Size Proxy)")
ax.set_xlabel("Market Sentiment")
ax.set_ylabel("Average Trade Size (USD)")

fig.savefig(f"{OUTPUT_PATH}/sentiment_vs_risk_proxy.png")
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

print("✅ Saved:", f"{OUTPUT_PATH}/sentiment_vs_risk_proxy.png")

✅ Saved: /content/drive/MyDrive/ds_shubham/outputs/sentiment_vs_risk_proxy.png


In [None]:
# Advanced Risk Metric: Daily PnL Volatility
# Spread and level of the daily average PnL per sentiment label, plus the
# number of distinct dates behind each figure.
pnl_by_sentiment = daily_summary.groupby("classification")
pnl_volatility = pnl_by_sentiment.agg(
    pnl_std_dev=pd.NamedAgg(column="avg_closed_pnl", aggfunc="std"),
    pnl_mean=pd.NamedAgg(column="avg_closed_pnl", aggfunc="mean"),
    days=pd.NamedAgg(column="date", aggfunc="nunique"),
).reset_index()

pnl_volatility

Unnamed: 0,classification,pnl_std_dev,pnl_mean,days
0,Extreme Fear,74.222359,38.433903,14
1,Extreme Greed,117.423744,56.736108,114
2,Fear,129.547847,31.276567,91
3,Greed,136.17014,39.412175,193
4,Neutral,225.336392,63.820699,67


In [None]:
# Sharpe-like Risk Adjusted Return
# Mean daily PnL per unit of daily-PnL standard deviation, for each sentiment.
pnl_volatility["sharpe_like_ratio"] = pnl_volatility["pnl_mean"].div(
    pnl_volatility["pnl_std_dev"]
)

pnl_volatility

Unnamed: 0,classification,pnl_std_dev,pnl_mean,days,sharpe_like_ratio
0,Extreme Fear,74.222359,38.433903,14,0.517821
1,Extreme Greed,117.423744,56.736108,114,0.483174
2,Fear,129.547847,31.276567,91,0.241429
3,Greed,136.17014,39.412175,193,0.289433
4,Neutral,225.336392,63.820699,67,0.283224


In [None]:
# Persist the PnL volatility / Sharpe-like table (row index omitted).
vol_path = "/content/drive/MyDrive/ds_shubham/csv_files/pnl_risk_metrics.csv"
pnl_volatility.to_csv(path_or_buf=vol_path, index=False)

# Message and path go on separate lines.
print("✅ Advanced risk metrics saved at:", vol_path, sep="\n")

✅ Advanced risk metrics saved at:
/content/drive/MyDrive/ds_shubham/csv_files/pnl_risk_metrics.csv


In [None]:
# Bar chart: standard deviation of daily average PnL for each sentiment label.
fig, ax = plt.subplots()

volatility_by_sentiment = pnl_volatility.set_index("classification")["pnl_std_dev"]
volatility_by_sentiment.plot.bar(ax=ax)
ax.set_title("PnL Volatility by Market Sentiment")
ax.set_xlabel("Market Sentiment")
ax.set_ylabel("PnL Standard Deviation")

fig.savefig(f"{OUTPUT_PATH}/sentiment_vs_pnl_volatility.png")
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

print("✅ Saved:", f"{OUTPUT_PATH}/sentiment_vs_pnl_volatility.png")

✅ Saved: /content/drive/MyDrive/ds_shubham/outputs/sentiment_vs_pnl_volatility.png


In [None]:
from scipy import stats

# Row masks for the two sentiment regimes being compared: days labelled
# 'Fear' versus days labelled 'Greed' or 'Extreme Greed'.
is_fear_day = daily_summary['classification'] == 'Fear'
is_greed_like_day = daily_summary['classification'].isin(['Greed', 'Extreme Greed'])

# Per-day samples for each regime, with missing values removed.
fear_pnl = daily_summary.loc[is_fear_day, 'avg_closed_pnl'].dropna()
greed_like_pnl = daily_summary.loc[is_greed_like_day, 'avg_closed_pnl'].dropna()
fear_win = daily_summary.loc[is_fear_day, 'win_rate'].dropna()
greed_like_win = daily_summary.loc[is_greed_like_day, 'win_rate'].dropna()

print("Number of Fear days:", len(fear_pnl))
print("Number of Greed/Extreme Greed days:", len(greed_like_pnl))

# Mann–Whitney U Tests (two-sided): does the distribution of the daily metric
# differ between the two regimes?
stat_pnl, pval_pnl = stats.mannwhitneyu(fear_pnl, greed_like_pnl, alternative="two-sided")
stat_win, pval_win = stats.mannwhitneyu(fear_win, greed_like_win, alternative="two-sided")

print("✅ PnL Test p-value:", pval_pnl)
print("✅ Win Rate Test p-value:", pval_win)

Number of Fear days: 91
Number of Greed/Extreme Greed days: 307
✅ PnL Test p-value: 0.9900572745958655
✅ Win Rate Test p-value: 0.11856110296158964
