In [42]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

In [43]:
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 200)

In [44]:
ROOT = Path.cwd()
DATA_DIR = ROOT / "csv_files"
OUTPUT_DIR = ROOT / "outputs"

DATA_DIR.mkdir(exist_ok=True)
OUTPUT_DIR.mkdir(exist_ok=True)

print("Data dir:", DATA_DIR)
print("Output dir:", OUTPUT_DIR)

Data dir: D:\Scholar\Pro Max\Projects\ds_bhaskar\csv_files
Output dir: D:\Scholar\Pro Max\Projects\ds_bhaskar\outputs


In [45]:
trades = pd.read_csv(DATA_DIR / "historical_data.csv")
sentiment = pd.read_csv(DATA_DIR / "fear_greed_index.csv")

print("Trades shape:", trades.shape)
print("Sentiment shape:", sentiment.shape)

Trades shape: (211224, 16)
Sentiment shape: (2644, 4)


In [46]:
trades = trades.rename(columns={
    "Account": "account",
    "Coin": "symbol",
    "Execution Price": "execution_price",
    "Size Tokens": "size_tokens",
    "Size USD": "size_usd",
    "Side": "side",
    "Timestamp IST": "timestamp_ist",
    "Closed PnL": "closed_pnl",
    "Fee": "fee",
    "Timestamp": "timestamp"
})

In [47]:
trades.info()
trades.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211224 entries, 0 to 211223
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   account           211224 non-null  object 
 1   symbol            211224 non-null  object 
 2   execution_price   211224 non-null  float64
 3   size_tokens       211224 non-null  float64
 4   size_usd          211224 non-null  float64
 5   side              211224 non-null  object 
 6   timestamp_ist     211224 non-null  object 
 7   Start Position    211224 non-null  float64
 8   Direction         211224 non-null  object 
 9   closed_pnl        211224 non-null  float64
 10  Transaction Hash  211224 non-null  object 
 11  Order ID          211224 non-null  int64  
 12  Crossed           211224 non-null  bool   
 13  fee               211224 non-null  float64
 14  Trade ID          211224 non-null  float64
 15  timestamp         211224 non-null  float64
dtypes: bool(1), float64(

Unnamed: 0,account,symbol,execution_price,size_tokens,size_usd,side,timestamp_ist,Start Position,Direction,closed_pnl,Transaction Hash,Order ID,Crossed,fee,Trade ID,timestamp
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,02-12-2024 22:50,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,1730000000000.0
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,02-12-2024 22:50,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,1730000000000.0
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,02-12-2024 22:50,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,1730000000000.0
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,02-12-2024 22:50,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,1730000000000.0
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,02-12-2024 22:50,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,1730000000000.0
5,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.99,1.41,11.27,BUY,02-12-2024 22:50,1298.215466,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.000493,733000000000000.0,1730000000000.0
6,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9934,144.09,1151.77,BUY,02-12-2024 22:50,1299.624972,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,906000000000000.0,1730000000000.0
7,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,8.0,34.0,272.0,BUY,02-12-2024 22:50,1443.664541,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0119,396000000000000.0,1730000000000.0
8,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,8.0,46.0,368.0,BUY,02-12-2024 22:50,1477.652641,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0161,946000000000000.0,1730000000000.0
9,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,8.0,12.5,100.0,BUY,02-12-2024 22:50,1523.636541,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.004375,34700000000000.0,1730000000000.0


In [48]:
trades["timestamp"] = pd.to_datetime(trades["timestamp"], unit="ms", errors="coerce")
trades["trade_date"] = trades["timestamp"].dt.date

In [49]:
trades["closed_pnl"].describe()

count    211224.000000
mean         48.749001
std         919.164828
min     -117990.104100
25%           0.000000
50%           0.000000
75%           5.792797
max      135329.090100
Name: closed_pnl, dtype: float64

In [50]:
sentiment = sentiment.rename(columns={
    "classification": "sentiment",
    "value": "sentiment_value"
})

In [51]:
sentiment.info()
sentiment.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2644 entries, 0 to 2643
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   timestamp        2644 non-null   int64 
 1   sentiment_value  2644 non-null   int64 
 2   sentiment        2644 non-null   object
 3   date             2644 non-null   object
dtypes: int64(2), object(2)
memory usage: 82.8+ KB


Unnamed: 0,timestamp,sentiment_value,sentiment,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05
5,1517895000,8,Extreme Fear,2018-02-06
6,1517981400,36,Fear,2018-02-07
7,1518067800,30,Fear,2018-02-08
8,1518154200,44,Fear,2018-02-09
9,1518240600,54,Neutral,2018-02-10


In [52]:
sentiment["date"] = pd.to_datetime(sentiment["date"], errors="coerce")
sentiment["trade_date"] = sentiment["date"].dt.date

In [53]:
sentiment["sentiment"] = (
    sentiment["sentiment"]
    .astype(str)
    .str.strip()
    .str.lower()
)

sentiment["sentiment"] = sentiment["sentiment"].replace({
    "extreme fear": "extreme_fear",
    "extreme greed": "extreme_greed"
})

In [54]:
sentiment["sentiment_binary"] = sentiment["sentiment"].map({
    "fear": 0,
    "extreme_fear": 0,
    "neutral": np.nan,      # neutral is not risk-on or risk-off
    "greed": 1,
    "extreme_greed": 1
})

In [55]:
sentiment["sentiment_ordinal"] = sentiment["sentiment"].map({
    "extreme_fear": -2,
    "fear": -1,
    "neutral": 0,
    "greed": 1,
    "extreme_greed": 2
})

In [56]:
sentiment["sentiment"].value_counts(dropna=False)
sentiment["sentiment_binary"].value_counts(dropna=False)
sentiment["sentiment_ordinal"].value_counts(dropna=False)

sentiment_ordinal
-1    781
 1    633
-2    508
 0    396
 2    326
Name: count, dtype: int64

In [57]:
sentiment_merge = (
    sentiment[["trade_date", "sentiment", "sentiment_binary"]]
    .drop_duplicates("trade_date")
)

In [58]:
trades_sentiment = trades.merge(
    sentiment_merge,
    on="trade_date",
    how="left"
)

In [59]:
trades_sentiment[["sentiment", "sentiment_binary"]].isna().mean()

sentiment           0.127642
sentiment_binary    0.161449
dtype: float64

In [61]:
trades_sentiment.to_csv(
    DATA_DIR / "trades_with_sentiment.csv",
    index=False
)