In [None]:
!pip install -q gdown
!pip install -q pyarrow

import os
ROOT = "/content/ds_Umar_Ansari"
os.makedirs(ROOT, exist_ok=True)
os.makedirs(os.path.join(ROOT, "csv_files"), exist_ok=True)
os.makedirs(os.path.join(ROOT, "outputs"), exist_ok=True)


In [None]:
import gdown

# Google Drive file IDs of the two input datasets.
trader_file_id = "1IAfLZwu6rJzyWKgBToqwSmmVYU6VbjVs"
sentiment_file_id = "1PgQC0tO8XN-wqkNyghWc_-mnrYv_nhSf"

# Local destinations for the downloads, both inside ROOT/csv_files.
csv_dir = os.path.join(ROOT, "csv_files")
trader_out = os.path.join(csv_dir, "historical_trader_data.csv")
sentiment_out = os.path.join(csv_dir, "fear_greed_index.csv")

# Direct-download URLs built from the file IDs.
trader_url = f"https://drive.google.com/uc?id={trader_file_id}"
sentiment_url = f"https://drive.google.com/uc?id={sentiment_file_id}"

# Fetch both files, showing gdown's progress output.
gdown.download(trader_url, trader_out, quiet=False)
gdown.download(sentiment_url, sentiment_out, quiet=False)



In [None]:
import pandas as pd

# Limit how much of a frame is rendered in cell output.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 20)

# Read both downloaded CSV files; low_memory=False parses each file in one pass.
trader = pd.read_csv(trader_out, low_memory=False)
sent = pd.read_csv(sentiment_out, low_memory=False)

# Report the dimensions of each frame, then preview the first rows.
for _label, _frame in (("Trader", trader), ("Sentiment", sent)):
    print(f"{_label} shape:", _frame.shape)
display(trader.head(), sent.head())


In [None]:
# Parse the trade timestamp strings (day-month-year hour:minute) into datetimes.
trader["Timestamp IST"] = pd.to_datetime(trader["Timestamp IST"], format="%d-%m-%Y %H:%M")

# Sentiment timestamps are UNIX epoch seconds: convert them to datetimes and
# give the column a clearer name. Done without `inplace=True` so a new frame is
# produced instead of mutating the loaded one.
sent = sent.assign(
    timestamp=pd.to_datetime(sent["timestamp"], unit="s")
).rename(columns={"timestamp": "Sentiment_Time"})

# Keep only the columns used downstream. `.copy()` makes each result an
# independent frame, so the column assignments below do not raise
# SettingWithCopyWarning (they would otherwise target a slice of the original).
trader = trader[[
    "Account", "Coin", "Execution Price", "Size Tokens", "Size USD", "Side",
    "Timestamp IST", "Closed PnL", "Fee",
]].copy()
sent = sent[["Sentiment_Time", "value", "classification", "date"]].copy()

# Calendar-date keys used to join trades to the sentiment reading of that day.
# NOTE(review): the trade column name suggests IST, while the timezone of the
# sentiment "date" column is not visible here — confirm the two calendars align.
trader["Trade_Date"] = trader["Timestamp IST"].dt.date
sent["Sentiment_Date"] = pd.to_datetime(sent["date"]).dt.date


In [None]:
# Left-join every trade to the sentiment row recorded for the same calendar
# date; trades with no matching sentiment date are kept with empty sentiment columns.
merged = trader.merge(
    sent,
    how="left",
    left_on="Trade_Date",
    right_on="Sentiment_Date",
)

print("Merged data shape:", merged.shape)
display(merged.head())


In [None]:
# Numeric encoding for the trade side.
side_codes = {"BUY": 1, "SELL": -1}

# Add the derived columns in one step:
#   Side_Num        - +1 for BUY, -1 for SELL (anything else becomes NaN)
#   PnL_Positive    - 1 when Closed PnL is strictly positive, else 0
#   Sentiment_Score - the sentiment "value" column as-is
#   Sentiment_Label - integer category code of the sentiment classification
merged = merged.assign(
    Side_Num=merged["Side"].map(side_codes),
    PnL_Positive=merged["Closed PnL"].gt(0).astype(int),
    Sentiment_Score=merged["value"],
    Sentiment_Label=merged["classification"].astype("category").cat.codes,
)

# Model inputs and the column to predict.
target = "PnL_Positive"
features = [
    "Execution Price",
    "Size Tokens",
    "Size USD",
    "Fee",
    "Side_Num",
    "Sentiment_Score",
    "Sentiment_Label",
]

# Missing feature values are replaced with 0 before modelling.
y = merged[target]
X = merged[features].fillna(0)


In [None]:
# Trim leading/trailing whitespace from every column label of the merged frame.
merged.columns = merged.columns.map(str.strip)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the share of
# positive and non-positive PnL rows the same in the train and test splits, so
# the reported metrics are not skewed by an unlucky split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit a 100-tree random forest with a fixed seed for reproducibility;
# n_jobs=-1 builds the trees on all available CPU cores.
model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Predict on the held-out rows and summarise per-class precision/recall/F1.
y_pred = model.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))


In [None]:
import matplotlib.pyplot as plt

# Average closed PnL per sentiment class. Grouping on the text classification
# (instead of its integer category code) gives readable tick labels. Bars are
# ordered by the mean numeric sentiment value of each class. Trades without a
# matching sentiment row have no classification and are left out of the chart.
pnl_by_sentiment = (
    merged.groupby("classification")
    .agg(mean_score=("value", "mean"), mean_pnl=("Closed PnL", "mean"))
    .sort_values("mean_score")["mean_pnl"]
)

# Explicit figure/axes so the size and labels apply to exactly this plot.
fig, ax = plt.subplots(figsize=(8, 5))
pnl_by_sentiment.plot(kind="bar", color="skyblue", ax=ax)
ax.set_title("Average Closed PnL by Sentiment")
ax.set_xlabel("Sentiment")
ax.set_ylabel("Avg Closed PnL")
plt.show()


In [None]:
# Save the merged dataset next to the downloaded inputs. The path is built from
# ROOT so the file lands in the folder created during setup regardless of the
# notebook's current working directory.
merged_out = os.path.join(ROOT, "csv_files", "merged_trader_sentiment.csv")
merged.to_csv(merged_out, index=False)
print(f"Saved merged dataset to {merged_out}")