In [None]:
# ===============================================================
#  Data Science Internship Project
# Trader Behavior vs Market Sentiment Analysis
# Author: Abhishek Pratap Singh
# ===============================================================

# ---------------------------------------------------------------
#  Step 1: Setup Project Structure
# ---------------------------------------------------------------
!mkdir -p ds_abhishek_pratap_singh/csv_files
!mkdir -p ds_abhishek_pratap_singh/outputs

# Verify directory
!ls ds_abhishek_pratap_singh

# Install dependency for Google Drive file download
!pip install gdown



In [None]:
# ---------------------------------------------------------------
#  Step 2: Download Datasets from Google Drive
# ---------------------------------------------------------------
import gdown

# Fear & Greed Index Data: Drive link and local destination
fear_greed_url = 'https://drive.google.com/uc?id=1PgQC0tO8XN-wqkNyghWc_-mnrYv_nhSf'
fear_greed_path = 'ds_abhishek_pratap_singh/csv_files/fear_greed_index.csv'

# Historical Trader Data: Drive link and local destination
trader_url = 'https://drive.google.com/uc?id=1IAfLZwu6rJzyWKgBToqwSmmVYU6VbjVs'
trader_path = 'ds_abhishek_pratap_singh/csv_files/trader_data.csv'

# quiet=False keeps gdown's progress output visible in the cell
gdown.download(fear_greed_url, fear_greed_path, quiet=False)

# Left as the final expression so the cell still echoes this call's result
gdown.download(trader_url, trader_path, quiet=False)



In [None]:
# ---------------------------------------------------------------
#  Step 3: Load and Inspect Fear & Greed Index Data
# ---------------------------------------------------------------
import pandas as pd

# Read the file downloaded in Step 2 into a DataFrame
fear_greed_csv = 'ds_abhishek_pratap_singh/csv_files/fear_greed_index.csv'
sentiment_df = pd.read_csv(fear_greed_csv)

# Preview the first rows as the cell's displayed result
sentiment_df.head()


In [None]:
# Parse both time columns into pandas datetimes in one step:
#   'timestamp' -> numeric values interpreted as seconds (unit='s')
#   'date'      -> parsed with pandas' default datetime inference
sentiment_df = sentiment_df.assign(
    timestamp=pd.to_datetime(sentiment_df['timestamp'], unit='s'),
    date=pd.to_datetime(sentiment_df['date']),
)

# Quick check of the resulting dtypes and non-null counts
sentiment_df.info()


In [None]:
# ---------------------------------------------------------------
#  Step 4: Visualize Sentiment Trend
# ---------------------------------------------------------------
import matplotlib.pyplot as plt

# One 10x5 inch figure with a single axes; each observation is drawn as a
# circle marker joined by a line.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(sentiment_df['date'], sentiment_df['value'], marker='o')

# Labels so the chart can be read on its own
ax.set_title('Bitcoin Fear & Greed Index Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Sentiment Value (0 = Fear, 100 = Greed)')
ax.grid(True)

plt.show()


In [None]:
# Write the sentiment table to its own CSV alongside the raw download;
# index=False leaves the DataFrame's row index out of the file.
cleaned_sentiment_csv = 'ds_abhishek_pratap_singh/csv_files/fear_greed_cleaned.csv'
sentiment_df.to_csv(cleaned_sentiment_csv, index=False)


In [None]:
# ---------------------------------------------------------------
#  Step 5: Load Trader Data
# ---------------------------------------------------------------
# Read the trader file downloaded in Step 2
trader_csv = 'ds_abhishek_pratap_singh/csv_files/trader_data.csv'
trader_df = pd.read_csv(trader_csv)

# info() prints the column/dtype summary; head() is the cell's displayed result
trader_df.info()
trader_df.head()


In [None]:
# ---------------------------------------------------------------
#  Step 6: Clean and Prepare Trader Data
# ---------------------------------------------------------------
# Work on a copy so the raw `trader_df` is left untouched
df = trader_df.copy()

# Convert Timestamp IST -> datetime; values that cannot be parsed become NaT
# NOTE(review): no explicit `format`/`dayfirst` is passed, so the result
# depends on pandas' format inference — confirm it matches how this column
# is actually written in the file.
df['Timestamp IST'] = pd.to_datetime(df['Timestamp IST'], errors='coerce')

# Create daily date column from the parsed timestamp
df['date'] = df['Timestamp IST'].dt.date

# Convert numeric columns safely: entries that are not numbers become NaN
num_cols = ['Execution Price', 'Size USD', 'Closed PnL', 'Fee']
for col in num_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop rows missing any of the three core trade fields
# ('Fee' is converted above but is not required to be present)
required_cols = ['Execution Price', 'Size USD', 'Closed PnL']
df.dropna(subset=required_cols, inplace=True)

df.info()


In [None]:
# ---------------------------------------------------------------
#  Step 7: Fix UNIX Timestamp (if needed)
# ---------------------------------------------------------------
# Read the raw values from the untouched `trader_df`, aligned to the rows
# still present in `df`, instead of from `df['Timestamp']`. This cell
# overwrites `df['Timestamp']` with datetimes, so reading it back on a
# second run would no longer see the original numbers. Sourcing from the
# raw frame makes the cell safe to re-run.
raw_epoch_ms = pd.to_numeric(trader_df.loc[df.index, 'Timestamp'], errors='coerce')

# The raw values are treated as epoch milliseconds; unit='ms' lets pandas do
# the scaling rather than dividing by 1000 by hand. Unconvertible -> NaT.
fixed_timestamp = pd.to_datetime(raw_epoch_ms, unit='ms', errors='coerce')
df['Timestamp'] = fixed_timestamp

# Use the epoch-derived timestamp only where 'Timestamp IST' failed to parse.
# NOTE(review): epoch-derived datetimes carry no timezone shift, while the
# target column is labelled IST — if that means UTC+05:30 the fallback rows
# are offset relative to the rest of the column; confirm and shift if needed.
df['Timestamp IST'] = df['Timestamp IST'].fillna(df['Timestamp'])

# Recompute the daily key now that the gaps have been filled
df['date'] = df['Timestamp IST'].dt.date

# info() prints directly; the preview must be the LAST expression of the
# cell to be rendered (previously it came before info() and was discarded).
df.info()
df[['Timestamp IST', 'Timestamp', 'date']].head()


In [None]:
# ---------------------------------------------------------------
#  Step 8: Create Daily Summary Metrics
# ---------------------------------------------------------------
# One row per calendar date. Named aggregation sets each output column's
# name and its (source column, reducer) pair in a single place.
daily_summary = (
    df.groupby('date')
    .agg(
        Total_PnL=('Closed PnL', 'sum'),                   # summed closed PnL
        Total_Volume_USD=('Size USD', 'sum'),              # summed trade size
        Avg_Execution_Price=('Execution Price', 'mean'),   # mean execution price
        Unique_Traders=('Account', 'nunique'),             # distinct accounts
    )
    .reset_index()  # turn the 'date' group key back into a regular column
)

daily_summary.head()


In [None]:
# ---------------------------------------------------------------
#  Step 9: Merge Trader Data with Sentiment Data
# ---------------------------------------------------------------
# Reduce the sentiment dates to plain `datetime.date` objects so they are
# the same kind of key as `daily_summary['date']` (built via `.dt.date`).
sentiment_df['date'] = pd.to_datetime(sentiment_df['date']).dt.date

# Left join: every trading day is kept; days with no sentiment record get
# NaN in 'classification' and 'value'.
# validate='one_to_one' makes pandas raise a MergeError if a date occurs
# more than once on either side. Without it, duplicate sentiment dates would
# silently duplicate daily rows and distort the per-sentiment averages
# computed from `merged_df`.
merged_df = daily_summary.merge(
    sentiment_df[['date', 'classification', 'value']],
    on='date',
    how='left',
    validate='one_to_one',
)

merged_df.head()


In [None]:
# ---------------------------------------------------------------
#  Step 10: Summary Statistics
# ---------------------------------------------------------------
# Mean of the daily totals within each sentiment class
pnl_by_sentiment = merged_df.groupby('classification')['Total_PnL'].mean()
volume_by_sentiment = merged_df.groupby('classification')['Total_Volume_USD'].mean()

# Pairwise correlation matrix between the sentiment score and daily PnL
sentiment_pnl_corr = merged_df[['value', 'Total_PnL']].corr()

print("Average PnL by Sentiment:\n", pnl_by_sentiment)
print("\nAverage Volume by Sentiment:\n", volume_by_sentiment)
print("\nCorrelation between Sentiment Value and PnL:\n", sentiment_pnl_corr)


In [None]:
# ---------------------------------------------------------------
#  STEP 11: Visualizations
# ---------------------------------------------------------------
import seaborn as sns

# --- 1. Trader Profitability vs Sentiment ---
# Fixed left-to-right order of the categories on the x-axis
sentiment_order = ['Extreme Fear', 'Fear', 'Neutral', 'Greed', 'Extreme Greed']

fig, ax = plt.subplots(figsize=(8, 5))

# One box per sentiment class, showing the spread of daily total PnL.
# NOTE(review): recent seaborn releases emit a deprecation warning when
# `palette` is passed without `hue` — check against the installed version.
sns.boxplot(
    data=merged_df,
    x='classification',
    y='Total_PnL',
    order=sentiment_order,
    palette='coolwarm',
    ax=ax,
)

ax.set_title('Trader Profitability vs Market Sentiment')
ax.set_xlabel('Market Sentiment')
ax.set_ylabel('Total Profit & Loss (USD)')
ax.grid(axis='y', linestyle='--', alpha=0.7)  # horizontal guide lines only

fig.tight_layout()
# Save before show() so the written image contains the finished figure
fig.savefig('ds_abhishek_pratap_singh/outputs/pnl_by_sentiment.png', dpi=300)
plt.show()


In [None]:
# --- 2. Average Trading Volume vs Sentiment ---
# Fixed left-to-right order of the categories on the x-axis
sentiment_order = ['Extreme Fear', 'Fear', 'Neutral', 'Greed', 'Extreme Greed']

fig, ax = plt.subplots(figsize=(8, 5))

# Bar height is the mean of the daily volume within each sentiment class
sns.barplot(
    data=merged_df,
    x='classification',
    y='Total_Volume_USD',
    order=sentiment_order,
    estimator='mean',
    palette='viridis',
    ax=ax,
)

ax.set_title('Average Daily Trading Volume vs Market Sentiment')
ax.set_xlabel('Market Sentiment')
ax.set_ylabel('Average Trading Volume (USD)')
ax.grid(axis='y', linestyle='--', alpha=0.7)  # horizontal guide lines only

fig.tight_layout()
# Save before show() so the written image contains the finished figure
fig.savefig('ds_abhishek_pratap_singh/outputs/volume_by_sentiment.png', dpi=300)
plt.show()


In [None]:
# --- 3. Sentiment Value vs Profit ---
fig, ax = plt.subplots(figsize=(8, 5))

# Scatter of daily PnL against the sentiment score with a fitted regression
# line; points are semi-transparent and the fit line is drawn in red.
point_style = {'alpha': 0.5}
fit_line_style = {'color': 'red'}
sns.regplot(
    data=merged_df,
    x='value',
    y='Total_PnL',
    scatter_kws=point_style,
    line_kws=fit_line_style,
    ax=ax,
)

ax.set_title('Relationship Between Market Sentiment Value and Trader Profitability')
ax.set_xlabel('Market Sentiment Value (0 = Fear, 100 = Greed)')
ax.set_ylabel('Total Daily Profit (USD)')
ax.grid(True, linestyle='--', alpha=0.6)

fig.tight_layout()
# Save before show() so the written image contains the finished figure
fig.savefig('ds_abhishek_pratap_singh/outputs/sentiment_vs_profit.png', dpi=300)
plt.show()


In [None]:
# ---------------------------------------------------------------
# 💾 Step 12: Save Final Merged Dataset
# ---------------------------------------------------------------
# Write the merged daily table to CSV; index=False leaves the row index out.
merged_df.to_csv('ds_abhishek_pratap_singh/csv_files/merged_trader_sentiment.csv', index=False)

# The check marks were stored as mis-decoded bytes (UTF-8 read as cp1252),
# which printed as garbage; they are restored to the intended characters.
print("✅✅✅ All tasks complete. Cleaned and merged dataset saved successfully!")
