<a href="https://colab.research.google.com/github/Kedaresh07/ds_Kedaresh_Inamdar/blob/main/notebook_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Project Setup for ds_Kedaresh_Inamdar

import os

# Project root plus the two sub-folders used by the later steps
base_dir = "/content/ds_Kedaresh_Inamdar"
for folder in (base_dir, f"{base_dir}/csv_files", f"{base_dir}/outputs"):
    # exist_ok=True keeps this cell safe to re-run
    os.makedirs(folder, exist_ok=True)

print(" Folder structure created successfully:\n")

# Print the tree, indenting each folder by four spaces per nesting level;
# the level is the number of path separators left after removing the root prefix
for current, _subdirs, _filenames in os.walk(base_dir):
    depth = current.replace(base_dir, '').count(os.sep)
    padding = ' ' * 4 * depth
    print(f"{padding}{os.path.basename(current)}/")


 Folder structure created successfully:

ds_Kedaresh_Inamdar/
    outputs/
    csv_files/


In [None]:
# Step 1 continued: Library setup
!pip install -q pandas numpy matplotlib seaborn gdown

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")

print(" Libraries installed and imported successfully!")


 Libraries installed and imported successfully!


In [None]:
# Step 2: Download and Load the Datasets

import os

import gdown
import pandas as pd

# Use a dedicated name for the CSV folder so the project-level `base_dir`
# defined in the setup step is not overwritten with a different meaning.
csv_dir = "/content/ds_Kedaresh_Inamdar/csv_files"
os.makedirs(csv_dir, exist_ok=True)  # allows this cell to run on its own

# Google Drive file IDs from your assignment
trader_file_id = "1IAfLZwu6rJzyWKgBToqwSmmVYU6VbjVs"
sentiment_file_id = "1PgQC0tO8XN-wqkNyghWc_-mnrYv_nhSf"

# Output paths
trader_path = f"{csv_dir}/trader_data.csv"
sentiment_path = f"{csv_dir}/fear_greed.csv"

# Download files from Google Drive: (progress message, Drive file id, destination)
downloads = [
    ("⬇ Downloading Trader Data...", trader_file_id, trader_path),
    ("\n⬇ Downloading Fear-Greed Sentiment Data...", sentiment_file_id, sentiment_path),
]
for message, file_id, target_path in downloads:
    print(message)
    result = gdown.download(f"https://drive.google.com/uc?id={file_id}", target_path, quiet=False)
    # Stop here with a clear message if nothing was written, instead of letting
    # pd.read_csv fail later on a missing file.
    if result is None or not os.path.exists(target_path):
        raise RuntimeError(
            f"Download failed for Google Drive file id {file_id}; nothing was written to {target_path}"
        )

# Load CSVs into DataFrames
print("\n Reading CSV files...")
trader_df = pd.read_csv(trader_path)
sentiment_df = pd.read_csv(sentiment_path)

print("\n Files successfully loaded!")
print("Trader Data shape:", trader_df.shape)
print("Sentiment Data shape:", sentiment_df.shape)

# Preview first few rows
display(trader_df.head())
display(sentiment_df.head())


⬇ Downloading Trader Data...


Downloading...
From: https://drive.google.com/uc?id=1IAfLZwu6rJzyWKgBToqwSmmVYU6VbjVs
To: /content/ds_Kedaresh_Inamdar/csv_files/trader_data.csv
100%|██████████| 47.5M/47.5M [00:00<00:00, 51.7MB/s]



⬇ Downloading Fear-Greed Sentiment Data...


Downloading...
From: https://drive.google.com/uc?id=1PgQC0tO8XN-wqkNyghWc_-mnrYv_nhSf
To: /content/ds_Kedaresh_Inamdar/csv_files/fear_greed.csv
100%|██████████| 90.8k/90.8k [00:00<00:00, 19.5MB/s]



 Reading CSV files...

 Files successfully loaded!
Trader Data shape: (211224, 16)
Sentiment Data shape: (2644, 4)


Unnamed: 0,Account,Coin,Execution Price,Size Tokens,Size USD,Side,Timestamp IST,Start Position,Direction,Closed PnL,Transaction Hash,Order ID,Crossed,Fee,Trade ID,Timestamp
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,02-12-2024 22:50,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,1730000000000.0
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,02-12-2024 22:50,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,1730000000000.0
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,02-12-2024 22:50,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,1730000000000.0
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,02-12-2024 22:50,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,1730000000000.0
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,02-12-2024 22:50,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,1730000000000.0


Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05


In [None]:
# Step 3: Data Cleaning and Preparation

import pandas as pd
import numpy as np

# Make column names consistent (lowercase, no spaces)
trader_df.columns = trader_df.columns.str.strip().str.lower().str.replace(" ", "_")
sentiment_df.columns = sentiment_df.columns.str.strip().str.lower().str.replace(" ", "_")

print(" Column names standardized!")

# Display new column names
print("\nTrader data columns:", trader_df.columns.tolist())
print("Sentiment data columns:", sentiment_df.columns.tolist())

# --- Validate required columns ---
# Both columns are needed to build the merge key below, so fail here with an
# explicit message rather than with a bare KeyError a few lines later.
if 'timestamp_ist' not in trader_df.columns:
    raise KeyError("trader data has no 'timestamp_ist' column; cannot derive the merge date")
if 'date' not in sentiment_df.columns:
    raise KeyError("sentiment data has no 'date' column; cannot derive the merge date")

# --- Convert date/time ---
# Trader timestamps are day-first strings such as "02-12-2024 22:50";
# values that do not match the format become NaT (errors='coerce').
trader_df['timestamp_ist'] = pd.to_datetime(trader_df['timestamp_ist'], errors='coerce', format='%d-%m-%Y %H:%M')

# Convert 'date' in sentiment data to datetime
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'], errors='coerce')

# Create 'date_only' column to merge both
trader_df['date_only'] = trader_df['timestamp_ist'].dt.date
sentiment_df['date_only'] = sentiment_df['date'].dt.date

# --- Merge datasets ---
# Keep one sentiment row per date so the left join can never multiply trades;
# validate='many_to_one' makes pandas enforce that guarantee.
sentiment_lookup = sentiment_df[['date_only', 'classification']].drop_duplicates(subset='date_only')
merged_df = pd.merge(trader_df, sentiment_lookup,
                     on='date_only', how='left', validate='many_to_one')

# --- Handle missing classifications ---
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form raises a FutureWarning and stops
# updating the frame in pandas 3.0.
merged_df['classification'] = merged_df['classification'].fillna('Unknown')

print("\n Datasets merged successfully!")
print("Merged dataset shape:", merged_df.shape)

# Preview a few rows
display(merged_df.head())

# Save cleaned version for next step
merged_df.to_csv("/content/ds_Kedaresh_Inamdar/csv_files/merged_data.csv", index=False)
print("\n Cleaned and merged data saved to csv_files/merged_data.csv")

 Column names standardized!

Trader data columns: ['account', 'coin', 'execution_price', 'size_tokens', 'size_usd', 'side', 'timestamp_ist', 'start_position', 'direction', 'closed_pnl', 'transaction_hash', 'order_id', 'crossed', 'fee', 'trade_id', 'timestamp']
Sentiment data columns: ['timestamp', 'value', 'classification', 'date']

 Datasets merged successfully!
Merged dataset shape: (211224, 18)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['classification'].fillna('Unknown', inplace=True)


Unnamed: 0,account,coin,execution_price,size_tokens,size_usd,side,timestamp_ist,start_position,direction,closed_pnl,transaction_hash,order_id,crossed,fee,trade_id,timestamp,date_only,classification
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,2024-12-02 22:50:00,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,1730000000000.0,2024-12-02,Extreme Greed
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,2024-12-02 22:50:00,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,1730000000000.0,2024-12-02,Extreme Greed
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,2024-12-02 22:50:00,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,1730000000000.0,2024-12-02,Extreme Greed
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,2024-12-02 22:50:00,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,1730000000000.0,2024-12-02,Extreme Greed
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,2024-12-02 22:50:00,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,1730000000000.0,2024-12-02,Extreme Greed



 Cleaned and merged data saved to csv_files/merged_data.csv


In [None]:
# Step 4: Data Analysis

import pandas as pd
import numpy as np

# Work on a copy so the merged dataset from the previous step stays untouched
data = merged_df.copy()

# Coerce the metric columns to numbers; unparseable values become NaN
numeric_cols = ['closed_pnl', 'size_usd', 'execution_price']
for column in [name for name in numeric_cols if name in data.columns]:
    data[column] = pd.to_numeric(data[column], errors='coerce')

# Per-sentiment statistics; named aggregation yields the flat column names directly
summary = (
    data.groupby('classification')
    .agg(
        avg_pnl=('closed_pnl', 'mean'),
        median_pnl=('closed_pnl', 'median'),
        avg_size_usd=('size_usd', 'mean'),
        median_size_usd=('size_usd', 'median'),
        avg_exec_price=('execution_price', 'mean'),
    )
    .reset_index()
)

print("Summary statistics by sentiment:")
display(summary)

# Persist the summary table for the visualization step
summary.to_csv("/content/ds_Kedaresh_Inamdar/csv_files/summary_stats.csv", index=False)
print("Summary statistics saved to csv_files/summary_stats.csv")

# Pairwise correlation between the three metric columns
corr = data[numeric_cols].corr()
print("\nCorrelation matrix:")
display(corr)

# Persist the correlation matrix (row labels are kept as the first CSV column)
corr.to_csv("/content/ds_Kedaresh_Inamdar/csv_files/correlation_matrix.csv")
print("Correlation matrix saved to csv_files/correlation_matrix.csv")

Summary statistics by sentiment:


Unnamed: 0,classification,avg_pnl,median_pnl,avg_size_usd,median_size_usd,avg_exec_price
0,Extreme Fear,34.537862,0.0,5349.731843,766.15,7054.795108
1,Extreme Greed,67.892861,0.0,3112.251565,500.05,6082.195865
2,Fear,54.2904,0.0,7816.109931,735.96,14152.620222
3,Greed,42.743559,0.0,5736.884375,555.0,13411.276344
4,Neutral,34.307718,0.0,4782.732661,547.655,12393.692779
5,Unknown,7078.665688,8057.245408,14778.143333,14798.105,2.649933


Summary statistics saved to csv_files/summary_stats.csv

Correlation matrix:


Unnamed: 0,closed_pnl,size_usd,execution_price
closed_pnl,1.0,0.123589,-0.006505
size_usd,0.123589,1.0,0.189855
execution_price,-0.006505,0.189855,1.0


Correlation matrix saved to csv_files/correlation_matrix.csv


In [None]:
# Step 5: Visualization of Results

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os

# Folder that receives the generated figures (created if it does not exist yet)
output_dir = "/content/ds_Kedaresh_Inamdar/outputs"
os.makedirs(output_dir, exist_ok=True)

# Read back the tables written by the analysis step; the correlation matrix
# stores its row labels in the first CSV column, hence index_col=0
summary = pd.read_csv("/content/ds_Kedaresh_Inamdar/csv_files/summary_stats.csv")
corr = pd.read_csv("/content/ds_Kedaresh_Inamdar/csv_files/correlation_matrix.csv", index_col=0)

# Identify column names robustly
def find_col(df, candidates):
    """Return the first name in `candidates` that is a column of `df`.

    Candidates are checked in the order given; None is returned when none of
    them is present in `df.columns`.
    """
    return next((name for name in candidates if name in df.columns), None)

# Resolve the actual column name for each role from a list of accepted spellings
class_col = find_col(summary, ['classification', 'class', 'sentiment'])
pnl_col = find_col(summary, ['avg_pnl', 'avg_profit', 'avg_closedpnl', 'avg_closed_pnl'])
size_col = find_col(summary, ['avg_size', 'avg_trade_size', 'avg_size_usd'])
exec_price_col = find_col(summary, ['avg_exec_price', 'avg_execution_price'])

# Every role must have been resolved; report all unresolved ones at once
resolved_roles = {
    'classification': class_col,
    'avg_pnl': pnl_col,
    'avg_size_usd': size_col,
    'avg_exec_price': exec_price_col,
}
missing = [role for role, column in resolved_roles.items() if column is None]
if missing:
    raise ValueError(f"Missing expected columns in summary_stats.csv: {missing}. "
                     "If these names differ, recompute or rename them to the expected names.")

# Order the categories from most fearful to most greedy; categories outside the
# preferred list are appended alphabetically, and absent ones are dropped
preferred_order = ['Extreme Fear', 'Fear', 'Neutral', 'Greed', 'Extreme Greed', 'Unknown']
present = list(summary[class_col].astype(str).unique())
known_labels = [label for label in preferred_order if label in present]
other_labels = sorted(label for label in present if label not in preferred_order)
ordered = known_labels + other_labels

# Common plotting settings
def plot_bar(x_col, y_col, title, ylabel, filename):
    """Draw one bar chart from the `summary` table and save it under `output_dir`.

    x_col    -- column of `summary` used for the bars (also used for the colours)
    y_col    -- column of `summary` holding the bar heights
    title    -- figure title
    ylabel   -- label of the y axis
    filename -- file name of the image inside `output_dir`

    The bars follow the category order in `ordered`. The figure is closed after
    saving, and the saved path is printed.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(data=summary, x=x_col, y=y_col, hue=x_col, order=ordered, legend=False, ax=ax)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel("Market Sentiment", fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    # Slant the category labels so long names do not overlap
    plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
    fig.tight_layout()
    save_path = os.path.join(output_dir, filename)
    fig.savefig(save_path)
    plt.close(fig)
    print("Saved:", save_path)

# 1-3. Bar charts by sentiment: (value column, title, y-axis label, output file name)
bar_specs = [
    (pnl_col, "Average PnL by Market Sentiment", "Average Profit / Loss", "avg_pnl_by_sentiment.png"),
    (size_col, "Average Trade Size by Market Sentiment", "Average Trade Size (USD)", "avg_trade_size_by_sentiment.png"),
    (exec_price_col, "Average Execution Price by Market Sentiment", "Average Execution Price", "avg_execution_price_by_sentiment.png"),
]
for value_col, chart_title, y_label, file_name in bar_specs:
    plot_bar(class_col, value_col, chart_title, y_label, file_name)


# 4. Correlation heatmap
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", cbar_kws={'shrink': .8}, ax=ax)
ax.set_title("Correlation Heatmap - Trader Metrics", fontsize=14)
fig.tight_layout()
heatmap_path = os.path.join(output_dir, "correlation_heatmap.png")
fig.savefig(heatmap_path)
plt.close(fig)
print("Saved:", heatmap_path)

# Final listing
# Hidden entries (names starting with '.', e.g. the .ipynb_checkpoints folder
# Jupyter creates) are not generated outputs, so they are left out of the list.
print("\nFiles in outputs folder:")
for f in sorted(os.listdir(output_dir)):
    if f.startswith('.'):
        continue
    print(" -", f)

Saved: /content/ds_Kedaresh_Inamdar/outputs/avg_pnl_by_sentiment.png
Saved: /content/ds_Kedaresh_Inamdar/outputs/avg_trade_size_by_sentiment.png
Saved: /content/ds_Kedaresh_Inamdar/outputs/avg_execution_price_by_sentiment.png
Saved: /content/ds_Kedaresh_Inamdar/outputs/correlation_heatmap.png

Files in outputs folder:
 - .ipynb_checkpoints
 - avg_execution_price_by_sentiment.png
 - avg_pnl_by_sentiment.png
 - avg_trade_size_by_sentiment.png
 - correlation_heatmap.png
