In [23]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
folder_path = "/content/drive/MyDrive/ds_YashPathak/"

In [7]:
df_trader = pd.read_csv(folder_path + "historical_data.csv")
df_sentiment = pd.read_csv(folder_path + "fear_greed_index.csv")

In [None]:
print(df_trader.info())
print(df_sentiment.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211224 entries, 0 to 211223
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Account           211224 non-null  object 
 1   Coin              211224 non-null  object 
 2   Execution Price   211224 non-null  float64
 3   Size Tokens       211224 non-null  float64
 4   Size USD          211224 non-null  float64
 5   Side              211224 non-null  object 
 6   Timestamp IST     211224 non-null  object 
 7   Start Position    211224 non-null  float64
 8   Direction         211224 non-null  object 
 9   Closed PnL        211224 non-null  float64
 10  Transaction Hash  211224 non-null  object 
 11  Order ID          211224 non-null  int64  
 12  Crossed           211224 non-null  bool   
 13  Fee               211224 non-null  float64
 14  Trade ID          211224 non-null  float64
 15  Timestamp         211224 non-null  float64
dtypes: bool(1), float64(

In [None]:
print(df_trader.head())

                                      Account  Coin  Execution Price  \
0  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9769   
1  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9800   
2  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9855   
3  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9874   
4  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9894   

   Size Tokens  Size USD Side     Timestamp IST  Start Position Direction  \
0       986.87   7872.16  BUY  02-12-2024 22:50        0.000000       Buy   
1        16.00    127.68  BUY  02-12-2024 22:50      986.524596       Buy   
2       144.09   1150.63  BUY  02-12-2024 22:50     1002.518996       Buy   
3       142.98   1142.04  BUY  02-12-2024 22:50     1146.558564       Buy   
4         8.73     69.75  BUY  02-12-2024 22:50     1289.488521       Buy   

   Closed PnL                                   Transaction Hash     Order ID  \
0         0.0  0xec0945

Pre-processing the Sentiment Data


In [12]:
# Preprocess the Sentiment data
df_sentiment['date'] = pd.to_datetime(df_sentiment['date'])

In [10]:
def clean_sentiment(classification):
  if 'Fear' in classification:
    return 'Fear'
  elif 'Greed' in classification:
    return 'Greed'
  return 'Neutral'

df_sentiment['Classification_Clean'] = df_sentiment['classification'].apply(clean_sentiment)


In [14]:
# Filtering out 'Neutral' to focus on "fear" and "greed"
df_sentiment_clean = df_sentiment[df_sentiment['Classification_Clean'] != 'Neutral'].copy()

sentiment_mapping = {'Greed': 1, 'Fear': 0}
df_sentiment_clean['Sentiment_Score'] = df_sentiment_clean['Classification_Clean'].map(sentiment_mapping)

df_sentiment_final = df_sentiment_clean[['date', 'Classification_Clean', 'Sentiment_Score']].copy()
df_sentiment_final = df_sentiment_final.rename(columns={'Classification_Clean': 'Classification'})

Now for the Trader Data

In [15]:
df_trader['time'] = pd.to_datetime(df_trader['Timestamp IST'], format='%d-%m-%Y %H:%M', errors='coerce')
df_trader = df_trader.dropna(subset=['time'])

In [16]:
df_trader['Trade_Date'] = df_trader['time'].dt.date.apply(pd.to_datetime)
df_trader['Signed_Volume'] = df_trader.apply(
    lambda row: row['Size USD'] if row['Direction'] == 'Buy' else -row['Size USD'],
    axis=1
)


In [19]:
# Daily Aggregation
daily_metrics = df_trader.groupby('Trade_Date').agg(
    Total_Daily_PnL=('Closed PnL', 'sum'),
    Total_Daily_Volume=('Size USD', 'sum'),
    Net_Trade_Flow=('Signed_Volume', 'sum'),
    Trade_Count=('Account', 'count')
).reset_index()
daily_metrics = daily_metrics.rename(columns={'Trade_Date': 'date'})

In [21]:
# merging the datasets
df_merged = pd.merge(
    daily_metrics,
    df_sentiment_final,
    on='date',
    how='inner' # Only keep dates where both sentiment and trading data exist
)

output_file_name = 'daily_merged_data_for_analysis.csv'
df_merged.to_csv(output_file_name, index=False)

 **Comparative analysis**

In [25]:
sns.set_style("whitegrid")

In [27]:
# loading the merged dataset
df_merged = pd.read_csv("daily_merged_data_for_analysis.csv")
df_merged['date'] = pd.to_datetime(df_merged['date'])

In [28]:
OUTPUT_DIR = 'outputs'
os.makedirs(OUTPUT_DIR, exist_ok = True)
print(f"Created output directory: {OUTPUT_DIR}/")

Created output directory: outputs/


In [33]:
print("----- Statiscal Comparison of Fear vs Greed ------ ")

analysis_metrics = {
    'Total_Daily_PnL': 'Total PnL',
    'Total_Daily_Volume': 'Total Volume (USD)',
    'Trade_Count': 'Trade Count',
    'Net_Trade_Flow': 'Net Trade Flow (USD)'
}
results = {}

for col, label in analysis_metrics.items():
    fear_data = df_merged[df_merged['Classification'] == 'Fear'][col].dropna()
    greed_data = df_merged[df_merged['Classification'] == 'Greed'][col].dropna()

    if len(fear_data) > 1 and len(greed_data) > 1:
        # Perform Independent Two-Sample T-test
        t_stat, p_value = stats.ttest_ind(fear_data, greed_data, equal_var=False)

        # Calculate Mean and Standard Deviation
        mean_fear = fear_data.mean()
        mean_greed = greed_data.mean()

        results[col] = {
            'Mean_Fear': f"{mean_fear:,.2f}",
            'Mean_Greed': f"{mean_greed:,.2f}",
            'T_Statistic': f"{t_stat:.3f}",
            'P_Value': f"{p_value:.5f}",
            'Significant': 'Yes' if p_value < 0.05 else 'No'
        }
        print(f"\nMetric: {label}")
        print(f"  P-value: {p_value:.5f} (Significant: {results[col]['Significant']})")

df_results = pd.DataFrame(results).T
print("\n--- T-Test Results Summary (Is Mean(Fear) statistically different from Mean(Greed)?) ---")
print(df_results)

----- Statiscal Comparison of Fear vs Greed ------ 

Metric: Total PnL
  P-value: 0.02458 (Significant: Yes)

Metric: Total Volume (USD)
  P-value: 0.00003 (Significant: Yes)

Metric: Trade Count
  P-value: 0.00002 (Significant: Yes)

Metric: Net Trade Flow (USD)
  P-value: 0.00003 (Significant: Yes)

--- T-Test Results Summary (Is Mean(Fear) statistically different from Mean(Greed)?) ---
                        Mean_Fear     Mean_Greed T_Statistic  P_Value  \
Total_Daily_PnL         39,012.05      15,847.88       2.273  0.02458   
Total_Daily_Volume   5,693,419.54   1,345,432.11       4.343  0.00003   
Trade_Count                792.73         294.12       4.454  0.00002   
Net_Trade_Flow      -5,570,759.89  -1,213,680.73      -4.389  0.00003   

                   Significant  
Total_Daily_PnL            Yes  
Total_Daily_Volume         Yes  
Trade_Count                Yes  
Net_Trade_Flow             Yes  


**Visualizating the Analysis**

In [34]:
print("-------- Generating Visualizations --------")

for i, (col, title) in enumerate(analysis_metrics.items()):
    plt.figure(figsize=(8, 5))

    sns.boxplot(
        x='Classification',
        y=col,
        data=df_merged,
        palette={'Fear': 'skyblue', 'Greed': 'lightcoral'}
    )

    plt.title(f'{title} Distribution: Fear vs. Greed Days', fontsize=14)
    plt.xlabel('Market Sentiment', fontsize=12)
    plt.ylabel(title, fontsize=12)

    plot_filename = os.path.join(OUTPUT_DIR, f'fig_{i+1}_{col}_boxplot.png')
    plt.savefig(plot_filename)
    print(f"Saved: {plot_filename}")

plt.close('all')
print("\nPhase 3 complete. Statistical results and visual outputs are ready.")

-------- Generating Visualizations --------



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(


Saved: outputs/fig_1_Total_Daily_PnL_boxplot.png



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(


Saved: outputs/fig_2_Total_Daily_Volume_boxplot.png



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(


Saved: outputs/fig_3_Trade_Count_boxplot.png
Saved: outputs/fig_4_Net_Trade_Flow_boxplot.png

Phase 3 complete. Statistical results and visual outputs are ready.



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
