In [1]:
import os

ROOT = "ds_abhay_kumar_banjare"
CSV = f"{ROOT}/csv_files"
OUT = f"{ROOT}/outputs"

# Create the project root and its two sub-folders; exist_ok makes the cell
# safe to re-run.
project_dirs = (ROOT, CSV, OUT)
for folder in project_dirs:
    os.makedirs(folder, exist_ok=True)

# One print call, newline-separated, produces the same four output lines.
print("Folders created:", *project_dirs, sep="\n")


Folders created:
ds_abhay_kumar_banjare
ds_abhay_kumar_banjare/csv_files
ds_abhay_kumar_banjare/outputs


In [2]:
pip install reportlab

Collecting reportlab
  Downloading reportlab-4.4.5-py3-none-any.whl.metadata (1.7 kB)
Downloading reportlab-4.4.5-py3-none-any.whl (2.0 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m1.0/2.0 MB[0m [31m32.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab
Successfully installed reportlab-4.4.5


In [3]:
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet

# Report body written in a light Markdown dialect. generate_pdf_report() only
# interprets line prefixes: '# ', '## ', '### ' (headings) and '- ' (bullets);
# everything else, including the backticks below, is emitted as plain text.
# The closing "- Abhay Kumar Banjare" signature line also starts with '- ',
# so it is rendered through the bullet branch.
# NOTE(review): "Fear Grid Index" may be a typo for "Fear & Greed Index" —
# confirm against the sentiment data source before changing the text.
REPORT_TEXT = """
# Data Science Project Report
## Trader Sentiment Analysis

### Introduction
This project analyzes trader sentiment and its relationship with market performance using historical data and sentiment scores.

### Data
- Fear Grid Index
- Historical trading data
- Sentiment datasets

### Methodology
- Data cleaning and aggregation
- Sentiment analysis
- Statistical tests (t-tests, KPI summaries)
- Visualization of results

### Results
- CSV outputs stored in `csv_files/`
- Charts stored in `outputs/`
- Notebook contains full code and workflow

### Conclusion
Trader sentiment shows measurable patterns that align with market movements.
This project demonstrates how data science can be applied to financial sentiment analysis.

- Abhay Kumar Banjare
"""

def generate_pdf_report(filename, content):
    """Render lightly Markdown-formatted text into a PDF using ReportLab.

    Each line of ``content`` becomes one paragraph, chosen by its prefix:

    * ``'# '``, ``'## '``, ``'### '`` -> heading styles h1 / h2 / h3
    * ``'- '``                        -> bulleted paragraph
    * anything else                   -> normal paragraph

    Parameters
    ----------
    filename : str
        Destination path of the PDF (passed straight to SimpleDocTemplate).
    content : str
        Report text; split on newlines.

    Returns
    -------
    None
        The PDF is written to ``filename`` and a confirmation is printed.
    """
    # Function-scope stdlib import keeps the cell self-contained.
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(filename, pagesize=letter)
    styles = getSampleStyleSheet()
    story = []

    heading_styles = (('# ', 'h1'), ('## ', 'h2'), ('### ', 'h3'))
    line_gap = 0.2 * 10  # vertical gap after every paragraph, in points

    for line in content.split('\n'):
        for prefix, style_name in heading_styles:
            if line.startswith(prefix):
                # Slice the marker off the front only; str.replace would also
                # delete the same characters if they appeared inside the title.
                text, style, bullet = line[len(prefix):], styles[style_name], None
                break
        else:
            if line.startswith('- '):
                # Drop the Markdown dash so the output reads "• item",
                # not "• - item".
                text, style, bullet = line[2:], styles['Normal'], '\u2022'
            else:
                text, style, bullet = line, styles['Normal'], None

        # Paragraph parses its text as XML-like markup, so literal '&', '<'
        # and '>' must be escaped or the build fails / drops text.
        story.append(Paragraph(escape(text), style, bulletText=bullet))
        story.append(Spacer(1, line_gap))

    doc.build(story)
    print(f"PDF report '{filename}' created successfully.")


# Build the path from ROOT (like CSV and OUT) instead of a hard-coded
# absolute '/content/...' location, so the notebook also runs outside the
# environment it was authored in. The directory is created by the setup cell.
output_pdf_path = f"{ROOT}/ds_report.pdf"
generate_pdf_report(output_pdf_path, REPORT_TEXT)


PDF report '/content/ds_abhay_kumar_banjare/ds_report.pdf' created successfully.


In [4]:
!pip install gdown
import gdown

TRADER_ID = "1IAfLZwu6rJzyWKgBToqwSmmVYU6VbjVs"
SENT_ID   = "1PgQC0tO8XN-wqkNyghWc_-mnrYv_nhSf"

TRADER_RAW = f"{CSV}/trader_data_raw.csv"
SENT_RAW   = f"{CSV}/sentiment_raw.csv"

gdown.download(id=TRADER_ID, output=TRADER_RAW, quiet=False)
gdown.download(id=SENT_ID, output=SENT_RAW, quiet=False)




Downloading...
From: https://drive.google.com/uc?id=1IAfLZwu6rJzyWKgBToqwSmmVYU6VbjVs
To: /content/ds_abhay_kumar_banjare/csv_files/trader_data_raw.csv
100%|██████████| 47.5M/47.5M [00:01<00:00, 44.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1PgQC0tO8XN-wqkNyghWc_-mnrYv_nhSf
To: /content/ds_abhay_kumar_banjare/csv_files/sentiment_raw.csv
100%|██████████| 90.8k/90.8k [00:00<00:00, 2.74MB/s]


'ds_abhay_kumar_banjare/csv_files/sentiment_raw.csv'

In [5]:
import pandas as pd
import numpy as np

# Raw-to-internal column names for the trader extract.
trader_column_aliases = {
    'Execution Price': 'execution_price',
    'Size Tokens': 'size',
    'Start Position': 'start_position',
    'Closed PnL': 'closedPnL',
}

trader = pd.read_csv(TRADER_RAW)
sent = pd.read_csv(SENT_RAW).rename(
    columns={'date': 'Date', 'classification': 'Classification'}
)

# Leverage is a constant placeholder (1.0 on every row), so every
# leverage-derived metric downstream is constant as well.
trader = trader.rename(columns=trader_column_aliases).assign(leverage=1.0)

print("Trader DataFrame columns:", trader.columns)

# Unparseable timestamps become NaT (errors='coerce').
# NOTE(review): no explicit format/dayfirst is given; if 'Timestamp IST' is
# day-first, ambiguous dates may be parsed month-first — confirm on the data.
trader = trader.assign(
    time=lambda frame: pd.to_datetime(frame['Timestamp IST'], errors='coerce'),
    date=lambda frame: frame['time'].dt.date,
)

# Sentiment: plain calendar dates plus trimmed, Title-Cased labels.
sent = sent.assign(
    Date=lambda frame: pd.to_datetime(frame['Date']).dt.date,
    Classification=lambda frame: (
        frame['Classification'].astype(str).str.strip().str.title()
    ),
)

TRADER_CLEAN = f"{CSV}/trader_data_clean.csv"
trader.to_csv(TRADER_CLEAN, index=False)

Trader DataFrame columns: Index(['Account', 'Coin', 'execution_price', 'size', 'Size USD', 'Side',
       'Timestamp IST', 'start_position', 'Direction', 'closedPnL',
       'Transaction Hash', 'Order ID', 'Crossed', 'Fee', 'Trade ID',
       'Timestamp', 'leverage'],
      dtype='object')


In [6]:
# Per-trade derived measures.
trader['notional'] = trader['execution_price'].mul(trader['size'])
trader['risk_exposure'] = trader['leverage'].mul(trader['notional'])

# PnL per unit of notional; left as NaN wherever notional is not positive.
has_positive_notional = trader['notional'] > 0
trader['pnl_rate'] = (
    trader['closedPnL'].div(trader['notional']).where(has_positive_notional)
)

# One row per calendar date: output column -> (source column, aggregation).
daily_spec = {
    'total_trades': ('Account', 'count'),
    'total_notional': ('notional', 'sum'),
    'avg_leverage': ('leverage', 'mean'),
    'total_pnl': ('closedPnL', 'sum'),
    'avg_pnl': ('closedPnL', 'mean'),
    'avg_pnl_rate': ('pnl_rate', 'mean'),
}
daily = trader.groupby('date').agg(**daily_spec).reset_index()

DAILY_METRICS = f"{CSV}/agg_daily_metrics.csv"
daily.to_csv(DAILY_METRICS, index=False)

In [7]:
# Left join keeps every trading day; days without a sentiment record get a
# NaN Classification.
daily_by_date = daily.rename(columns={'date': 'Date'})
sentiment_labels = sent[['Date', 'Classification']]
merged = daily_by_date.merge(sentiment_labels, how='left', on='Date')

DAILY_SENT = f"{CSV}/daily_with_sentiment.csv"
merged.to_csv(DAILY_SENT, index=False)


In [8]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")

def save_fig(name):
    """Save the current matplotlib figure under OUT and close it.

    Parameters
    ----------
    name : str
        File name (with extension) to create inside the OUT directory.

    The figure gets a tight layout, is written at 140 dpi, then closed; the
    destination path is printed as confirmation.
    """
    destination = "{}/{}".format(OUT, name)
    plt.tight_layout()
    plt.savefig(destination, dpi=140)
    plt.close()
    print("Saved:", destination)

# Days that carry a sentiment label (shared by both box plots).
labelled_days = merged.dropna(subset=['Classification'])

# PnL by sentiment
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=labelled_days, x='Classification', y='total_pnl', ax=ax)
ax.set_title("PnL by Sentiment")
save_fig("pnl_by_sentiment.png")

# Leverage over time
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=merged.sort_values('Date'), x='Date', y='avg_leverage', ax=ax)
ax.set_title("Leverage Over Time")
save_fig("leverage_over_time.png")

# Notional by sentiment
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=labelled_days, x='Classification', y='total_notional', ax=ax)
ax.set_title("Notional by Sentiment")
save_fig("notional_by_sentiment.png")

Saved: ds_abhay_kumar_banjare/outputs/pnl_by_sentiment.png
Saved: ds_abhay_kumar_banjare/outputs/leverage_over_time.png
Saved: ds_abhay_kumar_banjare/outputs/notional_by_sentiment.png


In [9]:
from scipy import stats

def ttest_metric(metric, data=None):
    """Welch t-test of ``metric`` between 'Greed' days and 'Fear' days.

    Parameters
    ----------
    metric : str
        Name of the numeric column to compare.
    data : pandas.DataFrame, optional
        Frame with ``metric`` and ``'Classification'`` columns. Defaults to
        the notebook-level ``merged`` frame, so existing one-argument calls
        behave as before.

    Returns
    -------
    dict
        Keys ``metric``, ``mean_greed``, ``mean_fear``, ``t_stat``, ``p_val``.
        ``t_stat`` and ``p_val`` are NaN when either group has fewer than two
        observations (the test is undefined there); a group mean is NaN when
        that group is empty.
    """
    frame = merged if data is None else data
    df = frame.dropna(subset=[metric, 'Classification'])
    labels = df['Classification']
    fear = df.loc[labels == 'Fear', metric]
    greed = df.loc[labels == 'Greed', metric]

    if len(greed) < 2 or len(fear) < 2:
        # Skip scipy here: it would only return NaN and emit runtime warnings.
        t, p = np.nan, np.nan
    else:
        # equal_var=False selects Welch's test (no equal-variance assumption).
        t, p = stats.ttest_ind(greed, fear, equal_var=False, nan_policy='omit')

    # Series.mean() yields NaN for an empty group without the
    # "mean of empty slice" warning that np.nanmean raises.
    return {'metric': metric,
            'mean_greed': float(greed.mean()),
            'mean_fear': float(fear.mean()),
            't_stat': float(t),
            'p_val': float(p)}

# Run the Greed-vs-Fear comparison once per daily metric.
tested_metrics = ['total_pnl', 'avg_leverage', 'total_notional', 'avg_pnl_rate']
results = [ttest_metric(m) for m in tested_metrics]

ttests = pd.DataFrame(results)
TTESTS_PATH = f"{CSV}/ttests_alignment.csv"
ttests.to_csv(TTESTS_PATH, index=False)


  res = hypotest_fun_out(*samples, **kwds)


In [10]:
# Per-date sentiment label lookup (index = date, value = label).
sent_map = sent.set_index('Date')['Classification']

trader_processed = trader.copy()

trader_processed['Classification'] = trader_processed['date'].map(sent_map)
# Only exact 'Greed' / 'Fear' labels get a code; any other label maps to NaN
# and is dropped below.
trader_processed['sent_code'] = trader_processed['Classification'].map({'Greed': 1, 'Fear': -1})

# Recomputed here so this cell does not rely on the daily-metrics cell having
# been executed on `trader` first.
trader_processed['notional'] = trader_processed['execution_price'] * trader_processed['size']
trader_processed['risk_exposure'] = trader_processed['leverage'] * trader_processed['notional']
trader_processed['pnl_rate'] = np.where(trader_processed['notional'] > 0,
                              trader_processed['closedPnL'] / trader_processed['notional'], np.nan)

# sent_code is numeric (+1 / -1). Comparing it with a text 'Direction' column
# is False on every row, which would label all trades "contrarian". Use
# 'Direction' directly only when it is already numeric; otherwise derive a
# +1 / -1 trade sign from 'Side'.
# NOTE(review): assumes 'Side' holds BUY / SELL (any case) — confirm on the data.
if pd.api.types.is_numeric_dtype(trader_processed['Direction']):
    trader_processed['trade_sign'] = trader_processed['Direction']
else:
    side_text = trader_processed['Side'].astype(str).str.strip().str.upper()
    trader_processed['trade_sign'] = side_text.map({'BUY': 1, 'SELL': -1})
    if trader_processed['trade_sign'].isna().all():
        raise ValueError(
            "Could not derive a trade sign: 'Direction' is not numeric and "
            "'Side' contains no BUY/SELL values."
        )

trader_processed = trader_processed.dropna(subset=['Direction', 'sent_code', 'trade_sign'])
trader_processed['aligned'] = (trader_processed['trade_sign'] == trader_processed['sent_code']).astype(int)

aligned_perf = trader_processed.groupby('aligned').agg(
    trades=('Account', 'count'),
    total_pnl=('closedPnL', 'sum'),
    avg_pnl=('closedPnL', 'mean'),
    avg_pnl_rate=('pnl_rate', 'mean')
).reset_index()

ALIGNED_PATH = f"{CSV}/aligned_vs_contrarian.csv"
aligned_perf.to_csv(ALIGNED_PATH, index=False)

# Chart
plt.figure(figsize=(8, 6))
# order=[0, 1] pins category 0 to tick 0 and category 1 to tick 1, so the tick
# labels below stay correct even if one of the two groups is absent.
sns.barplot(data=aligned_perf, x='aligned', y='avg_pnl_rate', order=[0, 1])
plt.xticks([0, 1], ['Contrarian', 'Aligned'])
plt.title("Contrarian vs Aligned PnL Rate")
save_fig("contrarian_vs_aligned_pnlrate.png")

Saved: ds_abhay_kumar_banjare/outputs/contrarian_vs_aligned_pnlrate.png


In [11]:
# Row masks for the two sentiment regimes being compared.
is_greed = merged['Classification'] == 'Greed'
is_fear = merged['Classification'] == 'Fear'

summary = {
    'Greed_days': int(is_greed.sum()),
    'Fear_days': int(is_fear.sum()),
}
# Mean of each daily metric per regime; insertion order fixes the CSV column order.
for column, label in (('total_pnl', 'Avg_total_pnl'), ('avg_leverage', 'Avg_leverage')):
    summary[f'{label}_Greed'] = float(merged.loc[is_greed, column].mean())
    summary[f'{label}_Fear'] = float(merged.loc[is_fear, column].mean())

summary_df = pd.DataFrame([summary])
SUMMARY_PATH = f"{CSV}/summary_kpis.csv"
summary_df.to_csv(SUMMARY_PATH, index=False)


# Task
To update `README.md` with the new project structure and usage details, first read the existing content, then append the new information, and finally write the complete content back to `README.md`.

## Append to README.md

### Subtask:
Read the current content of `README.md`, append the new project structure and usage details, and then write the updated content back to the `README.md` file.


**Reasoning**:
I need to define the content to be appended to the `README.md` file, including details about the project structure and generated files. This content will be stored in a string variable.



In [12]:
# Markdown block describing the generated folder layout and how to run the
# notebook; a later cell concatenates it onto the README.md content.
readme_content_to_add = """
## Project Structure

- `ds_abhay_kumar_banjare/`: The root directory for the project.
    - `csv_files/`: Contains all CSV data files.
        - `trader_data_raw.csv`: Raw trader data downloaded from Google Drive.
        - `sentiment_raw.csv`: Raw sentiment data downloaded from Google Drive.
        - `trader_data_clean.csv`: Cleaned and preprocessed trader data.
        - `agg_daily_metrics.csv`: Daily aggregated trading metrics.
        - `daily_with_sentiment.csv`: Daily metrics merged with sentiment data.
        - `ttests_alignment.csv`: Results of t-tests for sentiment alignment.
        - `aligned_vs_contrarian.csv`: Performance metrics for aligned vs. contrarian trades.
        - `summary_kpis.csv`: Key Performance Indicators summary by sentiment.
    - `outputs/`: Stores all generated plots and figures.
        - `pnl_by_sentiment.png`: Box plot of PnL by sentiment.
        - `leverage_over_time.png`: Line plot of average leverage over time.
        - `notional_by_sentiment.png`: Box plot of notional by sentiment.
        - `contrarian_vs_aligned_pnlrate.png`: Bar plot comparing PnL rates for contrarian vs. aligned trades.
    - `ds_report.pdf`: A PDF report summarizing the project.

## Usage

This project analyzes trader sentiment using historical data. The Jupyter Notebook (`.ipynb` file) contains the full workflow, including data loading, cleaning, analysis, visualization, and report generation.

To run the project:
1. Ensure you have Python and Jupyter installed.
2. Install the required libraries (`pandas`, `numpy`, `matplotlib`, `seaborn`, `scipy`, `reportlab`, `gdown`).
3. Execute the cells in the Jupyter Notebook sequentially.
4. The raw data will be downloaded, processed, and analysis results (CSV files and plots) will be saved in the respective `csv_files/` and `outputs/` directories.
"""

print("Defined the content to be added to README.md.")

Defined the content to be added to README.md.


**Reasoning**:
Now that the new content for `README.md` is defined, I will read the existing `README.md` file. If it doesn't exist, I'll start from a default title line (`# Data Science Project Report`) instead of raising an error, then append the new content, and finally write the combined content back to the `README.md` file in the project's root directory.



In [13]:
readme_path = f"{ROOT}/README.md"

# Start from the current README, or from a default title if none exists yet.
try:
    with open(readme_path, 'r', encoding='utf-8') as f:
        existing_readme_content = f.read()
except FileNotFoundError:
    existing_readme_content = "# Data Science Project Report\n"

# Append only if the section is not already there, so re-running this cell
# (or the whole notebook) does not stack duplicate copies in the README.
if readme_content_to_add.strip() in existing_readme_content:
    full_readme_content = existing_readme_content
else:
    full_readme_content = existing_readme_content + readme_content_to_add

# Explicit UTF-8 keeps the file identical across platforms with different
# default encodings.
with open(readme_path, 'w', encoding='utf-8') as f:
    f.write(full_readme_content)

print(f"Successfully updated {readme_path}")

Successfully updated ds_abhay_kumar_banjare/README.md


## Final Task

### Subtask:
Confirm that the `README.md` file has been updated successfully with the new information.


## Summary:

### Q&A
*   **Was the `README.md` file updated successfully with the new information?**
    Yes, the `README.md` file in the project's root directory (`ds_abhay_kumar_banjare/README.md`) has been successfully updated with the specified project structure and usage details.

### Data Analysis Key Findings
*   A comprehensive block of text containing detailed project structure and usage instructions was prepared for inclusion in `README.md`.
*   The existing content of `README.md` was retrieved; if the file was not found, a default title was used as a starting point.
*   The newly prepared content was successfully appended to the existing `README.md` content.
*   The combined, updated content was written back to the `README.md` file at `ds_abhay_kumar_banjare/README.md`.

### Insights or Next Steps
*   The updated `README.md` now provides clear documentation for anyone wanting to understand or replicate the project, detailing file organization and execution steps.
*   The next step could involve manually reviewing the updated `README.md` file to ensure formatting and content accuracy for optimal readability.
