# Daily Log Returns Analysis (2020-2024)

This notebook analyzes daily log returns for S&P 500, Gold, Bitcoin, Ethereum, and XRP from 2020 to 2024.
Log returns are used instead of simple returns because they are additive over time, which makes aggregation across periods and statistical comparison between assets more straightforward.

In [None]:
# Import required libraries
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
# Silence every warning so library notices do not clutter the cell outputs.
# Note: this is a blanket filter, so deprecation warnings are hidden as well.
warnings.filterwarnings('ignore')

# Display configuration: lift the pandas limits on column count and line width
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

# Plot theme shared by every figure in the notebook
plt.style.use('seaborn-v0_8-darkgrid')

print("Libraries imported successfully!")

## 1. Load the Data and Convert to Log Returns

In [None]:
# Load the combined data file and index it by date.
# The index is sorted explicitly because later cells (cumulative sums, rolling
# windows, "last value" lookups) assume the rows are in chronological order.
df = (
    pd.read_csv('all_assets_daily_returns_2020_2024.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .sort_index()
)

# Simple-return columns are recognised by their suffix; the asset name is the
# column name with that suffix stripped from the end only.
RETURN_SUFFIX = '_Return'
return_columns = [col for col in df.columns if col.endswith(RETURN_SUFFIX)]
assets = [col[:-len(RETURN_SUFFIX)] for col in return_columns]

# Names of the derived log-return columns, used by all later cells
log_return_columns = [f'{asset}_LogReturn' for asset in assets]

# Convert percentage simple returns to percentage log returns
# Formula: log_return = ln(1 + simple_return/100) * 100
# np.log1p evaluates ln(1 + x) accurately for the small x typical of daily returns.
for asset in assets:
    df[f'{asset}_LogReturn'] = np.log1p(df[f'{asset}_Return'] / 100) * 100

print(f"Data shape: {df.shape}")
print(f"\nColumns: {list(df.columns)}")
print(f"\nDate range: {df.index.min()} to {df.index.max()}")
print(f"\nFirst few rows of log returns:")
df[log_return_columns].head()

## 2. Summary Statistics (Log Returns)

In [None]:
# Summary table of daily log returns: one column per asset, one row per statistic
stat_labels = ['Mean', 'Std Dev', 'Min', 'Max', 'Skewness', 'Kurtosis', 'Sharpe Ratio']

stats_by_asset = {}
for asset in assets:
    series = df[f'{asset}_LogReturn'].dropna()
    mean_return = series.mean()
    volatility = series.std()
    stats_by_asset[asset] = [
        mean_return,
        volatility,
        series.min(),
        series.max(),
        series.skew(),
        series.kurtosis(),
        # Annualized Sharpe Ratio: mean / std scaled by sqrt(252); no risk-free rate is subtracted
        (mean_return / volatility) * np.sqrt(252),
    ]

summary_stats = pd.DataFrame(stats_by_asset, index=stat_labels)

print("Summary Statistics for Daily Log Returns (%):\n")
summary_stats.round(4)

## 3. Correlation Analysis (Log Returns)

In [None]:
# Pairwise correlations between the assets' daily log returns
correlation_matrix = df[log_return_columns].corr()

# Hide the diagonal and the upper triangle: they only mirror the lower triangle
upper_triangle = np.triu(np.ones(correlation_matrix.shape, dtype=bool))

# Heatmap styling collected in one place
heatmap_options = dict(
    mask=upper_triangle,
    annot=True,
    fmt='.3f',
    cmap='RdBu_r',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": .8},
)

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title('Daily Log Returns Correlation Matrix (2020-2024)', fontsize=16, pad=20)
plt.tight_layout()
plt.show()

# Show the full numeric matrix below the figure
print("\nCorrelation Matrix (Log Returns):")
correlation_matrix

## 4. Log Return Distributions

In [None]:
# Plot the distribution of daily log returns for every asset, one panel each,
# with a normal density (same mean and standard deviation) overlaid for comparison.
# `stats` (scipy.stats) comes from the notebook's import cell.

# Size the grid from the number of assets instead of assuming exactly five
n_cols = 3
n_rows = max(1, -(-len(assets) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
axes = axes.flatten()

for ax, asset in zip(axes, assets):
    log_returns = df[f'{asset}_LogReturn'].dropna()
    ax.hist(log_returns, bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black')
    ax.set_title(f'{asset} Daily Log Returns Distribution', fontsize=12)
    ax.set_xlabel('Daily Log Return (%)')
    ax.set_ylabel('Density')

    # Normal distribution overlay fitted to this asset's sample mean and std
    x = np.linspace(log_returns.min(), log_returns.max(), 100)
    ax.plot(x, stats.norm.pdf(x, log_returns.mean(), log_returns.std()), 'r-', linewidth=2, label='Normal')
    ax.legend()

# Remove every panel that did not receive an asset (none, one, or several)
for unused_ax in axes[len(assets):]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.show()

## 5. Cumulative Returns (from Log Returns)

In [None]:
# Calculate cumulative returns from log returns
# Cumulative log return = sum of log returns
# Convert back to simple return for display: exp(cumulative_log_return) - 1
cumulative_returns = {}
for asset in assets:
    # A missing observation is treated as a zero return for that row, so the
    # curve stays continuous and the final value is defined even when the
    # asset's last row is missing.
    daily_log = df[f'{asset}_LogReturn'].fillna(0) / 100  # Convert to decimal
    cumulative_returns[asset] = (np.exp(daily_log.cumsum()) - 1) * 100  # Convert to percentage

plt.figure(figsize=(14, 8))

colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
for i, asset in enumerate(assets):
    # Cycle through the palette so more than five assets cannot overrun it
    plt.plot(df.index, cumulative_returns[asset], label=asset,
             color=colors[i % len(colors)], linewidth=2)

plt.xlabel('Date', fontsize=12)
plt.ylabel('Cumulative Return (%)', fontsize=12)
plt.title('Cumulative Returns from Log Returns (2020-2024)', fontsize=16, pad=20)
plt.legend(loc='best', fontsize=10)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Display final cumulative returns (last point of each curve plotted above)
print("\nFinal Cumulative Returns (from Log Returns):")
for asset in assets:
    final_return = cumulative_returns[asset].iloc[-1]
    print(f"{asset}: {final_return:.2f}%")

## 6. Rolling Statistics (Log Returns)

In [None]:
# Calculate rolling statistics for log returns
window = 30  # 30-day rolling window
benchmark = 'SP500'  # asset every other series is correlated against

# NOTE(review): rolling windows need `window` non-missing observations by default;
# confirm the input has no gaps (e.g. non-trading days), or set min_periods.

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

# Rolling volatility, annualized with sqrt(252)
for asset in assets:
    rolling_vol = df[f'{asset}_LogReturn'].rolling(window=window).std() * np.sqrt(252)
    ax1.plot(df.index, rolling_vol, label=f'{asset}', linewidth=1.5)

ax1.set_title(f'{window}-Day Rolling Annualized Volatility (Log Returns)', fontsize=14)
ax1.set_ylabel('Volatility (%)', fontsize=12)
ax1.legend(loc='best')
ax1.grid(True, alpha=0.3)

# Rolling correlation with S&P 500.
# The benchmark is excluded by name rather than by position, so the plot stays
# correct whatever the column order of the input file.
benchmark_returns = df[f'{benchmark}_LogReturn']
for asset in assets:
    if asset == benchmark:
        continue  # Skip S&P 500 itself
    rolling_corr = benchmark_returns.rolling(window=window).corr(df[f'{asset}_LogReturn'])
    ax2.plot(df.index, rolling_corr, label=f'{asset} vs S&P 500', linewidth=1.5)

ax2.set_title(f'{window}-Day Rolling Correlation with S&P 500 (Log Returns)', fontsize=14)
ax2.set_xlabel('Date', fontsize=12)
ax2.set_ylabel('Correlation', fontsize=12)
ax2.legend(loc='best')
ax2.grid(True, alpha=0.3)
ax2.set_ylim(-1, 1)

plt.tight_layout()
plt.show()

## 7. Risk-Return Analysis (Log Returns)

In [None]:
# Risk-return scatter plot: annualized mean log return against annualized volatility

# Annualize each asset's daily figures (252 periods per year).
# annual_returns / annual_vols are kept as lists in asset order because the
# export cell reads them.
clean_log_returns = {asset: df[f'{asset}_LogReturn'].dropna() for asset in assets}
annual_returns = [clean_log_returns[asset].mean() * 252 for asset in assets]
annual_vols = [clean_log_returns[asset].std() * np.sqrt(252) for asset in assets]

plt.figure(figsize=(10, 8))

# One labelled marker per asset
for asset, vol, ret in zip(assets, annual_vols, annual_returns):
    plt.scatter(vol, ret, s=200, alpha=0.7)
    plt.annotate(asset, (vol, ret),
                 xytext=(5, 5), textcoords='offset points', fontsize=12)

plt.xlabel('Annualized Volatility (%) - Log Returns', fontsize=12)
plt.ylabel('Annualized Return (%) - Log Returns', fontsize=12)
plt.title('Risk-Return Profile using Log Returns (2020-2024)', fontsize=16, pad=20)
plt.grid(True, alpha=0.3)

# Dashed reference line through the origin with a fixed slope of 0.5
# (a visual guide only; it is not fitted to the data)
line_end = max(annual_vols) * 1.1
plt.plot([0, line_end], [0, line_end * 0.5],
         'k--', alpha=0.3, label='Risk-Return Trade-off')
plt.legend()
plt.tight_layout()
plt.show()

## 8. Monthly Performance Heatmap (Log Returns)

In [None]:
# Create a Year x Month heatmap of monthly log returns for each asset

# Size the grid from the number of assets instead of assuming exactly five
n_cols = 2
n_rows = max(1, -(-len(assets) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 4 * n_rows), squeeze=False)
axes = axes.flatten()

for ax, asset in zip(axes, assets):
    daily_log = df[f'{asset}_LogReturn']

    # Monthly log return = sum of daily log returns in that month.
    # Grouping on the calendar year and month of the index avoids the 'M'
    # resample alias, which newer pandas versions deprecate in favour of 'ME'.
    monthly_matrix = (
        daily_log
        .groupby([daily_log.index.year, daily_log.index.month])
        .sum()
        .rename_axis(['Year', 'Month'])
        .unstack('Month')  # rows: Year, columns: Month
    )

    # Create heatmap
    sns.heatmap(monthly_matrix,
                annot=True,
                fmt='.1f',
                cmap='RdYlGn',
                center=0,
                ax=ax,
                cbar_kws={'label': 'Log Return (%)'})
    ax.set_title(f'{asset} Monthly Log Returns (%)', fontsize=12)
    ax.set_xlabel('Month')
    ax.set_ylabel('Year')

# Remove every panel that did not receive an asset (none, one, or several)
for unused_ax in axes[len(assets):]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.show()

## 9. Comparison: Simple vs Log Returns

In [None]:
# Side-by-side histograms of Bitcoin's simple returns and log returns
bitcoin_simple = df['Bitcoin_Return'].dropna()
bitcoin_log = df['Bitcoin_LogReturn'].dropna()

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# (axes, data, bar colour, title, x-axis label) for each panel
panels = [
    (ax1, bitcoin_simple, 'orange', 'Bitcoin Simple Returns Distribution', 'Daily Return (%)'),
    (ax2, bitcoin_log, 'blue', 'Bitcoin Log Returns Distribution', 'Daily Log Return (%)'),
]

for panel_ax, returns, bar_color, panel_title, x_label in panels:
    panel_ax.hist(returns, bins=50, density=True, alpha=0.7, color=bar_color, edgecolor='black')
    panel_ax.set_title(panel_title, fontsize=14)
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel('Density')
    # Mark the sample mean with a dashed vertical line
    panel_ax.axvline(returns.mean(), color='red', linestyle='--', label=f'Mean: {returns.mean():.3f}%')
    panel_ax.legend()

plt.tight_layout()
plt.show()

# Numeric comparison of the two return definitions
print("Bitcoin Returns Comparison:")
print(f"Simple Returns - Mean: {bitcoin_simple.mean():.4f}%, Std: {bitcoin_simple.std():.4f}%, Skew: {bitcoin_simple.skew():.4f}")
print(f"Log Returns    - Mean: {bitcoin_log.mean():.4f}%, Std: {bitcoin_log.std():.4f}%, Skew: {bitcoin_log.skew():.4f}")
print("\nLog returns typically show less skewness and are more suitable for statistical analysis.")

## 10. Export Analysis Results

In [None]:
import json

# Write the three result tables to CSV, confirming each one as it is written.
# Each entry: (table, output path, confirmation message)
csv_exports = [
    (summary_stats,
     'analysis_summary_statistics_log_returns.csv',
     "Log returns summary statistics exported to 'analysis_summary_statistics_log_returns.csv'"),
    (correlation_matrix,
     'analysis_correlation_matrix_log_returns.csv',
     "Log returns correlation matrix exported to 'analysis_correlation_matrix_log_returns.csv'"),
    (df[log_return_columns],
     'daily_log_returns_2020_2024.csv',
     "Log returns data exported to 'daily_log_returns_2020_2024.csv'"),
]
for table, csv_path, confirmation in csv_exports:
    table.to_csv(csv_path)
    print(confirmation)

# Positions of the headline assets within `assets`, taken from the annualized
# figures computed in the risk-return cell (first match wins on ties)
best_idx = annual_returns.index(max(annual_returns))
most_volatile_idx = annual_vols.index(max(annual_vols))
least_volatile_idx = annual_vols.index(min(annual_vols))

first_day = df.index.min().date()
last_day = df.index.max().date()

# Create a comprehensive report
report = {
    'analysis_date': datetime.now().strftime('%Y-%m-%d'),
    'analysis_type': 'Log Returns Analysis',
    'data_period': {
        'start': str(first_day),
        'end': str(last_day),
        'total_days': len(df)
    },
    'assets_analyzed': assets,
    'key_findings': {
        'best_performer': assets[best_idx],
        'highest_volatility': assets[most_volatile_idx],
        'lowest_volatility': assets[least_volatile_idx],
        'note': 'Analysis performed using log returns for better statistical properties'
    },
    'log_returns_benefits': [
        'Time additivity: log returns can be summed over time',
        'Better distributional properties (closer to normal)',
        'Reduced skewness compared to simple returns',
        'More suitable for statistical modeling and risk analysis'
    ]
}

with open('analysis_report_log_returns.json', 'w') as f:
    json.dump(report, f, indent=2)

# Closing messages, printed one per line in this order
closing_messages = [
    "\nLog returns analysis report exported to 'analysis_report_log_returns.json'",
    "\nLog Returns Analysis complete!",
    "\nBenefits of using log returns:",
    "- Better statistical properties (closer to normal distribution)",
    "- Time-additive (can sum log returns over periods)",
    "- Reduced skewness, especially for high-volatility assets",
    "- More appropriate for portfolio optimization and risk modeling",
]
for message in closing_messages:
    print(message)