In [3]:
"""
Commodities Trading Dashboard - Data Collection & Analysis
Author: Ondrej Marvan
Course: Python and SQL - Final Project

This notebook collects and analyzes data for:
- EUA (European Union Allowance)
- Natural Gas
- Gold
- Bitcoin
"""

# ============================================================================
# 1. IMPORT LIBRARIES
# ============================================================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import sqlite3
import warnings
warnings.filterwarnings('ignore')

# Set plotting style
sns.set_style("darkgrid")
plt.rcParams['figure.figsize'] = (14, 7)

# ============================================================================
# 2. DATA COLLECTION
# ============================================================================

# Base path for data files
base_path = '/home/ondrej-marvan/Documents/GitHub/OBS_DataScience/OBS_DataScience/Autumn 2025/2400-DS1SQL Python and SQL intro, SQL platforms/Project/data/'

def load_csv_data(filepath, symbol_name):
    """Load data from CSV file and standardize format"""
    try:
        df = pd.read_csv(filepath)
        
        # Check and standardize column names
        # Common column name variations
        date_cols = ['Date', 'date', 'DATE', 'Datetime', 'datetime']
        price_cols = ['Price', 'Close', 'close', 'CLOSE', 'Adj Close']
        open_cols = ['Open', 'open', 'OPEN']
        high_cols = ['High', 'high', 'HIGH']
        low_cols = ['Low', 'low', 'LOW']
        vol_cols = ['Vol', 'Volume', 'volume', 'VOLUME', 'Vol.']
        
        # Find matching columns
        date_col = next((col for col in df.columns if col in date_cols), df.columns[0])
        price_col = next((col for col in df.columns if col in price_cols), 'Price')
        open_col = next((col for col in df.columns if col in open_cols), 'Open')
        high_col = next((col for col in df.columns if col in high_cols), 'High')
        low_col = next((col for col in df.columns if col in low_cols), 'Low')
        vol_col = next((col for col in df.columns if col in vol_cols), 'Vol')
        
        # Create standardized dataframe
        result_df = pd.DataFrame()
        result_df['Date'] = pd.to_datetime(df[date_col])
        result_df['Open'] = pd.to_numeric(df[open_col], errors='coerce')
        result_df['High'] = pd.to_numeric(df[high_col], errors='coerce')
        result_df['Low'] = pd.to_numeric(df[low_col], errors='coerce')
        result_df['Price'] = pd.to_numeric(df[price_col], errors='coerce')
        
        # Handle volume (might be string with commas or 'K', 'M' suffixes)
        if vol_col in df.columns:
            vol_series = df[vol_col].astype(str).str.replace(',', '')
            # Handle K (thousands) and M (millions) suffixes
            vol_series = vol_series.apply(lambda x: float(x.replace('K', '')) * 1000 if 'K' in str(x).upper() 
                                         else (float(x.replace('M', '')) * 1000000 if 'M' in str(x).upper() 
                                         else x))
            result_df['Vol'] = pd.to_numeric(vol_series, errors='coerce')
        else:
            result_df['Vol'] = 0
        
        result_df['Symbol'] = symbol_name
        result_df = result_df.sort_values('Date')
        
        # Fill missing OHLC values
        result_df['Open'] = result_df['Open'].fillna(result_df['Price'])
        result_df['High'] = result_df['High'].fillna(result_df['Price'])
        result_df['Low'] = result_df['Low'].fillna(result_df['Price'])
        result_df['Vol'] = result_df['Vol'].fillna(0)
        
        print(f"âœ“ Loaded {len(result_df)} records for {symbol_name}")
        return result_df[['Date', 'Symbol', 'Open', 'High', 'Low', 'Price', 'Vol']]
        
    except Exception as e:
        print(f"âœ— Error loading {symbol_name}: {str(e)}")
        return pd.DataFrame()

# Load all data files
print("="*80)
print("LOADING DATA FROM CSV FILES")
print("="*80)

print("\nLoading EUA data...")
eua_df = load_csv_data(base_path + 'EUA_Data.csv', 'EUA')

print("Loading Natural Gas data...")
natgas_df = load_csv_data(base_path + 'NatGas_data.csv', 'NATGAS')

print("Loading Gold data...")
gold_df = load_csv_data(base_path + 'Gold_Data.csv', 'GOLD')

print("Loading Bitcoin data...")
bitcoin_df = load_csv_data(base_path + 'BTC_Data.csv', 'BTC')

# Combine all data
all_data = pd.concat([eua_df, natgas_df, gold_df, bitcoin_df], ignore_index=True)
print(f"\n{'='*80}")
print(f"Total records collected: {len(all_data)}")
print(f"Date range: {all_data['Date'].min()} to {all_data['Date'].max()}")
print(f"{'='*80}\n")

# ============================================================================
# 3. DATA CLEANING & PREPROCESSING
# ============================================================================

def clean_data(df):
    """Clean and preprocess the data"""
    print("Cleaning data...")
    
    # Remove duplicates
    initial_count = len(df)
    df = df.drop_duplicates(subset=['Date', 'Symbol'])
    print(f"  - Removed {initial_count - len(df)} duplicate records")
    
    # Handle missing values
    missing_before = df.isnull().sum().sum()
    df = df.dropna(subset=['Open', 'High', 'Low', 'Price'])
    print(f"  - Removed {missing_before} missing values")
    
    # Ensure proper data types
    df['Date'] = pd.to_datetime(df['Date'])
    numeric_cols = ['Open', 'High', 'Low', 'Price', 'Vol']
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    
    # Remove any rows with negative or zero prices (data errors)
    price_errors = len(df[df['Price'] <= 0])
    df = df[df['Price'] > 0]
    if price_errors > 0:
        print(f"  - Removed {price_errors} rows with invalid prices")
    
    # Sort by symbol and date
    df = df.sort_values(['Symbol', 'Date'])
    
    # Reset index
    df = df.reset_index(drop=True)
    
    print(f"  âœ“ Cleaning complete: {len(df)} clean records\n")
    return df

all_data = clean_data(all_data)

# Display sample data
print("Sample of cleaned data:")
print(all_data.groupby('Symbol').head(2))
print("\n")

# ============================================================================
# 4. TECHNICAL INDICATORS CALCULATION
# ============================================================================

def calculate_sma(df, periods=[20, 50, 200]):
    """Calculate Simple Moving Averages"""
    print(f"Calculating Simple Moving Averages ({periods})...")
    for period in periods:
        df[f'SMA_{period}'] = df.groupby('Symbol')['Price'].transform(
            lambda x: x.rolling(window=period, min_periods=1).mean()
        )
    return df

def calculate_bollinger_bands(df, period=20, std_dev=2):
    """Calculate Bollinger Bands"""
    print(f"Calculating Bollinger Bands (period={period}, std_dev={std_dev})...")
    df['BB_Middle'] = df.groupby('Symbol')['Price'].transform(
        lambda x: x.rolling(window=period, min_periods=1).mean()
    )
    df['BB_Std'] = df.groupby('Symbol')['Price'].transform(
        lambda x: x.rolling(window=period, min_periods=1).std()
    )
    df['BB_Upper'] = df['BB_Middle'] + (std_dev * df['BB_Std'])
    df['BB_Lower'] = df['BB_Middle'] - (std_dev * df['BB_Std'])
    return df

def calculate_rsi(df, period=14):
    """Calculate Relative Strength Index"""
    print(f"Calculating RSI (period={period})...")
    def rsi_calc(prices):
        deltas = prices.diff()
        gain = deltas.where(deltas > 0, 0).rolling(window=period, min_periods=1).mean()
        loss = -deltas.where(deltas < 0, 0).rolling(window=period, min_periods=1).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))
    
    df['RSI'] = df.groupby('Symbol')['Price'].transform(rsi_calc)
    return df

def calculate_daily_returns(df):
    """Calculate daily returns"""
    print("Calculating daily returns...")
    df['Daily_Return'] = df.groupby('Symbol')['Price'].pct_change() * 100
    return df

# Apply technical indicators
print("\n" + "="*80)
print("CALCULATING TECHNICAL INDICATORS")
print("="*80 + "\n")

all_data = calculate_sma(all_data, [20, 50, 200])
all_data = calculate_bollinger_bands(all_data)
all_data = calculate_rsi(all_data)
all_data = calculate_daily_returns(all_data)

print("âœ“ All technical indicators calculated\n")

# ============================================================================
# 5. BACKWARDATION ANALYSIS
# ============================================================================

def calculate_contango_backwardation(df):
    """
    Calculate contango/backwardation indicator
    Positive = Contango (futures > spot)
    Negative = Backwardation (futures < spot)
    """
    print("Calculating backwardation/contango indicators...")
    
    # Using Close vs Open as proxy for futures curve
    df['Futures_Spread'] = ((df['Price'] - df['Open']) / df['Open']) * 100
    
    # 10-day rolling average of spread
    df['Spread_MA10'] = df.groupby('Symbol')['Futures_Spread'].transform(
        lambda x: x.rolling(window=10, min_periods=1).mean()
    )
    
    # Classify market structure
    df['Market_Structure'] = df['Spread_MA10'].apply(
        lambda x: 'Contango' if x > 0.5 else ('Backwardation' if x < -0.5 else 'Normal')
    )
    
    print("âœ“ Backwardation analysis complete\n")
    return df

print("="*80)
print("BACKWARDATION/CONTANGO ANALYSIS")
print("="*80 + "\n")

all_data = calculate_contango_backwardation(all_data)

# ============================================================================
# 6. SAVE TO SQL DATABASE
# ============================================================================

def save_to_database(df, db_name='commodities_trading.db'):
    """Save data to SQLite database"""
    print("="*80)
    print("SAVING DATA TO DATABASE")
    print("="*80 + "\n")
    
    conn = sqlite3.connect(db_name)
    
    # Main prices table
    print("Creating 'prices' table...")
    df.to_sql('prices', conn, if_exists='replace', index=False)
    print(f"  âœ“ Saved {len(df)} records to 'prices' table")
    
    # Create aggregated statistics table
    print("\nCreating 'statistics' table...")
    stats_df = df.groupby('Symbol').agg({
        'Price': ['mean', 'std', 'min', 'max'],
        'Vol': 'sum',
        'Daily_Return': ['mean', 'std']
    }).reset_index()
    stats_df.columns = ['Symbol', 'Avg_Price', 'Price_Std', 'Min_Price', 'Max_Price', 
                        'Total_Volume', 'Avg_Return', 'Return_Volatility']
    stats_df.to_sql('statistics', conn, if_exists='replace', index=False)
    print(f"  âœ“ Saved statistics for {len(stats_df)} commodities")
    
    # Create users table for login
    print("\nCreating 'users' table...")
    users_df = pd.DataFrame({
        'username': ['admin', 'trader1', 'demo_user'],
        'password': ['admin123', 'trader123', 'demo123'],
        'role': ['admin', 'trader', 'demo']
    })
    users_df.to_sql('users', conn, if_exists='replace', index=False)
    print(f"  âœ“ Created {len(users_df)} user accounts")
    
    # Create orders table
    print("\nCreating 'orders' table...")
    conn.execute('''
        CREATE TABLE IF NOT EXISTS orders (
            order_id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
            username TEXT,
            symbol TEXT,
            order_type TEXT,
            quantity REAL,
            price REAL,
            total_value REAL,
            status TEXT DEFAULT 'Pending'
        )
    ''')
    print("  âœ“ Orders table created")
    
    conn.commit()
    conn.close()
    print(f"\n{'='*80}")
    print(f"âœ“ All data saved to '{db_name}'")
    print(f"{'='*80}\n")

save_to_database(all_data)

# ============================================================================
# 7. EXPLORATORY DATA ANALYSIS & VISUALIZATIONS
# ============================================================================

print("\n" + "="*80)
print("EXPLORATORY DATA ANALYSIS")
print("="*80)

# Summary statistics by commodity
print("\n1. SUMMARY STATISTICS BY COMMODITY:")
print("-" * 80)
summary_stats = all_data.groupby('Symbol')[['Price', 'Vol', 'Daily_Return', 'RSI']].describe()
print(summary_stats)

# Recent market structure
print("\n2. CURRENT MARKET STRUCTURE:")
print("-" * 80)
recent_structure = all_data.groupby('Symbol').tail(1)[['Symbol', 'Price', 'Market_Structure', 'Spread_MA10', 'RSI']]
print(recent_structure.to_string(index=False))

# Data availability
print("\n3. DATA AVAILABILITY BY COMMODITY:")
print("-" * 80)
data_counts = all_data.groupby('Symbol').agg({
    'Date': ['min', 'max', 'count']
})
data_counts.columns = ['Start Date', 'End Date', 'Record Count']
print(data_counts)

print("\n" + "="*80)
print("GENERATING VISUALIZATIONS")
print("="*80 + "\n")

# Visualization 1: Price Trends
print("1. Creating price trends chart...")
plt.figure(figsize=(16, 10))
for i, symbol in enumerate(['EUA', 'NATGAS', 'GOLD', 'BTC'], 1):
    plt.subplot(2, 2, i)
    data = all_data[all_data['Symbol'] == symbol].tail(365)
    if len(data) > 0:
        plt.plot(data['Date'], data['Price'], label='Price', linewidth=2, color='#2E86AB')
        plt.plot(data['Date'], data['SMA_20'], label='SMA 20', alpha=0.7, color='#A23B72')
        plt.plot(data['Date'], data['SMA_50'], label='SMA 50', alpha=0.7, color='#F18F01')
        plt.title(f'{symbol} Price Trend (Last Year)', fontsize=14, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('Price ($)')
        plt.legend()
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('price_trends.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: price_trends.png")
plt.close()

# Visualization 2: Bollinger Bands
print("2. Creating Bollinger Bands chart...")
plt.figure(figsize=(16, 10))
for i, symbol in enumerate(['EUA', 'NATGAS', 'GOLD', 'BTC'], 1):
    plt.subplot(2, 2, i)
    data = all_data[all_data['Symbol'] == symbol].tail(180)
    if len(data) > 0:
        plt.plot(data['Date'], data['Price'], label='Price', linewidth=2, color='black')
        plt.plot(data['Date'], data['BB_Upper'], label='Upper Band', linestyle='--', alpha=0.7, color='red')
        plt.plot(data['Date'], data['BB_Middle'], label='Middle Band', alpha=0.7, color='blue')
        plt.plot(data['Date'], data['BB_Lower'], label='Lower Band', linestyle='--', alpha=0.7, color='green')
        plt.fill_between(data['Date'], data['BB_Lower'], data['BB_Upper'], alpha=0.2, color='lightblue')
        plt.title(f'{symbol} Bollinger Bands', fontsize=14, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('Price ($)')
        plt.legend()
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('bollinger_bands.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: bollinger_bands.png")
plt.close()

# Visualization 3: RSI Comparison
print("3. Creating RSI analysis chart...")
plt.figure(figsize=(16, 10))
for i, symbol in enumerate(['EUA', 'NATGAS', 'GOLD', 'BTC'], 1):
    plt.subplot(2, 2, i)
    data = all_data[all_data['Symbol'] == symbol].tail(180)
    if len(data) > 0:
        plt.plot(data['Date'], data['RSI'], linewidth=2, color='purple')
        plt.axhline(y=70, color='r', linestyle='--', label='Overbought (70)')
        plt.axhline(y=30, color='g', linestyle='--', label='Oversold (30)')
        plt.fill_between(data['Date'], 30, 70, alpha=0.1, color='gray')
        plt.title(f'{symbol} RSI (Relative Strength Index)', fontsize=14, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('RSI')
        plt.legend()
        plt.ylim(0, 100)
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('rsi_analysis.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: rsi_analysis.png")
plt.close()

# Visualization 4: Daily Returns Distribution
print("4. Creating returns distribution chart...")
plt.figure(figsize=(16, 10))
for i, symbol in enumerate(['EUA', 'NATGAS', 'GOLD', 'BTC'], 1):
    plt.subplot(2, 2, i)
    data = all_data[all_data['Symbol'] == symbol]['Daily_Return'].dropna()
    if len(data) > 0:
        plt.hist(data, bins=50, edgecolor='black', alpha=0.7, color='skyblue')
        plt.axvline(data.mean(), color='r', linestyle='--', linewidth=2, label=f'Mean: {data.mean():.2f}%')
        plt.axvline(data.median(), color='g', linestyle='--', linewidth=2, label=f'Median: {data.median():.2f}%')
        plt.title(f'{symbol} Daily Returns Distribution', fontsize=14, fontweight='bold')
        plt.xlabel('Daily Return (%)')
        plt.ylabel('Frequency')
        plt.legend()
        plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('returns_distribution.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: returns_distribution.png")
plt.close()

# Visualization 5: Correlation Heatmap
print("5. Creating correlation matrix...")
plt.figure(figsize=(10, 8))
pivot_data = all_data.pivot_table(values='Price', index='Date', columns='Symbol')
correlation = pivot_data.corr()
sns.heatmap(correlation, annot=True, cmap='coolwarm', center=0, 
            square=True, linewidths=1, cbar_kws={"shrink": 0.8},
            fmt='.3f', annot_kws={'size': 14})
plt.title('Price Correlation Matrix', fontsize=16, fontweight='bold', pad=20)
plt.tight_layout()
plt.savefig('correlation_matrix.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: correlation_matrix.png")
plt.close()

# Visualization 6: Backwardation/Contango Analysis
print("6. Creating backwardation/contango chart...")
plt.figure(figsize=(16, 10))
for i, symbol in enumerate(['EUA', 'NATGAS', 'GOLD', 'BTC'], 1):
    plt.subplot(2, 2, i)
    data = all_data[all_data['Symbol'] == symbol].tail(180)
    if len(data) > 0:
        colors = data['Spread_MA10'].apply(lambda x: 'green' if x < 0 else 'red')
        plt.bar(data['Date'], data['Spread_MA10'], color=colors, alpha=0.6, width=1)
        plt.axhline(y=0, color='black', linestyle='-', linewidth=2)
        plt.axhline(y=0.5, color='red', linestyle='--', alpha=0.5, label='Contango Threshold')
        plt.axhline(y=-0.5, color='green', linestyle='--', alpha=0.5, label='Backwardation Threshold')
        plt.title(f'{symbol} Market Structure (Backwardation/Contango)', fontsize=14, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('Spread (%)')
        plt.legend()
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('market_structure.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: market_structure.png")
plt.close()

# Visualization 7: Volume Analysis
print("7. Creating volume analysis chart...")
plt.figure(figsize=(16, 10))
for i, symbol in enumerate(['EUA', 'NATGAS', 'GOLD', 'BTC'], 1):
    plt.subplot(2, 2, i)
    data = all_data[all_data['Symbol'] == symbol].tail(180)
    if len(data) > 0 and data['Vol'].sum() > 0:
        colors = ['red' if ret < 0 else 'green' for ret in data['Daily_Return'].fillna(0)]
        plt.bar(data['Date'], data['Vol'], alpha=0.6, color=colors, width=1)
        plt.title(f'{symbol} Trading Volume', fontsize=14, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('Volume')
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)
    else:
        plt.text(0.5, 0.5, f'No volume data available for {symbol}', 
                ha='center', va='center', transform=plt.gca().transAxes)
plt.tight_layout()
plt.savefig('volume_analysis.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: volume_analysis.png")
plt.close()

# Visualization 8: Cumulative Returns
print("8. Creating cumulative returns chart...")
plt.figure(figsize=(14, 8))
for symbol in ['EUA', 'NATGAS', 'GOLD', 'BTC']:
    data = all_data[all_data['Symbol'] == symbol].copy()
    if len(data) > 0:
        data['Cumulative_Return'] = (1 + data['Daily_Return']/100).cumprod() - 1
        plt.plot(data['Date'], data['Cumulative_Return'] * 100, label=symbol, linewidth=2)
plt.title('Cumulative Returns Comparison', fontsize=16, fontweight='bold')
plt.xlabel('Date')
plt.ylabel('Cumulative Return (%)')
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.axhline(y=0, color='black', linestyle='-', linewidth=1)
plt.tight_layout()
plt.savefig('cumulative_returns.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: cumulative_returns.png")
plt.close()

# Visualization 9: Risk-Return Scatter
print("9. Creating risk-return profile chart...")
plt.figure(figsize=(12, 8))
risk_return = all_data.groupby('Symbol').agg({
    'Daily_Return': ['mean', 'std']
}).reset_index()
risk_return.columns = ['Symbol', 'Avg_Return', 'Volatility']

colors = {'EUA': '#2E86AB', 'NATGAS': '#A23B72', 'GOLD': '#F18F01', 'BTC': '#C73E1D'}
for _, row in risk_return.iterrows():
    plt.scatter(row['Volatility'], row['Avg_Return'], s=300, alpha=0.6, 
               color=colors.get(row['Symbol'], 'gray'))
    plt.annotate(row['Symbol'], (row['Volatility'], row['Avg_Return']), 
                fontsize=14, ha='center', fontweight='bold')

plt.axhline(y=0, color='black', linestyle='--', linewidth=1)
plt.axvline(x=0, color='black', linestyle='--', linewidth=1)
plt.title('Risk-Return Profile', fontsize=16, fontweight='bold')
plt.xlabel('Volatility (Std Dev of Daily Returns)', fontsize=12)
plt.ylabel('Average Daily Return (%)', fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('risk_return.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: risk_return.png")
plt.close()

# Visualization 10: Price Performance Comparison (Normalized)
print("10. Creating normalized performance chart...")
plt.figure(figsize=(14, 8))
for symbol in ['EUA', 'NATGAS', 'GOLD', 'BTC']:
    data = all_data[all_data['Symbol'] == symbol].copy()
    if len(data) > 0 and data['Price'].iloc[0] > 0:
        data['Normalized_Price'] = (data['Price'] / data['Price'].iloc[0]) * 100
        plt.plot(data['Date'], data['Normalized_Price'], label=symbol, linewidth=2)
plt.title('Normalized Price Performance (Base = 100)', fontsize=16, fontweight='bold')
plt.xlabel('Date')
plt.ylabel('Normalized Price')
plt.legend(fontsize=12)
plt.axhline(y=100, color='black', linestyle='--', linewidth=1, alpha=0.5)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('normalized_performance.png', dpi=300, bbox_inches='tight')
print("  âœ“ Saved: normalized_performance.png")
plt.close()

print("\n" + "="*80)
print("ANALYSIS COMPLETE!")
print("="*80)
print(f"\nâœ“ Database created: commodities_trading.db")
print(f"âœ“ Total visualizations saved: 10 PNG files")
print(f"\nðŸ“Š Summary by Commodity:")
print("-" * 80)
for symbol in ['EUA', 'NATGAS', 'GOLD', 'BTC']:
    symbol_data = all_data[all_data['Symbol'] == symbol]
    if len(symbol_data) > 0:
        latest = symbol_data.iloc[-1]
        print(f"\n{symbol}:")
        print(f"  Records: {len(symbol_data)}")
        print(f"  Date Range: {symbol_data['Date'].min().date()} to {symbol_data['Date'].max().date()}")
        print(f"  Current Price: ${latest['Price']:.2f}")
        print(f"  RSI: {latest['RSI']:.2f}")
        print(f"  Market Structure: {latest['Market_Structure']}")

print(f"\n{'='*80}")
print("Next step: Run the Streamlit app using:")
print("streamlit run streamlit_app.py")
print("="*80)

LOADING DATA FROM CSV FILES

Loading EUA data...
âœ“ Loaded 254 records for EUA
Loading Natural Gas data...
âœ“ Loaded 258 records for NATGAS
Loading Gold data...
âœ“ Loaded 22 records for GOLD
Loading Bitcoin data...
âœ“ Loaded 364 records for BTC

Total records collected: 898
Date range: 2025-01-01 00:00:00 to 2025-12-30 00:00:00

Cleaning data...
  - Removed 0 duplicate records
  - Removed 1544 missing values
  âœ“ Cleaning complete: 512 clean records

Sample of cleaned data:
          Date  Symbol    Open    High     Low   Price       Vol
0   2025-01-02     EUA  73.550  75.350  72.000  75.190   26450.0
1   2025-01-03     EUA  75.050  76.350  74.860  75.940   23020.0
254 2025-01-02  NATGAS   3.656   3.801   3.541   3.660  156670.0
255 2025-01-03  NATGAS   3.669   3.680   3.330   3.354  188090.0



CALCULATING TECHNICAL INDICATORS

Calculating Simple Moving Averages ([20, 50, 200])...
Calculating Bollinger Bands (period=20, std_dev=2)...
Calculating RSI (period=14)...
Calculating dai