### Bollingers Slim - update signal_cnt_tbl, update bollinger_tbl, create Tableau csv

In [18]:
import os
from datetime import datetime, timedelta

import pandas as pd
import psycopg2
from sklearn.preprocessing import MinMaxScaler

# Database connection parameters.
# Every value can be overridden through the standard libpq environment
# variables, so credentials do not have to be stored in the notebook.
# NOTE(review): the literal fallbacks (including the password) are kept only
# for backward compatibility; rotate the password and remove the fallback.
db_params = {
    'dbname': os.environ.get('PGDATABASE', 'twt_snt'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'Ilpmnl!69gg'),
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': os.environ.get('PGPORT', '5432')
}

# Fetch price data
def fetch_price_data(start_date, end_date, term):
    """Load closing prices for one term as a daily, forward-filled frame.

    Args:
        start_date: First day of the range (passed to the query and to
            ``pd.date_range``).
        end_date: Last day of the range, inclusive.
        term: Value matched against ``yahoo_price_tbl.term``.

    Returns:
        A DataFrame with a ``Close`` column, indexed by every calendar day
        from ``start_date`` to ``end_date``. Days without a stored price take
        the previous available price; days before the first stored price stay
        NaN. Returns ``None`` when the query yields no rows or any error
        occurs (the error is printed).
    """
    # Pre-bind both handles so the cleanup below is safe even when the
    # connection itself could not be opened.
    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()
        query = """
            SELECT date AS "Date", close AS "Close"
            FROM yahoo_price_tbl
            WHERE term = %s AND date BETWEEN %s AND %s
            ORDER BY date
        """
        cursor.execute(query, (term, start_date, end_date))
        rows = cursor.fetchall()
        if not rows:
            print(f"No price data returned for {term}.")
            return None
        df = pd.DataFrame(rows, columns=[desc[0] for desc in cursor.description])

        # Convert Date column to datetime and set as index
        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)

        # Create a complete date range and reindex the price data
        all_days = pd.date_range(start=start_date, end=end_date, freq='D')
        df = df.reindex(all_days)

        # Forward fill missing prices over weekends/holidays
        df['Close'] = df['Close'].ffill()

        return df
    except Exception as e:
        print(f"Error fetching price data for {term}: {e}")
        return None
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

# Fetch sentiment data
def fetch_moving_averages(start_date, end_date, term):
    """Load the 7-day sentiment moving average for one term as a daily frame.

    Args:
        start_date: First day of the range (passed to the query and to
            ``pd.date_range``).
        end_date: Last day of the range, inclusive.
        term: Value matched against ``snt_ma_tbl.term``.

    Returns:
        A DataFrame with a ``combined_compound_ma_7`` column, indexed by every
        calendar day from ``start_date`` to ``end_date``. Missing days take
        the previous available value; days before the first stored value stay
        NaN. Returns ``None`` when the query yields no rows or any error
        occurs (the error is printed).
    """
    # Pre-bind both handles so the cleanup below is safe even when the
    # connection itself could not be opened.
    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()
        query = """
            SELECT date AS "Date", combined_compound_ma_7
            FROM snt_ma_tbl
            WHERE term = %s AND date BETWEEN %s AND %s
            ORDER BY date
        """
        cursor.execute(query, (term, start_date, end_date))
        rows = cursor.fetchall()
        if not rows:
            print(f"No sentiment data returned for {term}.")
            return None
        df = pd.DataFrame(rows, columns=[desc[0] for desc in cursor.description])

        # Convert Date column to datetime and set as index
        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)

        # Create a complete date range to align sentiment data
        all_days = pd.date_range(start=start_date, end=end_date, freq='D')
        df = df.reindex(all_days)

        # Forward fill missing sentiment data
        df['combined_compound_ma_7'] = df['combined_compound_ma_7'].ffill()

        return df
    except Exception as e:
        print(f"Error fetching sentiment data for {term}: {e}")
        return None
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

# Bollinger Bands
def calculate_bollinger_bands(df, column_name='Close', window=20, num_std_dev=2):
    """Add rolling mean, rolling std and Bollinger band columns to ``df``.

    The frame is modified in place and also returned. Four columns are
    written: ``MA``, ``STD``, ``Upper Band`` and ``Lower Band``, where the
    bands sit ``num_std_dev`` rolling standard deviations around the rolling
    mean of ``column_name`` over ``window`` rows.
    """
    rolling_view = df[column_name].rolling(window=window)
    center = rolling_view.mean()
    spread = rolling_view.std()
    band_offset = num_std_dev * spread

    df['MA'] = center
    df['STD'] = spread
    df['Upper Band'] = center + band_offset
    df['Lower Band'] = center - band_offset
    return df

# Scale sentiment
def scale_sentiment_relative_to_price(sentiment_df, price_df):
    """Rescale the sentiment series into the min/max range of the close price.

    Writes ``scaled_combined_compound_ma_7`` into ``sentiment_df`` (in place)
    and returns that frame. When the price has a single constant value there
    is no range to scale into, so the raw sentiment values are copied instead
    and a warning is printed.
    """
    raw_col = 'combined_compound_ma_7'
    scaled_col = 'scaled_combined_compound_ma_7'

    closes = price_df['Close']
    lowest, highest = closes.min(), closes.max()

    if lowest != highest:
        range_scaler = MinMaxScaler(feature_range=(lowest, highest))
        sentiment_df[scaled_col] = range_scaler.fit_transform(sentiment_df[[raw_col]])
        return sentiment_df

    # Degenerate price range: fall back to the unscaled sentiment values.
    print("Warning: Constant price detected. Skipping sentiment scaling.")
    sentiment_df[scaled_col] = sentiment_df[raw_col]
    return sentiment_df

# Signals
def generate_signals(df, price_df):
    """Flag rows where the close price breaks out of the bands stored in ``df``.

    ``Buy Signal`` is True where the close is below ``Lower Band`` and
    ``Sell Signal`` is True where it is above ``Upper Band``. Both columns are
    written into ``df`` in place and ``df`` is returned.
    """
    closes = price_df['Close']
    below_lower_band = closes < df['Lower Band']
    above_upper_band = closes > df['Upper Band']

    df['Buy Signal'] = below_lower_band
    df['Sell Signal'] = above_upper_band
    return df

# Backtest
def backtest_strategy(df, price_df, initial_balance=10000):
    """Simulate an all-in / all-out strategy driven by the signal columns.

    Args:
        df: Frame holding boolean ``Buy Signal`` and ``Sell Signal`` columns.
            A ``Portfolio Value`` column is added to it in place.
        price_df: Frame whose ``Close`` column is looked up by the index
            labels of ``df``.
        initial_balance: Starting cash.

    Returns:
        ``df`` with the added ``Portfolio Value`` column (cash plus the market
        value of the open position, one entry per row).
    """
    balance = initial_balance
    position = 0
    portfolio_value = []

    # Iterating the two signal columns directly avoids building a Series per
    # row, which is what iterrows() does.
    for index, buy_signal, sell_signal in zip(df.index, df['Buy Signal'], df['Sell Signal']):
        close_price = price_df.at[index, 'Close']
        # A non-positive price cannot be bought: dividing by it would turn the
        # position into inf (or raise) and corrupt the rest of the run.
        if buy_signal and balance > 0 and close_price > 0:
            position = balance / close_price
            balance = 0
        elif sell_signal and position > 0:
            balance = position * close_price
            position = 0

        # While flat the value is just cash; multiplying a zero position by a
        # missing (NaN) price would otherwise report NaN for a known balance.
        holdings_value = position * close_price if position > 0 else 0
        portfolio_value.append(balance + holdings_value)

    df['Portfolio Value'] = portfolio_value
    return df

# Buy and Hold
def calculate_buy_and_hold_portfolio(price_df, initial_balance=10000):
    """Add the value of a buy-and-hold position to ``price_df``.

    The position is opened with ``initial_balance`` at the first available
    (non-missing) close and valued at every later close. Rows before that
    first available close are NaN. The ``Buy and Hold Portfolio`` column is
    written in place and ``price_df`` is returned.
    """
    closes = price_df['Close']
    available = closes.dropna()

    if available.empty:
        # Nothing to anchor the position on (empty frame or no prices at all);
        # previously this raised IndexError or silently relied on NaN maths.
        price_df['Buy and Hold Portfolio'] = float('nan')
        return price_df

    # Anchor on the first real price so one missing leading value does not
    # turn the whole column into NaN.
    initial_price = available.iloc[0]
    price_df['Buy and Hold Portfolio'] = initial_balance * (closes / initial_price)
    return price_df

# MAIN EXECUTION
# For every asset and look-back length: fetch price and sentiment, run the
# Bollinger strategy on each, add a buy-and-hold benchmark, and collect one
# combined frame per (asset, timeframe) in all_combined_results.
terms = [
    'SOL', 'KAS', 'LINK', 'ADA', 'MATIC', 'AVAX', 'POPCAT', 'SUI', 'HNT', 'WIF', 'BTC', 'DOGE', 'ETH',
    'GME', 'NVDA', 'JPM', 'GOOGL', 'DXY', 'TSMC', 'CVX', 'COIN', 'AMZN', 'MSFT', 'NFLX', 'DIS', 'AAPL', 'TSLA'
]

timeframes = [30, 60, 90, 120, 150]
all_combined_results = []

# Rolling window used for the bands. The same number of extra days is fetched
# up front and trimmed afterwards, so the three uses must stay in sync.
BOLLINGER_WINDOW = 20

# Take the clock once: every asset and timeframe then shares the same end
# date even if the run crosses midnight.
run_date = datetime.today()
end_date = run_date.strftime('%Y-%m-%d')

for term in terms:
    print(f"\n\n===== Processing Asset: {term} =====")

    for days in timeframes:
        print(f"\n--- Processing {days}-Day Period ---")

        # Extend date range by the window length to account for Bollinger Band calculation
        extended_start_date = (run_date - timedelta(days=days + BOLLINGER_WINDOW)).strftime('%Y-%m-%d')
        print(f"Using extended date range: {extended_start_date} to {end_date}")

        price_df = fetch_price_data(extended_start_date, end_date, term)
        sentiment_df = fetch_moving_averages(extended_start_date, end_date, term)

        if price_df is None or sentiment_df is None or price_df.empty or sentiment_df.empty:
            print("No data available for this asset and timeframe.")
            continue

        common_index = price_df.index.intersection(sentiment_df.index)
        price_df = price_df.loc[common_index].copy()
        sentiment_df = sentiment_df.loc[common_index].copy()

        # Forward fill missing values after merging
        price_df.ffill(inplace=True)
        sentiment_df.ffill(inplace=True)

        # Ensure minimum data length to calculate Bollinger Bands
        if len(price_df) <= BOLLINGER_WINDOW:
            print(f"Not enough data to calculate Bollinger Bands for {term} - {days}-day window. Skipping.")
            continue

        # Price strategy
        price_df = calculate_bollinger_bands(price_df, window=BOLLINGER_WINDOW)
        price_df = generate_signals(price_df, price_df)
        price_df = backtest_strategy(price_df, price_df)

        # Sentiment strategy: bands come from the rescaled sentiment series,
        # but signals and trades are still evaluated against the close price.
        sentiment_df = scale_sentiment_relative_to_price(sentiment_df, price_df)
        sentiment_df = calculate_bollinger_bands(
            sentiment_df, column_name='scaled_combined_compound_ma_7', window=BOLLINGER_WINDOW
        )
        sentiment_df = generate_signals(sentiment_df, price_df)
        sentiment_df = backtest_strategy(sentiment_df, price_df)

        # Trim the warm-up rows added for the Bollinger Band calculation
        price_df = price_df.iloc[BOLLINGER_WINDOW:].copy()
        sentiment_df = sentiment_df.iloc[BOLLINGER_WINDOW:].copy()

        # Buy and Hold
        price_df = calculate_buy_and_hold_portfolio(price_df)

        # Prefix columns after all logic
        price_df = price_df.add_prefix('Price_')
        sentiment_df = sentiment_df.add_prefix('Sentiment_')

        # Add metadata once, on the left-hand frame only. Tagging both frames
        # produced duplicate 'Asset'/'Timeframe' labels in the merged result.
        price_df['Asset'] = term
        price_df['Timeframe'] = f'{days}-day'

        # Merge and store results
        merged_df = pd.concat([price_df, sentiment_df], axis=1, join='inner')
        all_combined_results.append(merged_df)

# Final assembly
# Stack the per-(asset, timeframe) frames into one long table with the date
# as an ordinary column.
if not all_combined_results:
    # pd.concat([]) would fail with "No objects to concatenate"; report the
    # actual cause instead (same exception type as before).
    raise ValueError(
        "No backtest results were produced: no asset/timeframe had both price and sentiment data."
    )

final_combined_df = pd.concat(all_combined_results)
final_combined_df.reset_index(inplace=True)
# The per-day index carries no name, so reset_index() labels it 'index'.
final_combined_df.rename(columns={'index': 'Date'}, inplace=True)

# Remove duplicated columns (keeps the first occurrence of each label)
final_combined_df = final_combined_df.loc[:, ~final_combined_df.columns.duplicated()]

# Add final portfolio values per Asset & Timeframe
def get_final_values(df, column):
    """Broadcast each (Asset, Timeframe) group's last known value of ``column``.

    Returns a Series aligned with ``df`` in which every row carries the final
    forward-filled value of its group; groups with no values at all yield
    ``None`` from the per-group function.
    """
    def _last_known(values):
        if values.isnull().all():
            return None
        return values.ffill().iloc[-1]

    per_group = df.groupby(['Asset', 'Timeframe'])[column]
    return per_group.transform(_last_known)

# Closing value of each strategy, repeated on every row of its group.
final_combined_df['Price_Portfolio Final'] = get_final_values(final_combined_df, 'Price_Portfolio Value')
final_combined_df['Sentiment_Portfolio Final'] = get_final_values(final_combined_df, 'Sentiment_Portfolio Value')
final_combined_df['BuyHold_Portfolio Final'] = get_final_values(final_combined_df, 'Price_Buy and Hold Portfolio')

# Fill missing portfolio values
portfolio_cols = ['Sentiment_Portfolio Value', 'Price_Portfolio Value', 'Price_Buy and Hold Portfolio']
group_keys = ['Asset', 'Timeframe']

present_portfolio_cols = [col for col in portfolio_cols if col in final_combined_df.columns]
final_combined_df[present_portfolio_cols] = final_combined_df[present_portfolio_cols].apply(
    pd.to_numeric, errors='coerce'
)

# Forward-fill strictly inside each (Asset, Timeframe) series. Filling by
# asset only, or over the whole table, copies the last value of one series
# into the leading gaps of the next, unrelated one.
fill_cols = [col for col in final_combined_df.columns if col not in group_keys]
final_combined_df[fill_cols] = final_combined_df.groupby(group_keys)[fill_cols].ffill()

# Output
print("\n===== Final Combined DataFrame =====")
print(final_combined_df.head())

# Save to file
final_combined_df.to_csv('bollinger_backtest_tableau.csv', index=False)




===== Processing Asset: SOL =====

--- Processing 30-Day Period ---
Using extended date range: 2025-04-19 to 2025-06-08

--- Processing 60-Day Period ---
Using extended date range: 2025-03-20 to 2025-06-08

--- Processing 90-Day Period ---
Using extended date range: 2025-02-18 to 2025-06-08

--- Processing 120-Day Period ---
Using extended date range: 2025-01-19 to 2025-06-08

--- Processing 150-Day Period ---
Using extended date range: 2024-12-20 to 2025-06-08


===== Processing Asset: KAS =====

--- Processing 30-Day Period ---
Using extended date range: 2025-04-19 to 2025-06-08

--- Processing 60-Day Period ---
Using extended date range: 2025-03-20 to 2025-06-08

--- Processing 90-Day Period ---
Using extended date range: 2025-02-18 to 2025-06-08

--- Processing 120-Day Period ---
Using extended date range: 2025-01-19 to 2025-06-08

--- Processing 150-Day Period ---
Using extended date range: 2024-12-20 to 2025-06-08


===== Processing Asset: LINK =====

--- Processing 30-Day Peri

No price data returned for DXY.
No data available for this asset and timeframe.

--- Processing 90-Day Period ---
Using extended date range: 2025-02-18 to 2025-06-08

--- Processing 120-Day Period ---
Using extended date range: 2025-01-19 to 2025-06-08

--- Processing 150-Day Period ---
Using extended date range: 2024-12-20 to 2025-06-08


===== Processing Asset: TSMC =====

--- Processing 30-Day Period ---
Using extended date range: 2025-04-19 to 2025-06-08

--- Processing 60-Day Period ---
Using extended date range: 2025-03-20 to 2025-06-08

--- Processing 90-Day Period ---
Using extended date range: 2025-02-18 to 2025-06-08

--- Processing 120-Day Period ---
Using extended date range: 2025-01-19 to 2025-06-08

--- Processing 150-Day Period ---
Using extended date range: 2024-12-20 to 2025-06-08


===== Processing Asset: CVX =====

--- Processing 30-Day Period ---
Using extended date range: 2025-04-19 to 2025-06-08

--- Processing 60-Day Period ---
Using extended date range: 2025-03-

In [19]:
# Inspect the column labels of the assembled results table.
final_combined_df.columns

Index(['Date', 'Price_Close', 'Price_MA', 'Price_STD', 'Price_Upper Band',
       'Price_Lower Band', 'Price_Buy Signal', 'Price_Sell Signal',
       'Price_Portfolio Value', 'Price_Buy and Hold Portfolio', 'Asset',
       'Timeframe', 'Sentiment_combined_compound_ma_7',
       'Sentiment_scaled_combined_compound_ma_7', 'Sentiment_MA',
       'Sentiment_STD', 'Sentiment_Upper Band', 'Sentiment_Lower Band',
       'Sentiment_Buy Signal', 'Sentiment_Sell Signal',
       'Sentiment_Portfolio Value', 'Price_Portfolio Final',
       'Sentiment_Portfolio Final', 'BuyHold_Portfolio Final'],
      dtype='object')

In [20]:
# === STEP 1: Portfolio Statistics Summary ===

# Function to calculate Total Return %
def calculate_total_returns(df):
    """Print and export the total return (%) of each strategy per group.

    For every (Asset, Timeframe) group the return is measured from the first
    to the last row of the strategy's portfolio-value column. A group whose
    calculation raises is reported and left out. The summary is printed and
    written to 'bollinger_backtest_total_returns.csv'; nothing is returned.
    """
    # (source column, output label) for each strategy, in output order.
    strategy_columns = (
        ('Price_Portfolio Value', 'Price Total Return (%)'),
        ('Sentiment_Portfolio Value', 'Sentiment Total Return (%)'),
        ('Price_Buy and Hold Portfolio', 'Buy & Hold Return (%)'),
    )

    result_list = []
    for (asset, timeframe), group in df.groupby(['Asset', 'Timeframe']):
        try:
            record = {'Asset': asset, 'Timeframe': timeframe}
            for source_col, label in strategy_columns:
                values = group[source_col]
                first_value = values.iloc[0]
                last_value = values.iloc[-1]
                record[label] = ((last_value - first_value) / first_value) * 100
            # Only keep the group once all three returns were computed.
            result_list.append(record)
        except Exception as e:
            print(f"Error calculating returns for {asset} {timeframe}: {e}")

    summary_df = pd.DataFrame(result_list)
    print("\n===== Portfolio Total Return Summary =====")
    print(summary_df)

    # Export
    summary_df.to_csv('bollinger_backtest_total_returns.csv', index=False)

# === Call the function ===
# Prints the per-group return summary and writes
# 'bollinger_backtest_total_returns.csv'.
calculate_total_returns(final_combined_df)



===== Portfolio Total Return Summary =====
    Asset Timeframe  Price Total Return (%)  Sentiment Total Return (%)  \
0    AAPL   120-day              -13.567584                    0.000000   
1    AAPL   150-day               -5.042360                    1.639835   
2    AAPL    30-day                0.000000                    4.429764   
3    AAPL    60-day                2.549661                    0.000000   
4    AAPL    90-day              -10.356954                    0.000000   
..    ...       ...                     ...                         ...   
128   WIF   120-day               15.863019                  109.281062   
129   WIF   150-day              -42.791291                   28.384395   
130   WIF    30-day                1.991565                    0.000000   
131   WIF    60-day                1.991565                   86.730656   
132   WIF    90-day               19.164259                  167.704741   

     Buy & Hold Return (%)  
0               -10.416026

In [21]:
#terms = ['SOL']  # Add your assets here

#timeframes = [30,60,90,120,150]

# === STEP: Combined Portfolio Statistics Summary ===

def _strategy_metrics(values):
    """Return (total return %, volatility, max drawdown %, Sharpe) for one value series."""
    first_value = values.iloc[0]
    last_value = values.iloc[-1]
    total_return = ((last_value - first_value) / first_value) * 100

    daily_pct = values.pct_change().dropna()
    daily_std = daily_pct.std()

    # Daily std scaled by sqrt(number of daily returns), i.e. volatility over
    # the length of the sample rather than an annualised figure.
    volatility = daily_std * (len(daily_pct) ** 0.5)

    # Largest percentage drop from the running peak.
    running_peak = values.cummax()
    max_drawdown = ((values - running_peak) / running_peak).min() * 100

    # Sharpe ratio (assuming risk-free rate = 0); flat series count as 0.
    sharpe = daily_pct.mean() / daily_std if daily_std != 0 else 0
    return total_return, volatility, max_drawdown, sharpe


def calculate_combined_portfolio_statistics(df):
    """Print and export return, volatility, drawdown and Sharpe per group.

    One row is produced for each (Asset, Timeframe) group, covering the price
    strategy, the sentiment strategy and buy-and-hold. A group whose
    calculation raises is reported and left out. The table is written to
    'bollinger_backtest_summary_stats.csv' and its head is printed; nothing
    is returned. With no groups at all a header-only CSV is written.
    """
    # (label prefix, source column, total-return column name) per strategy.
    strategies = (
        ('Price', 'Price_Portfolio Value', 'Price Total Return (%)'),
        ('Sentiment', 'Sentiment_Portfolio Value', 'Sentiment Total Return (%)'),
        ('Buy & Hold', 'Price_Buy and Hold Portfolio', 'Buy & Hold Return (%)'),
    )

    # Fixed column order: keys, the three returns, then each metric grouped
    # across strategies. Declaring it up front also gives an empty result a
    # proper header instead of failing on missing key columns.
    output_columns = (
        ['Asset', 'Timeframe']
        + [return_label for _, _, return_label in strategies]
        + [f'{label} Volatility' for label, _, _ in strategies]
        + [f'{label} Max Drawdown (%)' for label, _, _ in strategies]
        + [f'{label} Sharpe Ratio' for label, _, _ in strategies]
    )

    records = []
    for (asset, timeframe), group in df.groupby(['Asset', 'Timeframe']):
        try:
            record = {'Asset': asset, 'Timeframe': timeframe}
            for label, source_col, return_label in strategies:
                total_return, volatility, max_drawdown, sharpe = _strategy_metrics(group[source_col])
                record[return_label] = total_return
                record[f'{label} Volatility'] = volatility
                record[f'{label} Max Drawdown (%)'] = max_drawdown
                record[f'{label} Sharpe Ratio'] = sharpe
            # Keep the group only when every metric was computed.
            records.append(record)
        except Exception as e:
            print(f"Error calculating stats for {asset} {timeframe}: {e}")

    combined_summary_df = pd.DataFrame(records, columns=output_columns)

    # Export
    combined_summary_df.to_csv('bollinger_backtest_summary_stats.csv', index=False)

    print("\n===== Combined Summary Stats =====")
    print(combined_summary_df.head())

# === Call the function ===
# Writes 'bollinger_backtest_summary_stats.csv', which the steps below read
# back in.
calculate_combined_portfolio_statistics(final_combined_df)

from scipy.stats import ttest_rel
import numpy as np

# === STEP: Statistical Testing ===

def perform_statistical_tests(summary_df):
    """Run paired t-tests between the three strategies' total returns.

    Each pair of return columns in ``summary_df`` is compared with
    ``ttest_rel`` (NaN pairs omitted). The p-values and a 5% significance
    flag are printed and written to 'bollinger_backtest_return_ttest.csv';
    nothing is returned.
    """
    print("\n===== Performing Statistical Tests on Returns =====")

    # Extract returns
    price_returns = summary_df['Price Total Return (%)'].values
    sentiment_returns = summary_df['Sentiment Total Return (%)'].values
    bnh_returns = summary_df['Buy & Hold Return (%)'].values

    # (label, first sample, second sample) for every paired comparison.
    comparisons = (
        ('Price vs Sentiment', price_returns, sentiment_returns),
        ('Price vs Buy & Hold', price_returns, bnh_returns),
        ('Sentiment vs Buy & Hold', sentiment_returns, bnh_returns),
    )

    # Run all tests first, then build the report rows.
    p_values = []
    for _, sample_a, sample_b in comparisons:
        _, p_value = ttest_rel(sample_a, sample_b, nan_policy='omit')
        p_values.append(p_value)

    results = [
        {
            'Comparison': label,
            'p-value': p_value,
            'Significant (<0.05)': p_value < 0.05
        }
        for (label, _, _), p_value in zip(comparisons, p_values)
    ]

    stats_results_df = pd.DataFrame(results)
    print(stats_results_df)

    # Export
    stats_results_df.to_csv('bollinger_backtest_return_ttest.csv', index=False)

# === Load previously saved summary stats ===
# Reads the CSV written by calculate_combined_portfolio_statistics.
summary_stats_df = pd.read_csv('bollinger_backtest_summary_stats.csv')

# Run test (prints the results and writes 'bollinger_backtest_return_ttest.csv')
perform_statistical_tests(summary_stats_df)

# === STEP X: Add Optimal Strategy Column ===

def add_optimal_strategy(summary_df):
    """Add an 'Optimal Strategy' column naming the best-returning strategy.

    For each row the strategy with the highest total return among Price,
    Sentiment and Buy & Hold is chosen; ties go to the first in that order.
    Missing (NaN) returns are ignored, and a row with no returns at all gets
    ``None``. The frame is modified in place, written back to
    'bollinger_backtest_summary_stats.csv', and returned.
    """
    strategy_columns = {
        'Price': 'Price Total Return (%)',
        'Sentiment': 'Sentiment Total Return (%)',
        'Buy & Hold': 'Buy & Hold Return (%)',
    }

    def determine_optimal(row):
        # Drop NaN first: every comparison against NaN is False, so max()
        # would otherwise keep a NaN candidate just because it came first.
        strategies = {
            name: row[column]
            for name, column in strategy_columns.items()
            if pd.notna(row[column])
        }
        if not strategies:
            return None
        return max(strategies, key=strategies.get)

    # Building a plain list also works for an empty frame, where
    # DataFrame.apply(axis=1) does not return a column-shaped result.
    summary_df['Optimal Strategy'] = [determine_optimal(row) for _, row in summary_df.iterrows()]
    print("\n===== Summary Stats with Optimal Strategy =====")
    print(summary_df[['Asset', 'Timeframe', 'Optimal Strategy']].head())

    # Export updated summary
    summary_df.to_csv('bollinger_backtest_summary_stats.csv', index=False)
    return summary_df

# === Call after existing summary stats generation ===
# Adds the 'Optimal Strategy' column and rewrites
# 'bollinger_backtest_summary_stats.csv' with it.
summary_stats_df = add_optimal_strategy(summary_stats_df)



===== Combined Summary Stats =====
  Asset Timeframe  Price Total Return (%)  Sentiment Total Return (%)  \
0  AAPL   120-day              -13.567584                    0.000000   
1  AAPL   150-day               -5.042360                    1.639835   
2  AAPL    30-day                0.000000                    4.429764   
3  AAPL    60-day                2.549661                    0.000000   
4  AAPL    90-day              -10.356954                    0.000000   

   Buy & Hold Return (%)  Price Volatility  Sentiment Volatility  \
0             -10.416026          0.258207              0.000000   
1             -15.978574          0.269722              0.016398   
2               2.714955          0.000000              0.033293   
3               2.549661          0.133613              0.000000   
4             -10.356954          0.253499              0.000000   

   Buy & Hold Volatility  Price Max Drawdown (%)  Sentiment Max Drawdown (%)  \
0               0.264120            