In [20]:
import pandas as pd
import numpy as np
import psycopg2
from sklearn.preprocessing import MinMaxScaler

import pandas as pd
from datetime import datetime, timedelta
import time

# ✅ Automatically set `end_date` to today's date
end_date = time.strftime('%Y-%m-%d', time.gmtime(time.time()))  # Today's date as a string

# ✅ Automatically set `start_date` to 60 days before `end_date`
start_date = (datetime.today() - timedelta(days=45)).strftime('%Y-%m-%d')

# ✅ Calculate the extended start date for sentiment analysis (21 days before `start_date`)
extended_start_date = (pd.to_datetime(start_date) - pd.Timedelta(days=21)).strftime('%Y-%m-%d')

# ✅ Convert start, extended start, and end dates to `Timestamp` for consistency
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
extended_start_date = pd.to_datetime(extended_start_date)

# ✅ Debug print to check the generated dates
print(f"Start Date: {start_date}")
print(f"Extended Start Date: {extended_start_date}")
print(f"End Date: {end_date}")



grp_terms = ['SOL', 'KAS', 'LINK', 'ADA', 'MATIC', 'AMZN', 'MSFT', 'AVAX', 'AAPL', 'GME', 'NVDA','JPM','DOGE' 
             'GOOGL','ETH','DXY', 'TSMC', 'CVX', 'COIN', 'POPCAT', 'SUI', 'HNT', 'NFLX', 'WIF', 'DIS', 'BTC', 'TSLA']

# Initialize a dictionary to collect DataFrames for each term
combined_data_dict = {}
# Create dictionaries to store different types of data
price_data_dict = {}  # Stores the raw price data
technical_indicators_dict = {}  # Stores the extended data with technical indicators
filtered_data_dict = {}  # Stores the filtered data after applying the technical indicators

# Database connection parameters
db_params = {
    'dbname': 'twt_snt',
    'user': 'postgres',
    'password': 'Ilpmnl!69gg',
    'host': 'localhost',
    'port': '5432'
}

# Function to fetch tweets from the database
def fetch_tweets(start_date, end_date, term):
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()
        query = """
            SELECT * FROM twt_tbl
            WHERE term = %s AND date BETWEEN %s AND %s
        """
        cursor.execute(query, (term, extended_start_date, end_date))
        rows = cursor.fetchall()
        columns = [desc[0] for desc in cursor.description]
        df = pd.DataFrame(rows, columns=columns)
        return df
    except Exception as e:
        print(f"Error fetching tweets: {e}")
        return None
    finally:
        cursor.close()
        conn.close()

# Function to fetch moving averages from the database
def fetch_moving_averages(extended_start_date, end_date, term):
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()
        query = """
            SELECT date, term, combined_compound_ma_7, combined_compound_ma_21, 
                   combined_compound_ma_50, combined_compound_ma_100, combined_compound_ma_200, combined_compound
            FROM snt_ma_blend_tbl
            WHERE term = %s AND date BETWEEN %s AND %s
        """
        cursor.execute(query, (term, extended_start_date, end_date))
        rows = cursor.fetchall()
        columns = [desc[0] for desc in cursor.description]
        df = pd.DataFrame(rows, columns=columns)
        return df
    except Exception as e:
        print(f"Error fetching moving averages: {e}")
        return None
    finally:
        cursor.close()
        conn.close()

# Function to fetch data from the database
def fetch_price_data(start_date, end_date, term):
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()
        query = """
            SELECT date, term, open, high, low, close, adj_close, volume,
            close_ma_7, close_ma_21, close_ma_50, close_ma_100, close_ma_200
            FROM yahoo_price_tbl
            WHERE term = %s AND date BETWEEN %s AND %s
        """
        cursor.execute(query, (term, start_date, end_date))
        rows = cursor.fetchall()
        columns = [desc[0] for desc in cursor.description]
        df = pd.DataFrame(rows, columns=columns)
        return df
    except Exception as e:
        print(f"Error fetching price data: {e}")
        return None
    finally:
        cursor.close()
        conn.close()


def fetch_predictions_data(start_date, end_date, term):
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()
        query = """
            SELECT prediction_date, term, prediction
            FROM predictions_tbl
            WHERE term = %s AND prediction_date BETWEEN %s AND %s
        """
        cursor.execute(query, (term, start_date, end_date))
        rows = cursor.fetchall()
        columns = [desc[0] for desc in cursor.description]
        df = pd.DataFrame(rows, columns=columns)
        return df
    except Exception as e:
        print(f"Error fetching predictions data: {e}")
        return None
    finally:
        cursor.close()
        conn.close()
        
        
def fetch_bollinger_data(start_date, end_date, term):
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()
        query = """
            SELECT * FROM bollinger_tbl
            WHERE term = %s AND date BETWEEN %s AND %s
        """
        cursor.execute(query, (term, start_date, end_date))
        rows = cursor.fetchall()
        columns = [desc[0] for desc in cursor.description]
        df = pd.DataFrame(rows, columns=columns)
        return df
    except Exception as e:
        print(f"Error fetching predictions data: {e}")
        return None
    finally:
        cursor.close()
        conn.close()
        

def fetch_signals(start_date, end_date, terms):
    # Ensure terms is always a list, even if it's a single term
    terms = [terms] if isinstance(terms, str) else terms
    db_params = {
        'dbname': 'twt_snt',
        'user': 'postgres',
        'password': 'Ilpmnl!69gg',
        'host': 'localhost',
        'port': '5432'
    }
    conn = psycopg2.connect(**db_params)
    try:
        cursor = conn.cursor()
        query = """
            SELECT *
            FROM signal_cnt_tbl
            WHERE term = ANY(%s) AND date BETWEEN %s AND %s
        """
        # Pass terms as a list to form a PostgreSQL array
        cursor.execute(query, (terms, start_date, end_date))
        rows = cursor.fetchall()
        columns = [desc[0] for desc in cursor.description]
        df = pd.DataFrame(rows, columns=columns)
        return df
    except Exception as e:
        print(f"Error fetching signals: {e}")
        return pd.DataFrame()
    finally:
        conn.close()


# Functions to calculate technical indicators (as given)
def calculate_rsi(series, period=14):
    delta = series.diff(1)
    gain = delta.where(delta > 0, 0.0)
    loss = -delta.where(delta < 0, 0.0)
    avg_gain = gain.rolling(window=period, min_periods=1).mean()
    avg_loss = loss.rolling(window=period, min_periods=1).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_stochastic_rsi(df, rsi_column, window=14):
    rsi_min = df[rsi_column].rolling(window=window, min_periods=1).min()
    rsi_max = df[rsi_column].rolling(window=window, min_periods=1).max()
    stoch_rsi = (df[rsi_column] - rsi_min) / (rsi_max - rsi_min)
    return stoch_rsi * 100

def calculate_mfi(df, window=14):
    typical_price = (df['high'] + df['low'] + df['close']) / 3
    money_flow = typical_price * df['volume']
    positive_flow = (money_flow.where(typical_price > typical_price.shift(1), 0)).rolling(window=window).sum()
    negative_flow = (money_flow.where(typical_price < typical_price.shift(1), 0)).rolling(window=window).sum()
    mfi = 100 - (100 / (1 + positive_flow / negative_flow))
    
    # Normalize MFI to range [-100, 100]
    mfi_normalized = 2 * mfi - 100
    return mfi_normalized

def calculate_sfi(df, window=14):
    # Fill missing volume data with the most recent non-zero or non-null value
    df['volume'] = df['volume'].fillna(method='ffill')
    
    typical_sentiment = df['combined_compound_ma_7']
    sentiment_flow = typical_sentiment * df['volume']
    positive_flow = (sentiment_flow.where(typical_sentiment > typical_sentiment.shift(1), 0)).rolling(window=window).sum()
    negative_flow = (sentiment_flow.where(typical_sentiment < typical_sentiment.shift(1), 0)).rolling(window=window).sum()
    sfi = 100 - (100 / (1 + positive_flow / negative_flow))
    
    # Normalize SFI to range [-100, 100]
    sfi_normalized = 2 * sfi - 100
    return sfi_normalized

def calculate_macd(series, short_window=12, long_window=26, signal_window=9):
    short_ema = series.ewm(span=short_window, adjust=False).mean()
    long_ema = series.ewm(span=long_window, adjust=False).mean()
    macd = short_ema - long_ema
    signal = macd.ewm(span=signal_window, adjust=False).mean()
    return macd, signal


def calculate_bollinger_bands(df, ma_column, window=20, num_std_dev=2, band_type='price'):
    df[f'{band_type}_MA'] = df[ma_column]
    df[f'{band_type}_STD'] = df[ma_column].rolling(window=window).std()
    df[f'{band_type}_Upper_Band'] = df[f'{band_type}_MA'] + (df[f'{band_type}_STD'] * num_std_dev)
    df[f'{band_type}_Lower_Band'] = df[f'{band_type}_MA'] - (df[f'{band_type}_STD'] * num_std_dev)
    return df

def scale_features_to_price(df, columns_to_scale, reference_column):
    scaled_columns = {}
    for col in columns_to_scale:
        scaler = MinMaxScaler(feature_range=(df[reference_column].min(), df[reference_column].max()))
        scaled_columns[f'scaled_{col}'] = scaler.fit_transform(df[[col]]).flatten()
    
    # Convert scaled_columns dictionary to a DataFrame
    scaled_df = pd.DataFrame(scaled_columns, index=df.index)
    
    # Concatenate the original DataFrame with the scaled DataFrame
    return pd.concat([df, scaled_df], axis=1)

def calculate_boll_upper_advanced(boll_upper_price, boll_lower_sent, boll_upper_sent, boll_lower_price):
    if boll_upper_price >= boll_lower_sent and boll_upper_sent >= boll_lower_price:
        # When the bands overlap, take the minimum of the upper bounds, but ensure it's above the lower bound
        return max(min(boll_upper_price, boll_upper_sent), boll_lower_price)
    else:
        # When there's no overlap, choose the upper band that is closer to the other band's lower boundary
        if abs(boll_upper_price - boll_lower_sent) < abs(boll_upper_sent - boll_lower_price):
            return max(boll_upper_price, boll_lower_price)
        else:
            return max(boll_upper_sent, boll_lower_price)

def calculate_boll_lower_advanced(boll_lower_price, boll_upper_sent, boll_lower_sent, boll_upper_price):
    if boll_lower_price <= boll_upper_sent and boll_lower_sent <= boll_upper_price:
        # When the bands overlap, take the maximum of the lower bounds, but ensure it's below the upper bound
        return min(max(boll_lower_price, boll_lower_sent), boll_upper_price)
    else:
        # When there's no overlap, choose the lower band that is closer to the other band's upper boundary
        if abs(boll_lower_price - boll_upper_sent) < abs(boll_lower_sent - boll_upper_price):
            return min(boll_lower_price, boll_upper_price)
        else:
            return min(boll_lower_sent, boll_upper_price)
        
def normalize_column(df, column):
    min_val = df[column].min()
    max_val = df[column].max()
    return ((df[column] - min_val) / (max_val - min_val)) * 100
        
    
# Function to find divergence
def find_MACD_price_divergence(df):
    divergence = ['None']  # Start with 'None' for the first row since no comparison can be made
    for i in range(1, len(df)):
        if df['close'].iloc[i] < df['close'].iloc[i-1] and df['MACD'].iloc[i] > df['MACD'].iloc[i-1]:
            divergence.append('Bullish MACD Price Divergence')
        elif df['close'].iloc[i] > df['close'].iloc[i-1] and df['MACD'].iloc[i] < df['MACD'].iloc[i-1]:
            divergence.append('Bearish MACD Price Divergence')
        else:
            divergence.append('None')
    return divergence

# Function to find divergence
def find_MACD_sentiment_divergence(df):
    divergence = ['None']  # Start with 'None' for the first row since no comparison can be made
    for i in range(1, len(df)):
        if df['close'].iloc[i] < df['close'].iloc[i-1] and df['Sentiment_MACD'].iloc[i] > df['Sentiment_MACD'].iloc[i-1]:
            divergence.append('Bullish MACD Sentiment Divergence')
        elif df['close'].iloc[i] > df['close'].iloc[i-1] and df['Sentiment_MACD'].iloc[i] < df['Sentiment_MACD'].iloc[i-1]:
            divergence.append('Bearish MACD Sentiment Divergence')
        else:
            divergence.append('None')
    return divergence


# Function to find divergence
def find_RSI_price_divergence(df):
    divergence = ['None']  # Start with 'None' for the first row since no comparison can be made
    for i in range(1, len(df)):
        if df['close'].iloc[i] < df['close'].iloc[i-1] and df['RSI'].iloc[i] > df['RSI'].iloc[i-1]:
            divergence.append('Bullish RSI Price Divergence')
        elif df['close'].iloc[i] > df['close'].iloc[i-1] and df['RSI'].iloc[i] < df['RSI'].iloc[i-1]:
            divergence.append('Bearish RSI Price Divergence')
        else:
            divergence.append('None')
    return divergence


# Function to find divergence
def find_RSI_sentiment_divergence(df):
    divergence = ['None']  # Start with 'None' for the first row since no comparison can be made
    for i in range(1, len(df)):
        if df['close'].iloc[i] < df['close'].iloc[i-1] and df['Sentiment_RSI'].iloc[i] > df['Sentiment_RSI'].iloc[i-1]:
            divergence.append('Bullish RSI Sentiment Divergence')
        elif df['close'].iloc[i] > df['close'].iloc[i-1] and df['Sentiment_RSI'].iloc[i] < df['Sentiment_RSI'].iloc[i-1]:
            divergence.append('Bearish RSI Sentiment Divergence')
        else:
            divergence.append('None')
    return divergence

        
        
        



Start Date: 2025-04-23 00:00:00
Extended Start Date: 2025-04-02 00:00:00
End Date: 2025-06-08 00:00:00


In [21]:

import pandas as pd
import numpy as np

# Define dictionaries to store different results
moving_averages_dict = {}
scaled_features_dict = {}
combined_data_dict = {}

# Iterate over each term in the group
for grp_term in grp_terms:
    print(f"Processing term: {grp_term}")

    # Step 1: Fetch price data for the extended range
    extended_price_df = fetch_price_data(extended_start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'), grp_term)
    
    if extended_price_df is None or extended_price_df.empty:
        print(f"No price data found for term: {grp_term}")
        continue

    # Step 1.1: Set 'date' as the index and ensure it's unique
    extended_price_df['date'] = pd.to_datetime(extended_price_df['date'])
    extended_price_df.set_index('date', inplace=True)
    extended_price_df = extended_price_df.loc[~extended_price_df.index.duplicated(keep='first')]

    # Step 2: Fetch moving averages data
    ma_db_df = fetch_moving_averages(start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'), grp_term)
    
    # Step 2.1: Fetch prediction data
    pred_db_df = fetch_predictions_data(start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'), grp_term)
    
    # Step 2.2: Fetch Bollinger data
    boll_db_df = fetch_bollinger_data(start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'), grp_term)

    signals_df = fetch_signals(start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'), grp_term)
    
    if ma_db_df is not None and not ma_db_df.empty:
        ma_db_df['date'] = pd.to_datetime(ma_db_df['date'])
        ma_db_df.set_index('date', inplace=True)

        # Drop the 'term' column from extended_price_df to avoid duplication during the join
        extended_price_df.drop(columns=['term'], errors='ignore', inplace=True)

        # Join moving average data to extended price data
        combined_data_df = ma_db_df.join(extended_price_df, how='left')
        
        # Store results
        moving_averages_dict[grp_term] = combined_data_df.copy()
        
        
        # Assuming signals_df has been fetched and contains a 'date' column

        # Check and prepare signals_df
        if signals_df is not None and not signals_df.empty:
            # Convert 'date' to datetime and set as index
            signals_df['date'] = pd.to_datetime(signals_df['date'])
            signals_df.set_index('date', inplace=True)
            signals_df.drop(columns=['term'], errors='ignore', inplace=True)  # Drop 'term' if not needed or duplicate

            # Rename the index to 'date' for consistency, if you prefer
            signals_df.index.rename('date', inplace=True)

            # Join signals data to extended price data
            combined_data_df = combined_data_df.join(signals_df, how='left')

            #print("Signals data integrated into combined DataFrame.")
        else:
            print("No signals data available or DataFrame is empty.")

        # Now combined_data_df includes the signals data
        
        
        # If prediction data is available, process and join it
        if pred_db_df is not None and not pred_db_df.empty:
            # Convert prediction_date to datetime and set as index
            pred_db_df['prediction_date'] = pd.to_datetime(pred_db_df['prediction_date'])
            pred_db_df.set_index('prediction_date', inplace=True)
            pred_db_df.drop(columns=['term'], errors='ignore', inplace=True)
            # Rename the index to 'date' for consistency, if you prefer
            pred_db_df.index.rename('date', inplace=True)

            # Now join prediction data to combined_data_df
            combined_data_df = combined_data_df.join(pred_db_df, how='left')
            
            
            
        # Ensure the index is a proper DatetimeIndex and sorted
        combined_data_df.index = pd.to_datetime(combined_data_df.index)
        combined_data_df = combined_data_df.sort_index()



        # DATE RELATED ERROR CORRECTING
        # Filter out rows with dates in the future
        current_date = pd.Timestamp.now().normalize()
        # combined_data_df = combined_data_df[combined_data_df.index <= current_date]
        # Ensure the index is a proper `DatetimeIndex` and sorted
        combined_data_df.index = pd.to_datetime(combined_data_df.index)
        combined_data_df = combined_data_df.sort_index()    
            
        # Step 4: Calculate Technical Indicators
        combined_data_df['RSI'] = calculate_rsi(combined_data_df['close'])
        combined_data_df['Stochastic_RSI'] = calculate_stochastic_rsi(combined_data_df, 'RSI')
        combined_data_df['MFI'] = calculate_mfi(combined_data_df)
        combined_data_df['SFI'] = calculate_sfi(combined_data_df)
        combined_data_df['MACD'], combined_data_df['MACD_Signal'] = calculate_macd(combined_data_df['close'])
        
        # Step 4.1: Calculate Sentiment Technical Indicators
        # Calculate RSI for sentiment data using 'daily_avg_combined_compound'
        combined_data_df['Sentiment_RSI'] = calculate_rsi(combined_data_df['combined_compound_ma_7'])
        combined_data_df['Sentiment_Stochastic_RSI'] = calculate_stochastic_rsi(combined_data_df, 'Sentiment_RSI')
        combined_data_df['Sentiment_MACD'], combined_data_df['Sentiment_MACD_Signal'] = calculate_macd(combined_data_df['combined_compound'])
        
        # Step 4.2-4.6: Calculate Sentiment Technical Indicators supporting features
        # Adding stdev for RSI and Sentiment_RSI
        #Step 4.21: Calculate the difference between RSI and Sentiment_RSI
        combined_data_df['RSI_Difference'] = combined_data_df['RSI'] - combined_data_df['Sentiment_RSI']

        # Step 4.22: Calculate the rolling standard deviation of this difference
        # You can specify the window size (e.g., 14 days) for the rolling standard deviation
        combined_data_df['RSI_Sentiment_STD'] = combined_data_df['RSI_Difference'].rolling(window=14).std().abs()
        # Now, combined_data_df['RSI_Sentiment_STD'] contains the standard deviation between RSI and Sentiment_RSI
        
        # Step 4.23: Calculate the rolling mean of the RSI_Difference
        combined_data_df['RSI_Difference_Mean'] = combined_data_df['RSI_Difference'].rolling(window=14).mean()

        # Step 4.24: Calculate the number of standard deviations from the mean
        combined_data_df['RSI_Difference_STD_Deviation'] = (
            (combined_data_df['RSI_Difference'] - combined_data_df['RSI_Difference_Mean']) /
            combined_data_df['RSI_Sentiment_STD']
        )
        # Now, combined_data_df['RSI_Difference_STD_Deviation'] contains the number of standard deviations from the mean for each day
        
        # Step 4.24: Identify if RSI_Difference_STD_Deviation is greater than 2
        combined_data_df['RSI_STD_above_2'] = abs(combined_data_df['RSI_Difference_STD_Deviation']) > 2

        # Step 4.25: execute the divergence function
        combined_data_df['RSI_Price_Divergence'] = find_RSI_price_divergence(combined_data_df)
        combined_data_df['RSI_Sentiment_Divergence'] = find_RSI_sentiment_divergence(combined_data_df)
        
        # Step 4.26: count the divergence recorded in the divergence function
        # Calculate consecutive counts for RSI Price divergence, similar to what was done with MACD
        combined_data_df['Consecutive_Count_RSI_Price_Divergence'] = (
            combined_data_df['RSI_Price_Divergence']
            .apply(lambda x: x if x != 'None' else None)
            .groupby((combined_data_df['RSI_Price_Divergence'] != combined_data_df['RSI_Price_Divergence'].shift()).cumsum())
            .cumcount()
            .where(combined_data_df['RSI_Price_Divergence'] != 'None', 0)
        )
        
        # Step 4.26A: Calculate consecutive counts for RSI Sentiment divergence, similar to what is done with MACD
        combined_data_df['Consecutive_Count_RSI_Sentiment_Divergence'] = (
            combined_data_df['RSI_Sentiment_Divergence']
            .apply(lambda x: x if x != 'None' else None)
            .groupby((combined_data_df['RSI_Sentiment_Divergence'] != combined_data_df['RSI_Sentiment_Divergence'].shift()).cumsum())
            .cumcount()
            .where(combined_data_df['RSI_Sentiment_Divergence'] != 'None', 0)
        )
        
        # Step 4.27 calculate RSI_Trend_Reversal variable
        combined_data_df['RSI_Overbought'] = (combined_data_df['RSI'] > 70) & (combined_data_df['Sentiment_RSI'] > 70)
        combined_data_df['RSI_Oversold'] = (combined_data_df['RSI'] < 30) & (combined_data_df['Sentiment_RSI'] < 30)

        # Create conditions for divergence
        combined_data_df['Bearish_Divergence'] = (combined_data_df['RSI_Price_Divergence'] == 'Bearish RSI Price Divergence') & (combined_data_df['RSI_Sentiment_Divergence'] == 'Bearish RSI Sentiment Divergence')
        combined_data_df['Bullish_Divergence'] = (combined_data_df['RSI_Price_Divergence'] == 'Bullish RSI Price Divergence') & (combined_data_df['RSI_Sentiment_Divergence'] == 'Bullish RSI Sentiment Divergence')

        # Create a new column for RSI Trend Reversal based on overbought/oversold levels and divergence
        combined_data_df['RSI_Trend_Reversal'] = np.where(
            (combined_data_df['RSI_Overbought'] & combined_data_df['Bearish_Divergence']),
            'Likely Downward Reversal',  # Bearish reversal when both RSI and Sentiment_RSI are overbought and bearish divergence occurs
            np.where(
                (combined_data_df['RSI_Oversold'] & combined_data_df['Bullish_Divergence']),
                'Likely Upward Reversal',  # Bullish reversal when both RSI and Sentiment_RSI are oversold and bullish divergence occurs
                'No Reversal'  # Default value when no reversal condition is met
            )
        )
        
        # Step 4.3: Calculate the rolling mean of the stoch RSI_Difference
        # Apply smoothing to the Sentiment_Stochastic_RSI using a moving average or EMA; Here we use a 2-period EMA for smoothing
        smoothing_window = 2
        combined_data_df['Smoothed_Sentiment_Stochastic_RSI'] = combined_data_df['Sentiment_Stochastic_RSI'].ewm(span=smoothing_window, adjust=False).mean()

        # Adding stdev for RSI and Sentiment_RSI
        # Step 4.31: Calculate the difference between Stochastic_RSI and Sentiment_Stochastic_RSI
        combined_data_df['Stoch_RSI_Difference'] = combined_data_df['Stochastic_RSI'] - combined_data_df['Sentiment_Stochastic_RSI']

        
        # Step 4.32: Calculate the rolling standard deviation of this difference
        # You can specify the window size (e.g., 14 days) for the rolling standard deviation
        combined_data_df['Stoch_RSI_Sentiment_STD'] = combined_data_df['Stoch_RSI_Difference'].rolling(window=14).std().abs()

        # Step 4.33: Calculate the rolling mean of the RSI_Difference
        combined_data_df['Stoch_RSI_Difference_Mean'] = combined_data_df['Stoch_RSI_Difference'].rolling(window=14).mean()

        # Step 4.34: Calculate the number of standard deviations from the mean
        combined_data_df['Stoch_RSI_Difference_STD_Deviation'] = (
            (combined_data_df['Stoch_RSI_Difference'] - combined_data_df['Stoch_RSI_Difference_Mean']) /
            combined_data_df['Stoch_RSI_Sentiment_STD']
        )
        
        # Step 4.35: Identify if Stoch_RSI_Difference_STD_Deviation is greater than 2
        combined_data_df['Stoch_RSI_STD_above_2'] = abs(combined_data_df['Stoch_RSI_Difference_STD_Deviation']) > 2
        
        #  # Step 4.365: count how many periods stoch RSI is in extreme position
        # Initialize the counter columns
        combined_data_df['Stoch_RSI_Both_Extreme_Counter'] = 0

        # Iterate through the DataFrame to update the counter
        for i in range(1, len(combined_data_df)):
            if combined_data_df.iloc[i]['Stochastic_RSI'] > 80 and combined_data_df.iloc[i]['Sentiment_Stochastic_RSI'] > 80:
                if combined_data_df.iloc[i-1]['Stoch_RSI_Both_Extreme_Counter'] > 0:  # Continuation of a positive streak
                    combined_data_df.iloc[i, combined_data_df.columns.get_loc('Stoch_RSI_Both_Extreme_Counter')] = combined_data_df.iloc[i-1]['Stoch_RSI_Both_Extreme_Counter'] + 1
                else:  # Start of a new positive streak
                    combined_data_df.iloc[i, combined_data_df.columns.get_loc('Stoch_RSI_Both_Extreme_Counter')] = 1
            elif combined_data_df.iloc[i]['Stochastic_RSI'] < 20 and combined_data_df.iloc[i]['Sentiment_Stochastic_RSI'] < 20:
                if combined_data_df.iloc[i-1]['Stoch_RSI_Both_Extreme_Counter'] < 0:  # Continuation of a negative streak
                    combined_data_df.iloc[i, combined_data_df.columns.get_loc('Stoch_RSI_Both_Extreme_Counter')] = combined_data_df.iloc[i-1]['Stoch_RSI_Both_Extreme_Counter'] - 1
                else:  # Start of a new negative streak
                    combined_data_df.iloc[i, combined_data_df.columns.get_loc('Stoch_RSI_Both_Extreme_Counter')] = -1
            else:
                combined_data_df.iloc[i, combined_data_df.columns.get_loc('Stoch_RSI_Both_Extreme_Counter')] = 0  # Reset the counter if the condition is not met or both are zero

            # Additional condition to reset counter if both values are exactly zero
            if combined_data_df.iloc[i]['Stochastic_RSI'] == 0 and combined_data_df.iloc[i]['Sentiment_Stochastic_RSI'] == 0:
                combined_data_df.iloc[i, combined_data_df.columns.get_loc('Stoch_RSI_Both_Extreme_Counter')] = 0
        
        
        # Step 4.40 Calculate extra goodies for the MACD 
        # Scale Sentiment_MACD to the scale of MACD
        macd_scaler = MinMaxScaler(feature_range=(combined_data_df['MACD'].min(), combined_data_df['MACD'].max()))
        combined_data_df['scaled_Sentiment_MACD'] = macd_scaler.fit_transform(combined_data_df[['Sentiment_MACD']]).flatten()

        # Step 4.41 Scale Sentiment_MACD_Signal to the scale of MACD_Signal
        macd_signal_scaler = MinMaxScaler(feature_range=(combined_data_df['MACD_Signal'].min(), combined_data_df['MACD_Signal'].max()))
        combined_data_df['scaled_Sentiment_MACD_Signal'] = macd_signal_scaler.fit_transform(combined_data_df[['Sentiment_MACD_Signal']]).flatten()

        #Step 4.42 Calculate the MACD histogram for price data
        combined_data_df['MACD_Histogram'] = combined_data_df['MACD'] - combined_data_df['MACD_Signal']

        #Step 4.43 Calculate the MACD histogram for sentiment data
        combined_data_df['Sentiment_MACD_Histogram'] = combined_data_df['scaled_Sentiment_MACD'] - combined_data_df['scaled_Sentiment_MACD_Signal']

        #Step 4.44: Calculate the difference between MACD_Signal and Sentiment_MACD_Signal
        combined_data_df['MACD_Signal_Difference'] = combined_data_df['MACD_Signal'] - combined_data_df['scaled_Sentiment_MACD_Signal']

        #Step 4.45: Calculate the rolling standard deviation of this difference
        combined_data_df['MACD_Signal_Sentiment_STD'] = combined_data_df['MACD_Signal_Difference'].rolling(window=14).std().abs()

        #Step 4.46: Calculate the rolling mean of the MACD_Signal_Difference
        combined_data_df['MACD_Signal_Difference_Mean'] = combined_data_df['MACD_Signal_Difference'].rolling(window=14).mean()

        #Step 4.46: Calculate the number of standard deviations from the mean
        combined_data_df['MACD_Signal_Difference_STD_Deviation'] = (
            (combined_data_df['MACD_Signal_Difference'] - combined_data_df['MACD_Signal_Difference_Mean']) /
            combined_data_df['MACD_Signal_Sentiment_STD']
        )

        # Step 4.47: Identify if MACD_Signal_Difference_STD_Deviation is greater than 2
        macd_signal_condition_above_2 = abs(combined_data_df['MACD_Signal_Difference_STD_Deviation']) > 2

        # Step 4.48: Record the MACD_Signal_Difference_STD_Deviation directly to the DataFrame
        combined_data_df['MACD_Signal_trend_reversal'] = np.where(
            macd_signal_condition_above_2,
            combined_data_df['MACD_Signal_Difference_STD_Deviation'],
            0
        )

        # Step 4.49: Identify if there is a cross between Sentiment_MACD_Signal and MACD_Signal and record it
        # Capture positive and negative crosses for future analysis in Tableau
        combined_data_df['MACD_Signal_Cross'] = np.where(
            (combined_data_df['scaled_Sentiment_MACD_Signal'] > combined_data_df['MACD_Signal']) &
            (combined_data_df['scaled_Sentiment_MACD_Signal'].shift(1) <= combined_data_df['MACD_Signal'].shift(1)),
            1,  # Bullish cross
            np.where(
                (combined_data_df['scaled_Sentiment_MACD_Signal'] < combined_data_df['MACD_Signal']) &
                (combined_data_df['scaled_Sentiment_MACD_Signal'].shift(1) >= combined_data_df['MACD_Signal'].shift(1)),
                -1,  # Bearish cross
                0  # No cross
            )
        )

        # Step 4.491: Create a new column to capture the significance of the cross using both deviation and the MACD difference
        combined_data_df['MACD_Cross_Significance'] = np.where(
            combined_data_df['MACD_Signal_Cross'] != 0,
            combined_data_df['MACD_Signal_trend_reversal'] * combined_data_df['MACD_Signal_Cross'],
            0
        )

        # Step 4.492: Determine the direction of the Sentiment MACD Signal
        combined_data_df['Sentiment_MACD_Signal_Direction'] = np.where(
            combined_data_df['scaled_Sentiment_MACD_Signal'] > combined_data_df['scaled_Sentiment_MACD_Signal'].shift(1),
            1,  # Upward direction
            np.where(
                combined_data_df['scaled_Sentiment_MACD_Signal'] < combined_data_df['scaled_Sentiment_MACD_Signal'].shift(1),
                -1,  # Downward direction
                0  # No change
            )
        )
        

        #Step 4.493 Find MACD Price divergence and store it in the DataFrame
        combined_data_df['MACD_Price_Divergence'] = find_MACD_price_divergence(combined_data_df)
        combined_data_df['Consecutive_Count_MACD_Price_Divergence'] = combined_data_df['MACD_Price_Divergence'].apply(lambda x: x if x != 'None' else None).groupby((combined_data_df['MACD_Price_Divergence'] != combined_data_df['MACD_Price_Divergence'].shift()).cumsum()).cumcount().where(combined_data_df['MACD_Price_Divergence'] != 'None', 0)
 
        #Step 4.494 Find MACD sentiment divergence and store it in the DataFrame
        combined_data_df['MACD_Sentiment_Divergence'] = find_MACD_sentiment_divergence(combined_data_df)
        combined_data_df['Consecutive_Count_MACD_Sentiment_Divergence'] = combined_data_df['MACD_Sentiment_Divergence'].apply(lambda x: x if x != 'None' else None).groupby((combined_data_df['MACD_Sentiment_Divergence'] != combined_data_df['MACD_Sentiment_Divergence'].shift()).cumsum()).cumcount().where(combined_data_df['MACD_Sentiment_Divergence'] != 'None', 0)
 
        # Step 7: Scale selected sentiment features to match the scale of the 'close' price
        columns_to_scale = [
            'combined_compound_ma_7', 'combined_compound_ma_21', 'combined_compound_ma_50',
            'combined_compound_ma_100', 'combined_compound_ma_200', 'combined_compound'
        ]
        combined_data_df = scale_features_to_price(combined_data_df, columns_to_scale, 'close')
        
        # Step 5: Generate previous day and trend indicators
        ma_columns = [col for col in combined_data_df.columns if 'combined_compound' in col or 'close_ma' in col or 'scaled_combined_compound' in col]

        for ma_column in ma_columns:
            combined_data_df[f'prev_{ma_column}'] = combined_data_df[ma_column].shift(1)
            combined_data_df[f'{ma_column}_trend'] = (combined_data_df[ma_column] > combined_data_df[f'prev_{ma_column}']).astype(int)
            combined_data_df[f'{ma_column}_pct_change'] = combined_data_df[ma_column].pct_change() * 100
            combined_data_df[f'{ma_column}_direction_change_flag'] = combined_data_df[f'{ma_column}_trend'].diff().apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))

        # Step 6: Volume Moving Averages
        ma_vol_df = combined_data_df[['volume']].copy()
        for ma in [7, 20]:
            ma_vol_df[f'{ma}_day_MA_Volume'] = ma_vol_df['volume'].rolling(window=ma, min_periods=1).mean()

        # Join Volume moving averages
        combined_data_df = combined_data_df.join(ma_vol_df, how='left', rsuffix='_vol')

        # Fill NaN values for moving average columns
        for column in combined_data_df.columns:
            if '_MA' in column:
                combined_data_df[column].fillna(method='ffill', inplace=True)
                combined_data_df[column].fillna(method='bfill', inplace=True)
           
        # fill Close for Bollinger band display only
        combined_data_df['close_fill'] = combined_data_df['close']    
        fill_columns = ['close_fill']

        for column in fill_columns:
                combined_data_df[column].fillna(method='ffill', inplace=True)
                combined_data_df[column].fillna(method='bfill', inplace=True)
        

        # Create High Volume Flags
        combined_data_df['High_Volume_7'] = (combined_data_df['volume'] > combined_data_df['7_day_MA_Volume']).astype(int)
        combined_data_df['High_Volume_20'] = (combined_data_df['volume'] > combined_data_df['20_day_MA_Volume']).astype(int)

        # Apply forward fill and backfill for numerical values
        # combined_data_df.fillna(method='ffill', inplace=True)
        #combined_data_df.fillna(method='bfill', inplace=True)



        # Step 8: Add trend columns for scaled values
        combined_data_df['3_day_avg_combined_compound_ma_7'] = combined_data_df['combined_compound_ma_7'].rolling(window=3).mean()
        combined_data_df['3_day_avg_combined_compound_ma_7_prev'] = combined_data_df['3_day_avg_combined_compound_ma_7'].shift(1)
        combined_data_df['3_day_avg_combined_compound_ma_7_trend'] = (combined_data_df['3_day_avg_combined_compound_ma_7'] > combined_data_df['3_day_avg_combined_compound_ma_7_prev']).astype(int)

        # Step 9: Calculate differences between scaled values and close moving averages
        if 'close_ma_7' in combined_data_df.columns and 'scaled_combined_compound_ma_7' in combined_data_df.columns:
            combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
            mean_difference = combined_data_df['ma_7_diff'].mean()
            std_difference = combined_data_df['ma_7_diff'].std()
            combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
        else:
            print(f"Required columns not found for {grp_term}. Skipping difference calculations.")
            
            
        # Use the extended data for Bollinger Band calculation
        # Step 2: Calculate the standard deviation for sentiment using 'combined_compound_ma_7' over the extended data range
        combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()

        # Step 3: Calculate the sentiment Upper and Lower Bollinger Bands using the extended data
        sentiment_ma_column = 'combined_compound_ma_7'
        num_std_dev = 2
        combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
        combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)

        # Step 4: Ensure sentiment_Upper_Band is always above sentiment_Lower_Band
        mask = combined_data_df['sentiment_Upper_Band'] < combined_data_df['sentiment_Lower_Band']
        combined_data_df.loc[mask, ['sentiment_Upper_Band', 'sentiment_Lower_Band']] = combined_data_df.loc[mask, ['sentiment_Lower_Band', 'sentiment_Upper_Band']].values

        # Step 5: Calculate the price Bollinger Bands using the close price moving average ('close_ma_21') over the extended data range
        price_ma_column = 'close_ma_21'
        combined_data_df['price_STD'] = combined_data_df[price_ma_column].rolling(window=20).std()

        # Calculate Upper and Lower Bollinger Bands for the price
        combined_data_df['price_Upper_Band'] = combined_data_df[price_ma_column] + (combined_data_df['price_STD'] * num_std_dev)
        combined_data_df['price_Lower_Band'] = combined_data_df[price_ma_column] - (combined_data_df['price_STD'] * num_std_dev)

        # Step 6: Calculate sentiment divergence based on aligned data
        # This is done on the extended data, and later we'll trim the result
        sentiment_divergence = combined_data_df[['sentiment_Upper_Band', 'sentiment_Lower_Band']].sub(
            combined_data_df[['price_Upper_Band', 'price_Lower_Band']].values
        )
        sentiment_divergence['divergence'] = sentiment_divergence.abs().sum(axis=1)

        # Add sentiment divergence to the DataFrame
        combined_data_df['sentiment_divergence'] = sentiment_divergence['divergence']

        # Step 7: Calculate the adjusted overlap upper and lower bands
        combined_data_df['boll_upper_overlap_band'] = combined_data_df.apply(
            lambda row: calculate_boll_upper_advanced(
                row['price_Upper_Band'], 
                row['sentiment_Lower_Band'], 
                row['sentiment_Upper_Band'], 
                row['price_Lower_Band']
            ), axis=1
        )

        combined_data_df['boll_lower_overlap_band'] = combined_data_df.apply(
            lambda row: calculate_boll_lower_advanced(
                row['price_Lower_Band'], 
                row['sentiment_Upper_Band'], 
                row['sentiment_Lower_Band'], 
                row['price_Upper_Band']
            ), axis=1
        )
        
        # Crossovers
        # Calculate the combined normalized scores for all moving averages
        for ma in [7, 21, 50, 100, 200]:
            sentiment_ma_col = f'combined_compound_ma_{ma}'
            price_ma_col = f'close_ma_{ma}'

            combined_data_df[sentiment_ma_col].fillna(method='ffill', inplace=True)
            combined_data_df[price_ma_col].fillna(method='ffill', inplace=True)

            normalized_sentiment_col = f'normalized_sentiment_{ma}'
            normalized_price_col = f'normalized_price_{ma}'

            combined_data_df[normalized_sentiment_col] = normalize_column(combined_data_df, sentiment_ma_col)
            combined_data_df[normalized_price_col] = normalize_column(combined_data_df, price_ma_col)

            # Calculate combined normalized score using weights
            combined_data_df[f'combined_normalized_score_{ma}'] = (
                combined_data_df[normalized_sentiment_col] * 0.8 +
                combined_data_df[normalized_price_col] * 0.2
            )
        # Iterate over each moving average period and calculate crossovers dynamically
        for ma in [7, 21, 50, 100, 200]:
            # Dynamic column names
            normalized_sentiment_col = f'normalized_sentiment_{ma}'
            normalized_price_col = f'normalized_price_{ma}'
            crossover_column = f'crossover_{ma}'
            crossover_type_column = f'crossover_type_{ma}'

            # Calculate the crossover points for each moving average using normalized columns and store the 'Close' value
            combined_data_df[crossover_column] = np.where(
                (combined_data_df[normalized_sentiment_col] > combined_data_df[normalized_price_col]) & 
                (combined_data_df[normalized_sentiment_col].shift(1) <= combined_data_df[normalized_price_col].shift(1)) |
                (combined_data_df[normalized_sentiment_col] < combined_data_df[normalized_price_col]) & 
                (combined_data_df[normalized_sentiment_col].shift(1) >= combined_data_df[normalized_price_col].shift(1)), 
                combined_data_df['close'], 
                np.nan
            )

            # Define the crossover type (up or down)
            combined_data_df[crossover_type_column] = np.where(
                (combined_data_df[normalized_sentiment_col] > combined_data_df[normalized_price_col]) & 
                (combined_data_df[normalized_sentiment_col].shift(1) <= combined_data_df[normalized_price_col].shift(1)),
                'cross_up',
                np.where(
                    (combined_data_df[normalized_sentiment_col] < combined_data_df[normalized_price_col]) & 
                    (combined_data_df[normalized_sentiment_col].shift(1) >= combined_data_df[normalized_price_col].shift(1)),
                    'cross_down',
                    np.nan
                )
            )
            
        combined_data_df['prev_close_up_down'] = combined_data_df['close'].diff().apply(lambda x: 1 if x > 0 else 0)
        
        
        # === DEBUG BLOCK: Check for 2025-xx-xx in all relevant DataFrames ===
        debug_date = end_date

        print("\n🔍 DEBUG CHECK — Row presence for: ")

        for label, df in {
            "extended_price_df": extended_price_df,
            "ma_db_df": ma_db_df,
            "signals_df": signals_df,
            "pred_db_df": pred_db_df,
            "combined_data_df (final)": combined_data_df
        }.items():
            if df is not None and not df.empty:
                try:
                    df.index = pd.to_datetime(df.index)  # ensure datetime index
                    has_row = debug_date in df.index
                    print(f"✅ {label}: {'FOUND' if has_row else '❌ MISSING'}")
                except Exception as e:
                    print(f"⚠️ {label}: Error checking — {e}")
            else:
                print(f"⚠️ {label} is None or empty.")

        # Step 8: Trim the DataFrame to fit the original date window (start_date to end_date)
        combined_data_df = combined_data_df.loc[start_date:end_date]

        # Step 11: Remove duplicate columns if they exist
        combined_data_df = combined_data_df.loc[:, ~combined_data_df.columns.duplicated()]

        # Step 12: Store the final DataFrame in the dictionary
        moving_averages_dict[grp_term] = combined_data_df.copy()

# Step 13: Concatenate all DataFrames from the dictionary after processing each term
final_combined_data_df = pd.concat(moving_averages_dict.values(), axis=0)

# Optionally, reset index if you need to work with the 'date' column directly
final_combined_data_df.reset_index(inplace=True)


# Display the final DataFrame or inspect it as needed
print("Final Combined DataFrame:")
display(final_combined_data_df.tail(20))

# Save the final combined DataFrame to a CSV file
final_combined_data_df.to_csv('final_combined_data.csv', index=False)

Processing term: SOL


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: KAS


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: LINK


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: ADA


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: MATIC


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: AMZN


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: MSFT


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: AVAX


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: AAPL


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: GME


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: NVDA


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: JPM


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: DOGEGOOGL
No price data found for term: DOGEGOOGL
Processing term: ETH


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: DXY
No price data found for term: DXY
Processing term: TSMC


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: CVX


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: COIN


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: POPCAT


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: SUI


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: HNT


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: NFLX


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: WIF


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: DIS


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Processing term: BTC


  combined_data_df['ma_7_diff'] = combined_data_df['close_ma_7'] - combined_data_df['scaled_combined_compound_ma_7']
  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: FOUND
✅ ma_db_df: FOUND
✅ signals_df: FOUND
✅ pred_db_df: FOUND
✅ combined_data_df (final): FOUND
Processing term: TSLA


  combined_data_df['ma_7_diff_std'] = (combined_data_df['ma_7_diff'] - mean_difference) / std_difference
  combined_data_df['sentiment_STD'] = combined_data_df['combined_compound_ma_7'].rolling(window=20).std()
  combined_data_df['sentiment_Upper_Band'] = combined_data_df[sentiment_ma_column] + (combined_data_df['sentiment_STD'] * num_std_dev)
  combined_data_df['sentiment_Lower_Band'] = combined_data_df[sentiment_ma_column] - (combined_data_df['sentiment_STD'] * num_std_dev)



🔍 DEBUG CHECK — Row presence for: 
✅ extended_price_df: ❌ MISSING
✅ ma_db_df: ❌ MISSING
✅ signals_df: FOUND
✅ pred_db_df: ❌ MISSING
✅ combined_data_df (final): ❌ MISSING
Final Combined DataFrame:


  final_combined_data_df.reset_index(inplace=True)


Unnamed: 0,date,term,combined_compound_ma_7,combined_compound_ma_21,combined_compound_ma_50,combined_compound_ma_100,combined_compound_ma_200,combined_compound,open,high,...,crossover_type_7,crossover_21,crossover_type_21,crossover_50,crossover_type_50,crossover_100,crossover_type_100,crossover_200,crossover_type_200,prev_close_up_down
1093,2025-05-19,TSLA,0.122077,0.111684,0.082183,0.058887,0.042653,0.070925,336.3,343.0,...,cross_down,,,,,,,,,0
1094,2025-05-20,TSLA,0.08869,0.100488,0.07851,0.057494,0.042115,-0.011468,347.87,354.9899,...,,,,,,,,,,1
1095,2025-05-21,TSLA,0.077345,0.09529,0.07713,0.057213,0.042127,0.043311,344.43,347.35,...,,,,,,,,,,0
1096,2025-05-22,TSLA,0.104363,0.103484,0.081376,0.059751,0.043553,0.185414,331.9,347.27,...,,,,,,,,,,1
1097,2025-05-23,TSLA,0.137439,0.115591,0.087466,0.063255,0.045474,0.236668,337.92,343.18,...,,,,,,,,,,0
1098,2025-05-24,TSLA,0.140123,0.118554,0.089847,0.064936,0.046496,0.148176,,,...,,,,,,,,,,0
1099,2025-05-25,TSLA,0.154439,0.12572,0.094064,0.067559,0.047997,0.197385,,,...,cross_up,,,,,,,,,0
1100,2025-05-26,TSLA,0.106047,0.110734,0.088841,0.065447,0.047131,-0.039128,,,...,cross_down,,,,,,,,,0
1101,2025-05-27,TSLA,0.091131,0.104884,0.087176,0.065069,0.047123,0.046382,347.35,363.79,...,,,,,,,,,,0
1102,2025-05-28,TSLA,0.077562,0.098699,0.085202,0.06451,0.047021,0.036857,364.84,365.0,...,,356.9,cross_down,,,,,,,0
