<a href="https://colab.research.google.com/github/NathanDietrich/Artificial-Intelligence-and-Machine-Learning-portfolio/blob/main/Pipeline_and_Daily.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install yfinance textblob



In [2]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m129.1/129.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [3]:
import os
import tensorflow as tf
from tensorflow.keras import mixed_precision

# ‚úÖ Enable GPU & Force TensorFlow to Use It
gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu_devices[0], True)
        print(f"‚úÖ GPU detected: {gpu_devices[0].name} (Memory Growth Enabled)")
    except:
        print("‚ö†Ô∏è GPU found, but could not enable memory growth.")
else:
    print("‚ùå No GPU detected. Running on CPU.")

# ‚úÖ Enable Mixed Precision for Faster Training (Uses float16 on GPU)
mixed_precision.set_global_policy('mixed_float16')
print("‚úÖ Mixed Precision Enabled (float16) for Faster GPU Training")

# ‚úÖ Check GPU Usage Before Training
!nvidia-smi --query-gpu=memory.used,memory.total --format=csv

# ‚úÖ Function to Monitor GPU Usage Live
def monitor_gpu():
    """
    Print the current GPU memory usage as reported by `nvidia-smi`.

    The command's output is captured and re-emitted with `print`, so it shows up
    in the notebook cell output. If `nvidia-smi` cannot be started (for example
    on a CPU-only machine) a short notice is printed instead of raising.

    Returns:
        None
    """
    import subprocess  # local import keeps this helper self-contained

    print("\nüîç Checking GPU Usage...")
    command = ["nvidia-smi", "--query-gpu=memory.used,memory.total", "--format=csv"]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, check=False)
    except OSError as err:
        # Raised when the executable is missing or cannot be launched.
        print(f"nvidia-smi could not be run: {type(err).__name__}")
        return

    # Prefer the normal report; fall back to the error stream when the tool failed.
    report = completed.stdout.strip() or completed.stderr.strip()
    print(report if report else "(nvidia-smi produced no output)")

monitor_gpu()

‚úÖ GPU detected: /physical_device:GPU:0 (Memory Growth Enabled)
‚úÖ Mixed Precision Enabled (float16) for Faster GPU Training
memory.used [MiB], memory.total [MiB]
2 MiB, 15360 MiB

üîç Checking GPU Usage...


In [None]:
import os
import requests
import datetime
import pandas as pd
import time
from textblob import TextBlob
from google.colab import drive
from google.colab import userdata
# NOTE(review): the return value of this call is discarded. Presumably it is here
# only to surface a missing secret / access prompt early — confirm; otherwise it
# can be removed, since collect_raw_data() reads the key itself.
userdata.get('Polygon_Key')

# Mount Google Drive for saving raw data
drive.mount('/content/drive')

# --- Provided functions for stock & sentiment data collection ---
def fetch_stock_data_polygon(ticker, start_date, end_date, api_key):
    """
    Fetch daily OHLCV bars for one ticker from the Polygon.io aggregates endpoint.

    Args:
        ticker: Symbol to query, e.g. "AAPL".
        start_date: First day of the range, formatted "YYYY-MM-DD".
        end_date: Last day of the range, formatted "YYYY-MM-DD".
        api_key: Polygon API key.

    Returns:
        A DataFrame with columns Date, Open, High, Low, Close, Volume, or None
        when the request fails or the response contains no bars.
    """
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/day/{start_date}/{end_date}"
    try:
        # The key is sent as a query parameter instead of being baked into the URL
        # string; the timeout keeps a stalled connection from hanging the cell.
        response = requests.get(url, params={"apiKey": api_key}, timeout=30)
    except requests.RequestException as err:
        # Only the exception type is printed: the full message can contain the
        # request URL, which includes the API key.
        print(f"Error fetching stock data for {ticker}: {type(err).__name__}")
        return None
    if response.status_code != 200:
        print(f"Error fetching stock data for {ticker}: {response.text}")
        return None

    results = response.json().get("results")
    if not results:
        # Covers both a missing "results" key and an empty list of bars.
        print(f"No results found for {ticker}.")
        return None

    df = pd.DataFrame(results)
    # "t" is a Unix timestamp in milliseconds; keep just the calendar date.
    df["Date"] = pd.to_datetime(df["t"], unit="ms").dt.date
    df = df.rename(columns={"o": "Open", "h": "High", "l": "Low", "c": "Close", "v": "Volume"})
    return df[["Date", "Open", "High", "Low", "Close", "Volume"]]

def fetch_sentiment_data_polygon(ticker, start_date, end_date, api_key, limit=1000):
    """
    Download raw news articles for a ticker from Polygon.io in 30-day chunks.

    No sentiment is computed here; the returned articles are meant to be passed
    to `analyze_sentiment`.

    Args:
        ticker: Symbol to query.
        start_date: Start of the range, "YYYY-MM-DD".
        end_date: End of the range, "YYYY-MM-DD".
        api_key: Polygon API key.
        limit: Page size requested from the API.

    Returns:
        A list of article dicts as returned by the API. Articles carrying an
        "id" that was already collected are skipped, because neighbouring chunks
        share their boundary date and both bounds are inclusive.
    """
    url = "https://api.polygon.io/v2/reference/news"
    all_results = []
    seen_ids = set()
    current_start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    final_end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    while current_start_date < final_end_date:
        chunk_end_date = min(current_start_date + datetime.timedelta(days=30), final_end_date)
        chunk_start_str = current_start_date.strftime("%Y-%m-%d")
        chunk_end_str = chunk_end_date.strftime("%Y-%m-%d")
        print(f"üì° Fetching sentiment data for {ticker} from {chunk_start_str} to {chunk_end_str}...")

        page_url = url
        params = {
            "ticker": ticker,
            "published_utc.gte": chunk_start_str,
            "published_utc.lte": chunk_end_str,
            "apiKey": api_key,
            "limit": limit
        }
        while page_url:
            try:
                response = requests.get(page_url, params=params, timeout=30)
            except requests.RequestException as err:
                # Best effort, like the non-200 branch: keep what was collected.
                # Only the type is printed; the message may embed the API key.
                print(f"‚ö†Ô∏è Error fetching sentiment data for {ticker}: {type(err).__name__}")
                break
            if response.status_code != 200:
                print(f"‚ö†Ô∏è Error fetching sentiment data for {ticker}: {response.status_code}, {response.text}")
                break

            data = response.json()
            for article in data.get("results") or []:
                article_id = article.get("id")
                if article_id is not None:
                    if article_id in seen_ids:
                        continue
                    seen_ids.add(article_id)
                all_results.append(article)

            # The news endpoint paginates through a ready-made "next_url"; that
            # URL already carries the query, so only the key is sent again.
            page_url = data.get("next_url")
            if page_url:
                params = {"apiKey": api_key}
                time.sleep(14)  # throttle follow-up pages like the chunk requests
        current_start_date = chunk_end_date
        time.sleep(14)  # Avoid hitting API rate limits
    return all_results

def analyze_sentiment(news_data):
    """
    Score each news article with TextBlob.

    Args:
        news_data: Iterable of article dicts; the keys "title", "description"
            and "published_utc" are read, and all of them are optional.

    Returns:
        A list with one dict per article holding "title", "description",
        "published_date", "sentiment_polarity" and "sentiment_subjectivity".
    """
    analyzed_data = []
    for article in news_data:
        # `or ""` also covers keys that are present but null; otherwise the
        # literal text "None" would be fed into the sentiment analysis.
        title = article.get("title") or ""
        description = article.get("description") or ""
        sentiment = TextBlob(f"{title} {description}").sentiment
        analyzed_data.append({
            "title": title,
            "description": description,
            "published_date": article.get("published_utc", ""),
            "sentiment_polarity": sentiment.polarity,
            "sentiment_subjectivity": sentiment.subjectivity
        })
    return analyzed_data

def merge_stock_and_sentiment(stock_df, sentiment_data):
    """
    Attach daily mean sentiment scores to the stock rows by date.

    Args:
        stock_df: DataFrame with a "Date" column; it is not modified.
        sentiment_data: List of dicts with "published_date",
            "sentiment_polarity" and "sentiment_subjectivity".

    Returns:
        A new DataFrame with "sentiment_polarity" and "sentiment_subjectivity"
        added. Days without news, and days whose mean score is exactly 0, take
        the most recent earlier non-zero score; anything still missing becomes 0.
        Both sentiment columns stay float64.
    """
    sentiment_cols = ['sentiment_polarity', 'sentiment_subjectivity']
    sentiment_df = pd.DataFrame(sentiment_data)
    if sentiment_df.empty:
        print("‚ö†Ô∏è No sentiment data found, proceeding without sentiment.")
        # Work on a copy so the caller's frame is left untouched.
        neutral_df = stock_df.copy()
        neutral_df["sentiment_polarity"] = 0  # Default neutral
        neutral_df["sentiment_subjectivity"] = 0
        return neutral_df

    sentiment_df['published_date'] = pd.to_datetime(sentiment_df['published_date'], errors='coerce')
    sentiment_df['Date'] = sentiment_df['published_date'].dt.date
    daily_sentiment = sentiment_df.groupby('Date').agg({
        'sentiment_polarity': 'mean',
        'sentiment_subjectivity': 'mean'
    }).reset_index()
    merged_df = pd.merge(stock_df, daily_sentiment, on="Date", how="left")

    # Treat exact zeros as missing and carry the last score forward. `mask`
    # inserts NaN, which keeps the columns numeric (substituting pd.NA turned
    # them into object dtype).
    scores = merged_df[sentiment_cols].astype("float64")
    merged_df[sentiment_cols] = scores.mask(scores == 0).ffill()

    # NOTE(review): this fills every remaining NaN in the frame with 0, including
    # any missing price values — kept as in the original behaviour.
    return merged_df.fillna(0)

# --- Main raw data collection for selected tickers ---
def collect_raw_data():
    """
    Collect price and news-sentiment data for a fixed ticker list and write one
    merged raw CSV per ticker to Google Drive.

    The date range runs from 2021-01-01 to today. The Polygon API key is read
    from the Colab secret "Polygon_Key". Tickers without stock data are skipped.

    Returns:
        None
    """
    # Set fixed start/end dates (adjust as needed)
    start_date = "2021-01-01"
    end_date = datetime.date.today().strftime("%Y-%m-%d")
    # List of tickers for stock + sentiment collection
    tickers = ["AAPL", "AMZN", "MSFT", "SPY", "QQQ"]

    # The key comes from Colab's secret store. An empty value (or the placeholder
    # text) means it has not been configured, so stop before any request is made.
    api_key = userdata.get("Polygon_Key")
    if not api_key or api_key == "YOUR_POLYGON_API_KEY":
        print("Please set your Polygon API key in the Colab secret 'Polygon_Key'")
        return

    # The output folder is the same for every ticker, so create it once.
    save_dir = "/content/drive/MyDrive/StockData"
    os.makedirs(save_dir, exist_ok=True)

    for ticker in tickers:
        print(f"\n================== Processing {ticker} ==================")
        print(f"üìä Fetching stock data for {ticker} from {start_date} to {end_date}...")
        stock_df = fetch_stock_data_polygon(ticker, start_date, end_date, api_key)
        if stock_df is None:
            print(f"‚ùå No stock data found for {ticker}. Skipping.")
            continue
        print(f"üì∞ Fetching sentiment data for {ticker} from {start_date} to {end_date}...")
        news_data = fetch_sentiment_data_polygon(ticker, start_date, end_date, api_key, limit=1000)
        if not news_data:
            print(f"‚ö†Ô∏è No news data found for {ticker}. Proceeding without sentiment data.")
        print("üí° Performing sentiment analysis...")
        sentiment_data = analyze_sentiment(news_data)
        print("üîó Merging stock and sentiment data...")
        merged_df = merge_stock_and_sentiment(stock_df, sentiment_data)
        # Save raw merged data to Google Drive
        filename = os.path.join(save_dir, f"{ticker}_{start_date}_to_{end_date}_raw.csv")
        merged_df.to_csv(filename, index=False)
        print(f"‚úÖ Raw data for {ticker} saved to: {filename}")

# Run raw data collection
collect_raw_data()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

üìä Fetching stock data for AAPL from 2021-01-01 to 2025-03-16...
üì∞ Fetching sentiment data for AAPL from 2021-01-01 to 2025-03-16...
üì° Fetching sentiment data for AAPL from 2021-01-01 to 2021-01-31...
üì° Fetching sentiment data for AAPL from 2021-01-31 to 2021-03-02...
üì° Fetching sentiment data for AAPL from 2021-03-02 to 2021-04-01...
üì° Fetching sentiment data for AAPL from 2021-04-01 to 2021-05-01...
üì° Fetching sentiment data for AAPL from 2021-05-01 to 2021-05-31...
üì° Fetching sentiment data for AAPL from 2021-05-31 to 2021-06-30...
üì° Fetching sentiment data for AAPL from 2021-06-30 to 2021-07-30...
üì° Fetching sentiment data for AAPL from 2021-07-30 to 2021-08-29...
üì° Fetching sentiment data for AAPL from 2021-08-29 to 2021-09-28...
üì° Fetching sentiment data for AAPL from 2021-09-28 to 2021-10-28...
üì° Fetching sentimen

In [None]:
import pandas as pd

def calculate_technical_indicators(df):
    """
    Add common technical indicators, computed from the 'Close' column:
      - SMA_10, SMA_20: simple moving averages
      - EMA_10, EMA_20: exponential moving averages
      - RSI: 14-day relative strength index (simple rolling means of gains/losses)
      - MACD (EMA12 - EMA26) and MACD_Signal (9-span EMA of MACD)

    Args:
        df: DataFrame with a 'Close' column. It is modified in place.

    Returns:
        The same DataFrame object with the indicator columns added and every
        column forward-filled.

    Note:
        Forward-filling cannot fill the leading warm-up rows (e.g. the first 9
        rows of SMA_10), so those remain NaN.
    """
    close = df['Close']

    # Simple Moving Averages
    df['SMA_10'] = close.rolling(window=10).mean()
    df['SMA_20'] = close.rolling(window=20).mean()

    # Exponential Moving Averages
    df['EMA_10'] = close.ewm(span=10, adjust=False).mean()
    df['EMA_20'] = close.ewm(span=20, adjust=False).mean()

    # Relative Strength Index (RSI)
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=14, min_periods=14).mean()
    avg_loss = loss.rolling(window=14, min_periods=14).mean()
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # MACD and MACD Signal
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    df['MACD'] = ema12 - ema26
    df['MACD_Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

    # Forward-fill missing values. `ffill` replaces the deprecated
    # `fillna(method='ffill')` spelling and behaves the same.
    df.ffill(inplace=True)
    return df

def preprocess_and_save(ticker, raw_filepath):
    """
    Read one raw CSV, enrich it with technical indicators and write the result
    next to it with the "_raw.csv" suffix swapped for "_processed.csv".

    Args:
        ticker: Symbol, used only in the progress message.
        raw_filepath: Path of the raw CSV; must contain a "Date" column.

    Returns:
        The path of the processed CSV that was written.
    """
    # The output name is derived purely from the input name.
    processed_filepath = raw_filepath.replace("_raw.csv", "_processed.csv")

    raw_frame = pd.read_csv(raw_filepath, parse_dates=["Date"])
    enriched_frame = calculate_technical_indicators(raw_frame)
    enriched_frame.to_csv(processed_filepath, index=False)

    print(f"‚úÖ Processed data with indicators for {ticker} saved to: {processed_filepath}")
    return processed_filepath

# Run the indicator step over every raw CSV found in the Google Drive folder.
import glob, os
raw_files = glob.glob("/content/drive/MyDrive/StockData/*_raw.csv")
for file in raw_files:
    # The ticker is the part of the file name before the first underscore.
    ticker = os.path.basename(file).partition("_")[0]
    preprocess_and_save(ticker, file)


In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib
import os

def scale_data(df, exclude_cols=['sentiment_polarity', 'sentiment_subjectivity']):
    """
    Scales numeric columns (except those in exclude_cols) using MinMaxScaler.
    Returns the scaled dataframe and the scaler object.
    """
    scaler = MinMaxScaler()
    # Identify numeric columns to scale (exclude date and specified columns)
    cols_to_scale = [col for col in df.select_dtypes(include=['float64','int64']).columns
                     if col not in exclude_cols]

    df_scaled = df.copy()
    df_scaled[cols_to_scale] = scaler.fit_transform(df_scaled[cols_to_scale])
    return df_scaled, scaler

def scale_and_save(ticker, processed_filepath):
    """
    Scale one processed CSV and persist both the fitted scaler and the scaled data.

    Args:
        ticker: Symbol; used for the scaler file name and progress messages.
        processed_filepath: Path of the processed CSV; must contain a "Date" column.

    Returns:
        Path of the scaled CSV, i.e. the input path with "_processed.csv"
        replaced by "_scaled.csv".
    """
    processed_frame = pd.read_csv(processed_filepath, parse_dates=["Date"])
    df_scaled, scaler = scale_data(processed_frame)

    # The scaler goes to its own Drive folder, one pickle per ticker.
    scaler_dir = "/content/drive/MyDrive/StockScalers"
    os.makedirs(scaler_dir, exist_ok=True)
    scaler_filepath = os.path.join(scaler_dir, f"{ticker}_scaler.pkl")
    joblib.dump(scaler, scaler_filepath)
    print(f"‚úÖ Scaler for {ticker} saved to: {scaler_filepath}")

    # The scaled data is written alongside the processed file.
    scaled_filepath = processed_filepath.replace("_processed.csv", "_scaled.csv")
    df_scaled.to_csv(scaled_filepath, index=False)
    print(f"‚úÖ Scaled data for {ticker} saved to: {scaled_filepath}")
    return scaled_filepath

# Process scaling for each processed file
import glob  # imported here so this cell also runs when the preprocessing cell was skipped

# Sorting makes the order deterministic. File names embed ISO dates, so for one
# ticker the file with the latest date range is handled last and its scaler is
# the one left in "<ticker>_scaler.pkl".
processed_files = sorted(glob.glob("/content/drive/MyDrive/StockData/*_processed.csv"))
for file in processed_files:
    ticker = os.path.basename(file).split("_")[0]
    scale_and_save(ticker, file)


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import joblib
import keras_tuner as kt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout,
                                     SimpleRNN, LSTM, Concatenate, Multiply, Activation, Lambda)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from matplotlib.lines import Line2D

# === Helper: Inverse scaling for a single feature ===
def inverse_transform_single_feature(scaler, data):
    """
    Undo scaling for a single feature.

    Args:
        scaler: Object exposing `inverse_transform` for a one-column input.
        data: Array-like of scaled values (any shape).

    Returns:
        Whatever `scaler.inverse_transform` returns for the values arranged as
        a single column of shape (n, 1).
    """
    as_column = np.reshape(np.array(data), (-1, 1))
    restored = scaler.inverse_transform(as_column)
    return restored

# === Build Ensemble Model Function ===
def build_ensemble_model(hp, input_shape):
    """
    Build and compile the CNN + SimpleRNN + LSTM ensemble with adaptive fusion.

    Each branch ends in a 50-unit dense layer. A softmax over three logits,
    computed from the concatenated branch outputs, yields one weight per branch;
    every branch output is multiplied by its weight before the fusion head.

    The weighting uses only built-in layers (Reshape / Concatenate / Multiply /
    Flatten) instead of `Lambda` layers wrapping Python lambdas. Lambdas are
    stored as bytecode and are rejected by `load_model` in its default safe
    mode, so a saved model could not be reloaded. The fused tensor is the same:
    [cnn * w0, rnn * w1, lstm * w2] concatenated in that order.

    Args:
        hp: keras-tuner HyperParameters object supplying the tunable choices.
        input_shape: (sequence_length, num_features) of one sample.

    Returns:
        A compiled Keras Model with a single regression output (MSE loss, MAE metric).
    """
    branch_units = 50  # width of each branch's final dense layer

    inputs = Input(shape=input_shape)
    # --- CNN Branch ---
    cnn = Conv1D(filters=hp.Choice('cnn_filters', [64, 128, 256]),
                 kernel_size=hp.Choice('cnn_kernel_size', [3, 5, 7]),
                 activation='relu',
                 padding='same')(inputs)
    if input_shape[0] > 1:
        # Pooling needs at least two time steps.
        cnn = MaxPooling1D(pool_size=2)(cnn)
    cnn = Flatten()(cnn)
    cnn = Dense(branch_units, activation='relu')(cnn)
    # --- RNN Branch ---
    rnn = SimpleRNN(units=hp.Choice('rnn_units', [75, 100, 125, 150]), return_sequences=True)(inputs)
    rnn = Dropout(hp.Choice('dropout_rate', [0.05, 0.1, 0.2]))(rnn)
    rnn = SimpleRNN(units=hp.Choice('rnn_units_2', [75, 100, 125, 150]))(rnn)
    rnn = Dropout(hp.Choice('dropout_rate_2', [0.05, 0.1, 0.2]))(rnn)
    rnn = Dense(branch_units, activation='relu')(rnn)
    # --- LSTM Branch ---
    lstm = LSTM(units=hp.Choice('lstm_units', [50, 75, 100]), return_sequences=True)(inputs)
    lstm = LSTM(units=hp.Choice('lstm_units_2', [50, 75, 100]))(lstm)
    lstm = Dense(branch_units, activation='relu')(lstm)
    lstm = Dropout(hp.Choice('dropout_rate_lstm', [0.1, 0.2, 0.3]))(lstm)
    # --- Adaptive Fusion ---
    combined = Concatenate()([cnn, rnn, lstm])
    weight_logits = Dense(3)(combined)
    # The softmax is computed in float32 so it stays numerically stable when a
    # mixed_float16 policy is active (no effect under the default policy).
    branch_weights = Activation('softmax', dtype='float32')(weight_logits)

    # Stack the branches as (batch, 3, units) and the weights as (batch, 3, 1);
    # the multiplication broadcasts each weight over its branch.
    branch_stack = Concatenate(axis=1)([
        tf.keras.layers.Reshape((1, branch_units))(cnn),
        tf.keras.layers.Reshape((1, branch_units))(rnn),
        tf.keras.layers.Reshape((1, branch_units))(lstm),
    ])
    weight_column = tf.keras.layers.Reshape((3, 1))(branch_weights)
    weighted = Multiply()([branch_stack, weight_column])
    merged = Flatten()(weighted)

    merged = Dense(units=hp.Choice('dense_units', [50, 100, 150]),
                   activation="relu",
                   kernel_regularizer=l2(0.001))(merged)
    merged = Dropout(hp.Choice('dropout_rate_dense', [0.1, 0.2, 0.3]))(merged)
    # Model outputs are kept in float32 even under mixed precision.
    output = Dense(1, dtype='float32')(merged)
    model = Model(inputs, output)
    model.compile(
        optimizer=Adam(learning_rate=hp.Choice('learning_rate', [0.001, 0.0005, 0.0001])),
        loss="mse",
        metrics=["mae"]
    )
    return model

# === Assume you have already generated training sequences (X_train, y_train, X_val, y_val, X_test, y_test)
# For demonstration, we create dummy data:
np.random.seed(42)    # fixed seed so the placeholder dataset is identical on every run
sequence_length = 60  # e.g., using past 60 days
num_features = 10     # Adjust to match your feature set (price, volume, indicators, etc.)
X_train = np.random.rand(200, sequence_length, num_features)
y_train = np.random.rand(200)
X_val = np.random.rand(50, sequence_length, num_features)
y_val = np.random.rand(50)
X_test = np.random.rand(50, sequence_length, num_features)
y_test = np.random.rand(50)

# Model input is one sequence: (time steps, features).
input_shape = (X_train.shape[1], X_train.shape[2])
# All tuning/training artifacts are written below this Drive folder.
model_folder = "/content/drive/MyDrive/StockModels/Ensemble"
os.makedirs(model_folder, exist_ok=True)
best_hps_file = os.path.join(model_folder, "best_hyperparameters.json")
tuning_flag_file = os.path.join(model_folder, "hp_tuning_complete.flag")

# === Hyperparameter Tuning ===
# Tuning counts as finished only when BOTH the flag and the saved hyperparameters
# exist. A flag without its JSON (e.g. the file was deleted) would otherwise make
# the loading branch fail, so that case re-runs the search.
tuning_done = os.path.exists(tuning_flag_file) and os.path.exists(best_hps_file)
if not tuning_done:
    if os.path.exists(best_hps_file):
        # Hyperparameters without a completion flag are treated as stale.
        os.remove(best_hps_file)
    print("üîç Hyperparameter tuning...")
    tuner = kt.BayesianOptimization(
        lambda hp: build_ensemble_model(hp, input_shape),
        objective="val_loss",
        max_trials=15,
        executions_per_trial=2,
        directory=os.path.join(model_folder, "tuning"),
        project_name="ensemble_stock_prediction"
    )
    tuner.search(X_train, y_train, epochs=50, validation_data=(X_val, y_val), verbose=1)
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_hps_dict = {param: best_hps.get(param) for param in best_hps.values.keys()}
    with open(best_hps_file, "w") as f:
        json.dump(best_hps_dict, f)
    # The flag is written last, after the hyperparameters are safely on disk.
    with open(tuning_flag_file, "w") as f:
        f.write("tuning complete")
    model = tuner.hypermodel.build(best_hps)
else:
    print("‚úÖ Loading best hyperparameters from file...")
    with open(best_hps_file, "r") as f:
        best_hps_dict = json.load(f)
    # Rebuild a HyperParameters object with every saved value pinned.
    best_hps = kt.HyperParameters()
    for key, value in best_hps_dict.items():
        best_hps.Fixed(key, value)
    model = build_ensemble_model(best_hps, input_shape)
print("‚úÖ Best hyperparameters:", best_hps_dict)

# === Model Training ===
# Train for up to 500 epochs; stop once val_loss has not improved for 20 epochs
# and roll back to the best weights seen.
BATCH_SIZE = 32
early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=500,
    batch_size=BATCH_SIZE,
    callbacks=[early_stop],
    verbose=1,
)

# Persist the trained model next to the tuning artifacts.
best_model_path = os.path.join(model_folder, "best_ensemble_model.keras")
model.save(best_model_path)
print("‚úÖ Best Ensemble Model saved to", best_model_path)

# Loss curves, written to disk instead of being shown inline.
history_plot_path = os.path.join(model_folder, "training_history.png")
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(history.history['loss'], label='Train Loss', color='blue')
ax.plot(history.history['val_loss'], label='Validation Loss', color='red')
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training & Validation Loss")
ax.legend()
fig.savefig(history_plot_path)
plt.close(fig)
print("‚úÖ Training history graph saved to", history_plot_path)

# === Evaluate Model & Directional Accuracy ===
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f"‚úÖ Test Loss: {loss:.4f}, Test MAE: {mae:.4f}")

predictions = model.predict(X_test)
# For inverse scaling, load scaler for target variable (assume saved separately)
# For demonstration, we simulate a scaler (in practice, use joblib.load(scaler_y_path))
from sklearn.preprocessing import MinMaxScaler
scaler_y = MinMaxScaler()
scaler_y.fit(y_test.reshape(-1, 1))  # dummy fit
pred_rescaled = inverse_transform_single_feature(scaler_y, predictions)
y_test_rescaled = inverse_transform_single_feature(scaler_y, y_test)

# Step-to-step changes of the actual and the predicted series. A step counts as
# correct when both moved the same way (a zero move on either side also counts).
# The mask is computed once and reused for the metric and the plot colouring.
actual_moves = np.diff(y_test_rescaled.ravel())
pred_moves = np.diff(pred_rescaled.ravel())
same_direction = (actual_moves * pred_moves) >= 0

correct_direction = int(same_direction.sum())
if same_direction.size:
    directional_accuracy = (correct_direction / same_direction.size) * 100
else:
    # Fewer than two test samples: no step exists, so the accuracy is undefined.
    directional_accuracy = float("nan")
print(f"‚úÖ Directional Accuracy: {directional_accuracy:.2f}%")

# Plot Actual vs Predicted with directional coloring
x_vals = np.arange(len(y_test_rescaled))
plt.figure(figsize=(12, 6))
plt.plot(x_vals, y_test_rescaled, label="Actual Price", color="blue")
for i, hit in enumerate(same_direction):
    # Each predicted segment is drawn separately so it can carry its own colour.
    color = 'green' if hit else 'red'
    plt.plot(x_vals[i:i+2], pred_rescaled[i:i+2], color=color)
blue_line = Line2D([0], [0], color='blue', label='Actual Price')
green_line = Line2D([0], [0], color='green', label='Predicted (Correct Dir)')
red_line = Line2D([0], [0], color='red', label='Predicted (Wrong Dir)')
plt.legend(handles=[blue_line, green_line, red_line])
plt.xlabel("Time")
plt.ylabel("Stock Price")
plt.title("Predicted vs Actual Prices")
pred_plot_path = os.path.join(model_folder, "pred_vs_actual.png")
plt.savefig(pred_plot_path)
plt.close()
print("‚úÖ Prediction vs Actual plot saved to", pred_plot_path)


In [None]:
import os
import numpy as np
import datetime
import pandas as pd
import joblib
import requests
import time

def daily_data_pipeline(ticker, date, model, scaler, sequence_length=60):
    """
    Fetch one day's bar, append it to the stored history and predict from the
    most recent window.

    Steps:
      1. Fetch the daily bar for `date` from Polygon (sentiment is set to neutral 0).
      2. Load the ticker's processed history from Drive and append the new bar.
      3. Recompute the technical indicators over the combined frame, so the new
         row gets real values instead of NaN from a one-row calculation.
      4. Scale with the saved scaler, take the last `sequence_length` rows of
         the feature columns and run the model.

    Args:
        ticker: Symbol, e.g. "AAPL".
        date: Day to fetch, "YYYY-MM-DD".
        model: Trained model exposing `predict`.
        scaler: Fitted scaler exposing `transform`.
        sequence_length: Number of time steps the model expects.

    Returns:
        The model output for the window (`prediction[0][0]`), or None when any
        step fails.
    """
    import glob  # local import: the cell defining this function does not import glob

    # --- API key: environment variable first, then the Colab secret that the
    # collection step uses.
    api_key = os.environ.get("POLYGON_API_KEY", "YOUR_POLYGON_API_KEY")
    if api_key == "YOUR_POLYGON_API_KEY":
        try:
            from google.colab import userdata
            api_key = userdata.get("Polygon_Key")
        except Exception:
            # Not on Colab, or the secret is missing/inaccessible: treat as unset.
            api_key = None
    if not api_key or api_key == "YOUR_POLYGON_API_KEY":
        print("Please set your Polygon API key in the environment variable POLYGON_API_KEY")
        return None

    # --- Fetch the bar for the requested day.
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/day/{date}/{date}"
    try:
        response = requests.get(url, params={"apiKey": api_key}, timeout=30)
    except requests.RequestException as err:
        # Only the type is printed; the message may embed the API key.
        print(f"Error fetching data for {ticker} on {date}: {type(err).__name__}")
        return None
    if response.status_code != 200:
        print(f"Error fetching data for {ticker} on {date}: {response.text}")
        return None
    data = response.json()
    if not data.get("results"):
        print(f"No stock data available for {ticker} on {date}.")
        return None
    df_stock = pd.DataFrame(data["results"])
    # Midnight timestamps, so the column matches the parsed dates of the history file.
    df_stock["Date"] = pd.to_datetime(df_stock["t"], unit="ms").dt.normalize()
    df_stock = df_stock.rename(columns={"o": "Open", "h": "High", "l": "Low", "c": "Close", "v": "Volume"})
    df_stock = df_stock[["Date", "Open", "High", "Low", "Close", "Volume"]].copy()

    # (Optionally) Fetch and process sentiment data for the day...
    # For simplicity, assume neutral sentiment if not available.
    df_stock["sentiment_polarity"] = 0
    df_stock["sentiment_subjectivity"] = 0

    # --- Locate the stored history. The preprocessing step writes files named
    # "<ticker>_<start>_to_<end>_processed.csv", so fall back to that pattern
    # when the plain "<ticker>_processed.csv" does not exist.
    data_dir = "/content/drive/MyDrive/StockData"
    historical_filepath = os.path.join(data_dir, f"{ticker}_processed.csv")
    if not os.path.exists(historical_filepath):
        candidates = glob.glob(os.path.join(data_dir, f"{ticker}_*_processed.csv"))
        if not candidates:
            print("Historical data file not found for", ticker)
            return None
        # ISO dates sort lexicographically, so max() picks the latest range.
        historical_filepath = max(candidates)
    df_hist = pd.read_csv(historical_filepath, parse_dates=["Date"])

    # --- Append the new bar, drop a duplicate of the same day (keeping the fresh
    # one) and compute the indicators over the whole series.
    df_combined = pd.concat([df_hist, df_stock], ignore_index=True)
    df_combined = (
        df_combined
        .drop_duplicates(subset="Date", keep="last")
        .sort_values("Date")
        .reset_index(drop=True)
    )
    df_combined = calculate_technical_indicators(df_combined)

    # --- Scale with the saved scaler.
    df_scaled = df_combined.copy()
    # Identify columns to scale (same as in scaling function)
    numeric_cols = [col for col in df_scaled.select_dtypes(include=['float64','int64']).columns
                    if col not in ['sentiment_polarity', 'sentiment_subjectivity']]
    # If the scaler recorded the column names it was fitted on, use exactly those,
    # in that order, so the transform lines up with the fit.
    fitted_cols = getattr(scaler, "feature_names_in_", None)
    if fitted_cols is not None:
        missing_cols = [col for col in fitted_cols if col not in df_scaled.columns]
        if missing_cols:
            print("Error building feature sequence:", f"missing scaler columns {missing_cols}")
            return None
        numeric_cols = list(fitted_cols)
    df_scaled[numeric_cols] = scaler.transform(df_scaled[numeric_cols])

    # Build sequence: take the last 'sequence_length' rows for features.
    # Define the feature set for prediction (adjust column names as needed)
    feature_cols = ['Open', 'High', 'Low', 'Close', 'Volume',
                    'SMA_10', 'SMA_20', 'EMA_10', 'EMA_20', 'RSI']
    try:
        seq = df_scaled[feature_cols].tail(sequence_length).values
    except KeyError as e:
        print("Error building feature sequence:", e)
        return None

    if seq.shape[0] == 0:
        print("Error building feature sequence:", "no rows available")
        return None
    if seq.shape[0] < sequence_length:
        # Too little history: repeat the oldest row at the front to fill the window.
        pad = np.tile(seq[0], (sequence_length - seq.shape[0], 1))
        seq = np.vstack([pad, seq])
    seq = seq.reshape(1, sequence_length, len(feature_cols))

    prediction = model.predict(seq)
    # NOTE(review): this is the raw model output. If the model was trained on
    # scaled targets the value is in scaled units, not a price — confirm and
    # inverse-transform if needed.
    print(f"Predicted price for {ticker} on {date}: {prediction[0][0]}")
    return prediction[0][0]

# Example usage for daily deployment: load the saved scaler and model for one
# ticker, then run the pipeline for today's date when both are available.
ticker = "AAPL"
today_date = datetime.date.today().strftime("%Y-%m-%d")

# Scaler written by the scaling step ("<ticker>_scaler.pkl").
scaler_path = f"/content/drive/MyDrive/StockScalers/{ticker}_scaler.pkl"
scaler = None
if os.path.exists(scaler_path):
    scaler = joblib.load(scaler_path)
else:
    print("Scaler file not found for", ticker)

# Load the model (or use the one from training)
from tensorflow.keras.models import load_model
model_path = "/content/drive/MyDrive/StockModels/Ensemble/best_ensemble_model.keras"
deployed_model = None
if os.path.exists(model_path):
    deployed_model = load_model(model_path)
else:
    print("Model file not found at", model_path)

# Predict only when both artifacts were found.
if not (scaler is None or deployed_model is None):
    daily_prediction = daily_data_pipeline(ticker, today_date, deployed_model, scaler)
