# Start Execution and Runtime

In [78]:
! pip install numpy pandas yfinance matplotlib joblib tensorflow



In [79]:
import os
import datetime
import logging
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import joblib
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.saving import register_keras_serializable

In [80]:
# ---------------------------
# Setup logging
# ---------------------------
def setup_logging(log_file: str = 'stock_prediction.log') -> None:
    """
    Sets up logging to both file and console with INFO level.

    Safe to call repeatedly (e.g. when a notebook cell is re-run): handlers
    installed by an earlier call are removed and closed first, so every
    message is written once per destination instead of once per call.

    Parameters:
    - log_file: str, filename for logging output.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Drop only the handlers this function installed previously; handlers
    # attached by the host environment are left untouched.
    for handler in list(logger.handlers):
        if getattr(handler, '_from_setup_logging', False):
            logger.removeHandler(handler)
            handler.close()

    formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
    # File handler first, then console handler (same order as before).
    for handler in (logging.FileHandler(log_file), logging.StreamHandler()):
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        # Marker used by the clean-up loop above on the next call.
        handler._from_setup_logging = True
        logger.addHandler(handler)

setup_logging()

In [81]:
# --- Custom Loss and Model Definition (identical to train_and_evaluate.py) ---
@register_keras_serializable(package="MyModels")
def custom_weighted_loss_v2(y_true, y_pred):
    """
    Mean squared error with a weight that grows linearly along axis 1.

    Position k on axis 1 is weighted by 1.0 + 0.5 * k, so errors at later
    positions contribute more to the loss than earlier ones.

    Parameters:
    - y_true: tensor of targets; indexed on axis 1, so it must be at least 2-D.
    - y_pred: tensor of predictions, subtracted element-wise from y_true.

    Returns:
    - scalar tensor: mean of the weighted squared errors.
    """
    n_steps = tf.shape(y_true)[1]
    step_index = tf.cast(tf.range(n_steps), tf.float32)
    # Shape [1, n_steps] so the weights broadcast across the leading axis.
    step_weights = tf.reshape(1.0 + 0.5 * step_index, [1, n_steps])
    squared_error = tf.square(y_true - y_pred)
    return tf.reduce_mean(squared_error * step_weights)

# Short alias for the loss function above.
custom_loss = custom_weighted_loss_v2

In [82]:
@register_keras_serializable(package="MyModels")
class StockLSTMModel(models.Model):
    """
    Two stacked LSTM layers followed by a small dense head.

    Layer order: LSTM -> Dropout -> LSTM -> Dropout -> Dense(25, relu) ->
    Dense(forecast_horizon). The final layer emits one value per forecast
    step.
    """

    def __init__(self, sequence_length: int, forecast_horizon: int = 5,
                 lstm_units: int = 100, dropout_rate: float = 0.2, **kwargs):
        """
        Parameters:
        - sequence_length: int, stored and serialised in the config; it is
          not used to build any layer in this class.
        - forecast_horizon: int, number of units in the output layer.
        - lstm_units: int, number of units in each of the two LSTM layers.
        - dropout_rate: float, rate used by both Dropout layers.
        - **kwargs: forwarded to the Keras Model constructor.
        """
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.forecast_horizon = forecast_horizon
        self.lstm_units = lstm_units
        self.dropout_rate = dropout_rate

        # First LSTM returns the full sequence so the second LSTM can consume it.
        self.lstm1 = layers.LSTM(self.lstm_units, return_sequences=True)
        self.dropout1 = layers.Dropout(self.dropout_rate)
        # Second LSTM returns only its last output.
        self.lstm2 = layers.LSTM(self.lstm_units, return_sequences=False)
        self.dropout2 = layers.Dropout(self.dropout_rate)
        self.dense1 = layers.Dense(25, activation='relu')
        self.dense2 = layers.Dense(self.forecast_horizon)

    def call(self, inputs, training: bool = False):
        """
        Forward pass.

        Parameters:
        - inputs: input tensor fed to the first LSTM layer
          (assumes shape (batch, timesteps, features) -- TODO confirm).
        - training: bool, forwarded to the Dropout layers.

        Returns:
        - output of the final Dense layer (forecast_horizon units).
        """
        x = self.lstm1(inputs)
        x = self.dropout1(x, training=training)
        x = self.lstm2(x)
        x = self.dropout2(x, training=training)
        x = self.dense1(x)
        return self.dense2(x)

    def get_config(self):
        """
        Return the base config extended with this class's constructor
        arguments so the model can be re-created when it is loaded.
        """
        config = super().get_config()
        config.update({
            "sequence_length": self.sequence_length,
            "forecast_horizon": self.forecast_horizon,
            "lstm_units": self.lstm_units,
            "dropout_rate": self.dropout_rate
        })
        return config

In [83]:
def download_stock_data(ticker, start_date, end_date):
    """
    Download price data for a ticker via yfinance.

    Parameters:
    - ticker: ticker symbol passed to yf.download.
    - start_date: passed to yf.download as `start`.
    - end_date: passed to yf.download as `end`.

    Returns:
    - the object returned by yf.download, if it is not empty.

    Raises:
    - ValueError: if the download result is empty.
    """
    frame = yf.download(ticker, start=start_date, end=end_date)
    if not frame.empty:
        return frame
    raise ValueError("No data returned for the given date range.")

---

In [84]:
def create_sequences(data, window_size, forecast_horizon):
    """
    Create sliding-window sequences for training.

    Assumes data is a 2D array-like where column 0 (Close) is the target.

    Parameters:
    - data: array-like of shape (n_rows, n_features).
    - window_size: number of consecutive rows in each input window.
    - forecast_horizon: number of rows after each window taken as target.

    Returns:
    - X: array of shape (n_samples, window_size, n_features).
    - y: array of shape (n_samples, forecast_horizon), column 0 of the rows
      that follow each window.
      n_samples = n_rows - window_size - forecast_horizon + 1. When that is
      not positive, both arrays are empty but keep their trailing
      dimensions, so callers can still inspect shapes or test `.size == 0`.
    """
    data = np.asarray(data)
    n_samples = len(data) - window_size - forecast_horizon + 1

    if n_samples <= 0:
        # Not enough rows for even one (window, target) pair.
        empty_X = np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0, forecast_horizon), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[i : i + window_size] for i in range(n_samples)])
    y = np.stack([
        data[i + window_size : i + window_size + forecast_horizon, 0]
        for i in range(n_samples)
    ])
    return X, y

# Enter Target Date (YYYY-MM-DD)

In [None]:
# @title Select Target Date
# Loads the saved model and scaler, downloads the latest prices, forecasts
# the business days up to TARGET_DATE, and saves the forecast as CSV + PNG.
model_path = "/content/amzn_base.keras"
scaler_path = "/content/amzn_base_scaler.pkl"

# Fail fast: continuing without the artifacts would only crash later with a
# less readable error inside the loaders.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found: {model_path}")
if not os.path.exists(scaler_path):
    raise FileNotFoundError(f"Scaler file not found: {scaler_path}")

# Load model and scaler.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load scaler
# files that come from a trusted source.
model = tf.keras.models.load_model(model_path)
scaler = joblib.load(scaler_path)
logging.info("Model and scaler loaded successfully.")

# ---------------------------
# Inference Setup
# ---------------------------
# NOTE(review): presumably these must match the configuration the saved
# model and scaler were trained with -- confirm against the training script.
window_size = 1
forecast_horizon = 30
num_features = 1  # only the 'Close' column is selected below

ticker = "AMZN"
training_start = "2011-01-01"
today_str = datetime.datetime.today().strftime("%Y-%m-%d")
data = download_stock_data(ticker, training_start, today_str)
last_trading_day = data.index[-1]
logging.info("Last trading day from downloaded data: %s", last_trading_day)

# Use only the necessary features.
# data['Daily_Return'] = data['Close'].pct_change()
# data['SMA_10'] = data['Close'].rolling(window=10).mean()
# data['SMA_30'] = data['Close'].rolling(window=30).mean()
data = data[['Close']].dropna() # , 'Volume', 'Daily_Return', 'SMA_10', 'SMA_30'

all_values = data.values

if len(all_values) < window_size:
    raise ValueError("Not enough data to form input sequence.")

# Scale data using the loaded scaler.
all_scaled = scaler.transform(all_values)
recent_sequence = all_scaled[-window_size:]

# Get target forecast date from the user.
last_date = last_trading_day.date()

####
TARGET_DATE = ""  # @param {type:"string"}
####
target_date_str = TARGET_DATE.strip() # input(f"Enter target date after {last_date} (YYYY-MM-DD): ").strip()
try:
    target_date = datetime.datetime.strptime(target_date_str, "%Y-%m-%d").date()
except ValueError as err:
    raise ValueError(
        f"Invalid date format: {target_date_str!r} (expected YYYY-MM-DD)."
    ) from err

if target_date <= last_date:
    raise ValueError("Target date must be after the last trading day.")

# np.busday_count counts the half-open range [begin, end). Shift both ends by
# one day so the count covers (last_date, target_date]: the last trading day
# itself is excluded and the target date is included. Without the shift, a
# weekend target would be over-counted by one business day.
one_day = np.timedelta64(1, 'D')
forecast_days = int(np.busday_count(
    np.datetime64(last_date, 'D') + one_day,
    np.datetime64(target_date, 'D') + one_day
))
if forecast_days <= 0:
    raise ValueError(
        "No business days between the last trading day and the target date."
    )
if forecast_days > forecast_horizon:
    logging.warning(
        "Target date is %d business days ahead; forecast limited to %d days.",
        forecast_days, forecast_horizon
    )
    forecast_days = forecast_horizon

# Predict using the last available sequence.
prediction_vector = model.predict(recent_sequence[None, :, :])[0]
predictions = prediction_vector[:forecast_days].reshape(-1, 1)
# The model may emit fewer steps than requested; keep every later array the
# same length as the predictions actually available.
forecast_days = len(predictions)

# Invert scaling for the 'Close' column (index 0).
dummy = np.zeros((forecast_days, num_features))
dummy[:, 0] = predictions[:, 0]
dummy_inversed = scaler.inverse_transform(dummy)
final_forecast = dummy_inversed[:, 0]

pred_dates = pd.bdate_range(last_trading_day + pd.Timedelta(days=1), periods=forecast_days).date
print(f"\nPredicted closing prices for next {forecast_days} business days until {target_date}:")
for d, price in zip(pred_dates, final_forecast):
    print(f"  {d}: ${price:.2f}")

# Save predictions.
save_dir = os.path.join("saved-data", "final_predictions")
os.makedirs(save_dir, exist_ok=True)
base_filename = f"final_predictions_{ticker}_{target_date_str}"
# One timestamp for both files so the CSV and PNG of a run share a name, and
# a format without spaces or colons so the name is valid on every platform.
run_stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
csv_path = os.path.join(save_dir, base_filename + f"_retrained_{run_stamp}" + ".csv")
png_path = os.path.join(save_dir, base_filename + f"_retrained_{run_stamp}" + ".png")

df_pred = pd.DataFrame({
    "Date": [d.strftime("%Y-%m-%d") for d in pred_dates],
    "Forecast": final_forecast
})
df_pred.to_csv(csv_path, index=False)
logging.info("Forecast data saved to %s", csv_path)

plt.figure(figsize=(8, 4))
plt.plot(pred_dates, final_forecast, marker='o', linestyle='--', label='Forecast')
plt.title(f"{ticker} Forecast until {target_date_str}")
plt.xlabel("Date")
plt.ylabel("Price (USD)")
plt.legend()
plt.tight_layout()
plt.savefig(png_path)
logging.info("Forecast plot saved to %s", png_path)
plt.show()



# Retrain and Save New Model: (y/n)

In [None]:
# ---------------------------
# Optional Retraining Component
# ---------------------------
# Set RETRAIN_MODEL to "y" to fine-tune the loaded model on the downloaded
# data and overwrite the saved model file; any other value skips retraining.
RETRAIN_MODEL = "" # @param {type: "string"}
# Normalise the form value so inputs such as "Y" or " y " are accepted too.
retrain_option = RETRAIN_MODEL.strip().lower() # input("Would you like to retrain the model with the latest data? (y/n): ").strip().lower()
if retrain_option == "y":
    logging.info("Starting retraining with updated data.")
    # Optionally, you can choose to re-fit the scaler with the new data.
    # For now, we use the existing scaler to transform the full dataset.
    full_values = data.values
    full_scaled = scaler.transform(full_values)

    # Create sequences from the full scaled data.
    # Here we assume the target is the 'Close' price (index 0).
    X_train, y_train = create_sequences(full_scaled, window_size, forecast_horizon)
    if X_train.size == 0:
        # Stop here: fitting on an empty training set cannot succeed, and the
        # saved model must not be overwritten after a failed retraining.
        raise ValueError("Not enough data for retraining sequences.")

    logging.info("Training data shape: X_train: %s, y_train: %s", X_train.shape, y_train.shape)

    # Fine-tune the model with the new data.
    # Adjust epochs and batch size as necessary.
    history = model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    print("Retraining Completed.")
    logging.info("Retraining completed.")

    # Save the updated model.
    # NOTE: this overwrites the model file that was loaded for inference.
    # saved_model = input("Save As: ")
    model.save(model_path)


    logging.info("Updated model saved to %s", model_path)
    print("Retraining completed and model updated.")
