In [5]:
# fixed_improved_lstm_adbl_with_sentiment_and_techindicators_with_classification_fixed.py
"""
Pipeline with sentiment + technical indicators support, plus directional classification metrics.
All outputs (JSON, PNGs, CSVs, models) are saved to outputs/<SYMBOL>/.
Fixes broadcasting errors when inverse-transforming the scaled close values.
Requirements: pandas, numpy, matplotlib, seaborn, scikit-learn, tensorflow, joblib
"""

import os
import random
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import datetime
import joblib

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import regularizers

# -------------------------
# Config / reproducibility
# -------------------------
RANDOM_SEED = 42
# Seed NumPy, the stdlib RNG and TensorFlow (in that order) with the same value.
for _seed_rng in (np.random.seed, random.seed, tf.random.set_seed):
    _seed_rng(RANDOM_SEED)

DATA_PATH = "NABIL.csv"    # <-- change to your CSV file (or path)
SEQ_LEN = 60               # number of consecutive rows in each input window

# TEST_PCT: share of all rows held out at the end of the series for testing.
# VAL_WITHIN_TRAIN_PCT: share of the remaining rows used for validation.
TEST_PCT = 0.10
VAL_WITHIN_TRAIN_PCT = 0.10

# Raw CSV columns that get numeric cleaning, and accepted spellings of the percent-change column.
RAW_FEATURES_REQUIRED = ['Open', 'High', 'Low', 'Close', 'Volume', 'Turnover', 'sentiment']
PCT_CANDIDATES = ['Percent_Change', 'Percent Change', 'PctChange', 'Percent']

# Names of the engineered indicator columns, grouped by indicator family.
ENGINEERED = (
    ['MA5', 'MA10', 'Return', 'RSI14']
    + ['SMA20', 'SMA100', 'EMA20']
    + ['BB_upper', 'BB_lower']
    + ['MACD', 'MACD_signal', 'MACD_hist']
    + ['STOCH_K', 'STOCH_D']
    + ['Volume_MA20']
)

FEATURES = None            # filled in after feature engineering
CLOSE_COL_NAME = 'Close'

MODEL_SAVE_DIR = "saved_models"
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# -------------------------
# outputs directory (symbol-specific)
# -------------------------
# The symbol is the upper-cased CSV file name without its extension.
_data_stem, _data_ext = os.path.splitext(os.path.basename(DATA_PATH))
symbol = _data_stem.upper()
OUTPUTS_DIR = "outputs"
symbol_dir = os.path.join(OUTPUTS_DIR, symbol)
os.makedirs(symbol_dir, exist_ok=True)

# -------------------------
# Helper indicator functions
# -------------------------
def compute_RSI(series, period=14):
    """Return a Relative Strength Index series built from simple rolling means.

    Gains and losses are the positive and negative parts of the one-step
    difference of ``series``; each is averaged over ``period`` rows (a full
    window is required). Rows where the index is undefined, such as the
    warm-up rows, are filled with the neutral value 50.0.
    """
    change = series.diff()
    ups = change.clip(lower=0)
    downs = -change.clip(upper=0)

    def _window_mean(values):
        # min_periods == window: no value until a full window is available
        return values.rolling(window=period, min_periods=period).mean()

    mean_up = _window_mean(ups)
    mean_down = _window_mean(downs)
    # The small epsilon keeps the ratio finite when the window has no losses.
    strength = mean_up / (mean_down + 1e-9)
    index = 100 - (100 / (1 + strength))
    return index.fillna(50.0)

def compute_bollinger(series, window=20, n_std=2):
    """Return the (upper, lower) Bollinger bands of ``series``.

    The bands sit ``n_std`` rolling standard deviations above and below the
    rolling mean. Windows may be partial (``min_periods=1``); an undefined
    standard deviation (e.g. a single-row window) is treated as 0.
    """
    roller = series.rolling(window=window, min_periods=1)
    centre = roller.mean()
    spread = roller.std().fillna(0)
    band_upper = centre + n_std * spread
    band_lower = centre - n_std * spread
    return band_upper, band_lower

def compute_macd(series, fast=12, slow=26, signal=9):
    """Return the (MACD line, signal line, histogram) of ``series``.

    The MACD line is the fast EMA minus the slow EMA, the signal line is an
    EMA of the MACD line, and the histogram is their difference. All EMAs
    use the recursive form (``adjust=False``).
    """
    def _ema(values, span):
        return values.ewm(span=span, adjust=False).mean()

    line = _ema(series, fast) - _ema(series, slow)
    trigger = _ema(line, signal)
    histogram = line - trigger
    return line, trigger, histogram

def compute_stochastic(high, low, close, k_period=14, d_period=3):
    """Return the stochastic oscillator pair (%K, %D).

    %K locates ``close`` within the rolling low/high range of the last
    ``k_period`` rows, scaled to 0-100; %D is the rolling mean of %K over
    ``d_period`` rows. Undefined values (for example a zero-width range)
    are replaced by the neutral value 50.0.
    """
    floor = low.rolling(window=k_period, min_periods=1).min()
    ceiling = high.rolling(window=k_period, min_periods=1).max()
    width = ceiling - floor
    # A zero-width range becomes NaN so the division yields NaN, not inf.
    width = width.mask(width == 0)
    percent_k = (100 * (close - floor) / width).fillna(50.0)
    percent_d = percent_k.rolling(window=d_period, min_periods=1).mean()
    return percent_k, percent_d.fillna(50.0)

# -------------------------
# 1) Load & basic cleaning
# -------------------------
df = pd.read_csv(DATA_PATH)

# Normalize column names (trim whitespace, spaces -> underscores)
df.columns = [c.strip().replace(" ", "_") for c in df.columns]

# Unify percent column: the first candidate present is copied to 'Percent_Change'.
found_pct = None
for cand in PCT_CANDIDATES:
    col_name = cand.replace(" ", "_")
    if col_name in df.columns:
        df['Percent_Change'] = df[col_name]
        found_pct = col_name
        break

if found_pct is None:
    # Deliberately do NOT create an all-NaN placeholder column: the dropna()
    # over the required columns after feature engineering would then discard
    # every row. Every later use of 'Percent_Change' is guarded by a
    # "column in df.columns" check, so leaving it out is safe.
    print("WARNING: no percent-change column found in CSV; 'Percent_Change' will not be used.")
else:
    # Clean percent strings ("1.25%", "1,234.5") and convert to numbers
    df['Percent_Change'] = df['Percent_Change'].astype(str).str.replace('%', '', regex=False).str.replace(',', '', regex=False).str.strip()
    df['Percent_Change'] = pd.to_numeric(df['Percent_Change'], errors='coerce')

# Clean raw numeric columns (remove commas/quotes); unparseable cells become NaN
for c in RAW_FEATURES_REQUIRED:
    if c in df.columns:
        df[c] = df[c].astype(str).str.replace(',', '', regex=False).str.replace('"', '', regex=False).str.strip()
        df[c] = pd.to_numeric(df[c], errors='coerce')
    else:
        print(f"WARNING: expected raw column '{c}' not found in CSV.")

# Date
if 'Date' not in df.columns:
    raise ValueError("CSV must contain a 'Date' column")
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Rows whose date could not be parsed (NaT) would sort to the end and be
# treated as the most recent data, so drop them before sorting.
bad_date_rows = int(df['Date'].isna().sum())
if bad_date_rows:
    print(f"WARNING: dropping {bad_date_rows} row(s) with unparseable 'Date' values.")
    df = df.dropna(subset=['Date'])
df = df.sort_values('Date').reset_index(drop=True)

print("Loaded rows:", len(df))
print(df.dtypes)
print(df.head())

# -------------------------
# 2) EDA checks (quick plots)
# -------------------------
print("\n--- Basic EDA (raw columns present) ---")
print("Date range:", df['Date'].min(), "->", df['Date'].max())

# Only the raw / percent columns that actually exist in the frame take part.
eda_cols = [name for name in RAW_FEATURES_REQUIRED + ['Percent_Change'] if name in df.columns]
print("Columns available for EDA:", eda_cols)
print("Null counts:\n", df[eda_cols].isna().sum())

# Line chart of the close price, saved into the symbol's output folder.
eda_path = os.path.join(symbol_dir, f"{symbol}_close_over_time.png")
plt.figure(figsize=(10,4))
plt.plot(df['Date'], df[CLOSE_COL_NAME], linewidth=1)
plt.title("Close price over time")
plt.xlabel("Date")
plt.ylabel("Close")
plt.tight_layout()
plt.savefig(eda_path)
plt.close()

# Correlation heatmap, drawn only when at least three non-object columns exist.
num_cols_for_corr = [name for name in eda_cols if df[name].dtype != object]
if len(num_cols_for_corr) >= 3:
    corr = df[num_cols_for_corr].corr()
    plt.figure(figsize=(8,6))
    sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm")
    plt.title("Correlation (raw cols)")
    corr_path = os.path.join(symbol_dir, f"{symbol}_corr.png")
    plt.tight_layout()
    plt.savefig(corr_path)
    plt.close()

# -------------------------
# 3) Feature engineering (technical indicators)
# -------------------------
if CLOSE_COL_NAME not in df.columns:
    raise ValueError(f"'{CLOSE_COL_NAME}' column is required but not present.")

_close = df[CLOSE_COL_NAME]

# Short moving averages, one-step return and RSI of the close price.
for _ma_name, _ma_window in (('MA5', 5), ('MA10', 10)):
    df[_ma_name] = _close.rolling(window=_ma_window, min_periods=1).mean()
df['Return'] = _close.pct_change().fillna(0)
df['RSI14'] = compute_RSI(_close, 14)

# Longer simple moving averages plus a 20-span exponential one.
for _sma_name, _sma_window in (('SMA20', 20), ('SMA100', 100)):
    df[_sma_name] = _close.rolling(window=_sma_window, min_periods=1).mean()
df['EMA20'] = _close.ewm(span=20, adjust=False).mean()

_bb_upper, _bb_lower = compute_bollinger(_close, window=20, n_std=2)
df['BB_upper'] = _bb_upper
df['BB_lower'] = _bb_lower

_macd, _macd_signal, _macd_hist = compute_macd(_close, fast=12, slow=26, signal=9)
df['MACD'] = _macd
df['MACD_signal'] = _macd_signal
df['MACD_hist'] = _macd_hist

# The stochastic oscillator needs High, Low and Close; skip it when any is missing.
if {'High', 'Low', 'Close'}.issubset(df.columns):
    _stoch_k, _stoch_d = compute_stochastic(df['High'], df['Low'], df['Close'], k_period=14, d_period=3)
    df['STOCH_K'] = _stoch_k
    df['STOCH_D'] = _stoch_d

if 'Volume' in df.columns:
    df['Volume_MA20'] = df['Volume'].rolling(window=20, min_periods=1).mean()

# Drop every row that has a NaN in any raw / engineered column that is present.
required_after_engineer = RAW_FEATURES_REQUIRED + ['Percent_Change'] + ENGINEERED
present_required = [name for name in required_after_engineer if name in df.columns]
print("Columns required/present after engineering:", present_required)
df = df.dropna(subset=present_required).reset_index(drop=True)
print("Rows after feature engineering:", len(df))

# Model input columns, in this fixed order, restricted to those that exist.
_feature_order = (
    'Open', 'High', 'Low', 'Close', 'Volume', 'Turnover', 'Percent_Change', 'sentiment',
    'MA5', 'MA10', 'Return', 'RSI14',
    'SMA20', 'SMA100', 'EMA20', 'BB_upper', 'BB_lower',
    'MACD', 'MACD_signal', 'MACD_hist', 'STOCH_K', 'STOCH_D', 'Volume_MA20',
)
FEATURES = [name for name in _feature_order if name in df.columns]

if 'Close' not in FEATURES:
    raise ValueError("'Close' must be in FEATURES after engineering.")

CLOSE_IDX = FEATURES.index('Close')
print("Final FEATURES used:", FEATURES)

# -------------------------
# 4) Train / Val / Test splits (time-based)
# -------------------------
n = len(df)
if n <= SEQ_LEN + 5:
    raise ValueError("Dataset too small for SEQ_LEN and splitting. Provide more rows or reduce SEQ_LEN.")

# The last TEST_PCT of the rows form the test region; everything before is train+val.
test_start = int(np.floor(n * (1.0 - TEST_PCT)))
train_and_val_end = train_and_val_n = test_start

# The tail of the train+val region is validation, the head is training.
val_within_train_n = int(np.floor(train_and_val_n * VAL_WITHIN_TRAIN_PCT))
train_end = train_and_val_n - val_within_train_n
val_start, val_end = train_end, train_and_val_n

print(f"Total rows: {n}")
print(f"Test region: rows {test_start} -> {n-1} (last {n-test_start} rows, ~{TEST_PCT*100:.2f}%)")
print(f"Train+Val region: rows 0 -> {train_and_val_end-1} (first {train_and_val_end} rows, ~{(1-TEST_PCT)*100:.2f}%)")
print(f" -> Within that: Train rows 0 -> {train_end-1} (count {train_end}); Val rows {val_start} -> {val_end-1} (count {val_within_train_n})")
print(f"Approx split (train/val/test): {train_end}/{val_within_train_n}/{n-test_start} rows")

raw_data = df[FEATURES].values.astype(float)

if train_end <= 0:
    raise ValueError("Training region computed as empty. Reduce VAL_WITHIN_TRAIN_PCT or TEST_PCT.")

# Fit the scaler on the training rows only, then apply it to every row.
scaler = MinMaxScaler().fit(raw_data[:train_end])
scaled_all = scaler.transform(raw_data)

# -------------------------
# 5) Sequence creation
# -------------------------
def create_sequences_from_scaled(scaled, seq_len, close_idx):
    """Turn a 2-D scaled array into sliding-window samples.

    For every row index ``i`` from ``seq_len`` up to the last row, the input
    window is ``scaled[i - seq_len:i]`` and the target is
    ``scaled[i, close_idx]``.

    Returns:
        (X, y, seq_idx) as NumPy arrays, where ``seq_idx`` holds the row
        index ``i`` of each target.
    """
    target_rows = list(range(seq_len, len(scaled)))
    windows = [scaled[row - seq_len:row] for row in target_rows]
    targets = [scaled[row, close_idx] for row in target_rows]
    return np.array(windows), np.array(targets), np.array(target_rows)

X_all, y_all, seq_idx = create_sequences_from_scaled(scaled_all, SEQ_LEN, CLOSE_IDX)
print("Total sequences (samples):", X_all.shape, y_all.shape)

# A sample belongs to the split that contains the row index of its TARGET;
# its input window may reach back into the preceding split.
train_mask = seq_idx < train_end
val_mask = (seq_idx >= val_start) & (seq_idx < val_end)
test_mask = seq_idx >= test_start

X_train, y_train = X_all[train_mask], y_all[train_mask]
X_val, y_val = X_all[val_mask], y_all[val_mask]
X_test, y_test = X_all[test_mask], y_all[test_mask]

print("After splitting to sequences:")
print("  Train samples:", X_train.shape, y_train.shape)
print("  Val samples:  ", X_val.shape, y_val.shape)
print("  Test samples: ", X_test.shape, y_test.shape)

if len(X_train) == 0:
    raise ValueError("Training set is empty after splitting — reduce VAL_WITHIN_TRAIN_PCT or TEST_PCT, or add more data.")
# Training passes validation_data to fit() and every callback monitors
# val_loss, so an empty validation split must fail here with a clear message.
if len(X_val) == 0:
    raise ValueError("Validation set is empty — increase VAL_WITHIN_TRAIN_PCT or add more data.")
if len(X_test) == 0:
    raise ValueError("Test set is empty — increase dataset size or reduce TEST_PCT.")

# -------------------------
# 6) Build the LSTM model (regularized)
# -------------------------
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` layer argument, which newer Keras versions warn about.
model = Sequential([
    tf.keras.Input(shape=(SEQ_LEN, len(FEATURES))),
    LSTM(64, return_sequences=True,
         recurrent_dropout=0.15, kernel_regularizer=regularizers.l2(1e-4)),
    Dropout(0.25),
    LSTM(32, recurrent_dropout=0.15, kernel_regularizer=regularizers.l2(1e-4)),
    Dropout(0.25),
    Dense(1)
])

model.compile(optimizer="adam", loss="mse")
model.summary()

# Callbacks + model paths
# Timezone-aware replacement for the deprecated datetime.utcnow(); the
# formatted string is the same.
timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
ckpt_path = os.path.join(symbol_dir, f"best_lstm_{timestamp}.h5")
symbol_model_h5 = os.path.join(symbol_dir, f"{symbol}_model.h5")
symbol_savedmodel_dir = os.path.join(symbol_dir, f"{symbol}_savedmodel_{timestamp}")
symbol_scaler_path = os.path.join(symbol_dir, f"{symbol}_scaler.joblib")
symbol_pred_csv = os.path.join(symbol_dir, f"{symbol}_test_predictions.csv")
symbol_pred_labels_csv = os.path.join(symbol_dir, f"{symbol}_test_predictions_with_labels.csv")
symbol_last_seq_csv = os.path.join(symbol_dir, f"{symbol}_last_sequence_features_scaled.csv")
symbol_meta_json = os.path.join(symbol_dir, f"{symbol}_meta.json")
loss_plot_path = os.path.join(symbol_dir, f"{symbol}_train_loss.png")
pred_plot_path = os.path.join(symbol_dir, f"{symbol}_prediction.png")
confusion_path = os.path.join(symbol_dir, f"{symbol}_confusion.png")
eda_path = os.path.join(symbol_dir, f"{symbol}_close_over_time.png")
corr_path = os.path.join(symbol_dir, f"{symbol}_corr.png")
json_path = os.path.join(symbol_dir, f"{symbol}.json")

# Stop early on a val_loss plateau (restoring the best weights), halve the
# learning rate on shorter plateaus, and checkpoint the best model so far.
callbacks = [
    EarlyStopping(monitor="val_loss", patience=12, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=6, verbose=1, min_lr=1e-6),
    ModelCheckpoint(ckpt_path, monitor='val_loss', save_best_only=True, verbose=1)
]

# -------------------------
# 7) Train (no shuffle)
# -------------------------
# shuffle=False keeps the samples in chronological order within each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    shuffle=False,
    callbacks=callbacks,
    verbose=2,
)

# plot losses and save
plt.figure(figsize=(8,4))
for _history_key, _curve_label in (('loss', 'train_loss'), ('val_loss', 'val_loss')):
    plt.plot(history.history[_history_key], label=_curve_label)
plt.yscale('log')
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE, log scale)')
plt.legend()
plt.title('Training vs Validation Loss')
plt.tight_layout()
plt.savefig(loss_plot_path)
plt.close()

# -------------------------
# 8) Test evaluation and robust inverse transform helper
# -------------------------
def inverse_transform_close(scaled_close_array, scaler, close_idx=CLOSE_IDX):
    """
    Map scaled close values back to the original price scale.

    scaled_close_array: array-like of scaled close values; it is flattened to 1D.
    scaler: fitted scaler exposing ``inverse_transform`` (a MinMaxScaler in this script).
    close_idx: column position of the close feature in the scaler input.
    Returns: 1D array of inverse-transformed close values.

    The scaler only accepts full-width rows, so the values are placed in the
    ``close_idx`` column of an otherwise zero matrix, the whole matrix is
    inverse-transformed, and only that column is returned.
    """
    flat_values = np.asarray(scaled_close_array).reshape(-1)
    try:
        width = scaler.n_features_in_
    except AttributeError:
        # Scaler does not expose its feature count: fall back to the feature list.
        width = len(FEATURES)
    padded = np.zeros((len(flat_values), width), dtype=float)
    padded[:, close_idx] = flat_values
    return scaler.inverse_transform(padded)[:, close_idx]

# get scaled predictions and ground-truth
y_test_pred_scaled = model.predict(X_test).flatten()
y_test_inv = inverse_transform_close(y_test, scaler)            # y_test is scaled close values
y_test_pred_inv = inverse_transform_close(y_test_pred_scaled, scaler)

mae = mean_absolute_error(y_test_inv, y_test_pred_inv)
rmse = np.sqrt(mean_squared_error(y_test_inv, y_test_pred_inv))
print(f"Test MAE: {mae:.4f}, RMSE: {rmse:.4f}")

# -------------------------
# 9) Feature importance check (permutation) - optional
# -------------------------
if 'sentiment' in FEATURES:
    sent_idx = FEATURES.index('sentiment')

    def permutation_importance_lstm(model, X, y_true, scaler, feature_idx, close_idx=CLOSE_IDX):
        """Return the % RMSE increase caused by shuffling one input feature.

        model: fitted model exposing ``predict``.
        X: 3D input array; it is unpacked as (samples, timesteps, features).
        y_true: scaled close targets matching ``X``.
        scaler: fitted scaler handed to ``inverse_transform_close``.
        feature_idx: position (last axis of ``X``) of the feature to shuffle.
        close_idx: position of the close column in the scaler input; it is
            forwarded to every inverse transform.

        The feature's values are shuffled jointly across all samples and
        timesteps using NumPy's global RNG. RMSE is measured on the original
        price scale. Returns 0.0 when the baseline RMSE is not positive.
        """
        n_samples, seq_len, _ = X.shape
        X_shuffled = X.copy()
        flat = X_shuffled[:, :, feature_idx].flatten()
        np.random.shuffle(flat)
        X_shuffled[:, :, feature_idx] = flat.reshape(n_samples, seq_len)

        y_true_inv = inverse_transform_close(y_true, scaler, close_idx)
        base_pred_inv = inverse_transform_close(model.predict(X).flatten(), scaler, close_idx)
        shuffled_pred_inv = inverse_transform_close(model.predict(X_shuffled).flatten(), scaler, close_idx)

        base_rmse = np.sqrt(mean_squared_error(y_true_inv, base_pred_inv))
        new_rmse = np.sqrt(mean_squared_error(y_true_inv, shuffled_pred_inv))

        if base_rmse > 0:
            return 100 * (new_rmse - base_rmse) / base_rmse
        return 0.0

    importance_sentiment = permutation_importance_lstm(model, X_test, y_test, scaler, sent_idx)
    print(f"Sentiment importance (RMSE increase % if shuffled): {importance_sentiment:.2f}%")
else:
    importance_sentiment = None

# Plot & save chunk of test actual vs predicted
# Only the most recent plot_n test samples (at most 200) are drawn.
plot_n = min(200, len(y_test_inv))
test_dates = pd.to_datetime(df['Date'].iloc[seq_idx[test_mask]][-plot_n:])

plt.figure(figsize=(12,4))
for _curve, _curve_name in ((y_test_inv, 'Actual Close'), (y_test_pred_inv, 'Predicted Close')):
    plt.plot(test_dates, _curve[-plot_n:], label=_curve_name)
_date_axis = plt.gca().xaxis
_date_axis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # shows YYYY-MM
_date_axis.set_major_locator(mdates.MonthLocator())
plt.xticks(rotation=30)
plt.title('Test: Actual vs Predicted Close')
plt.legend()
plt.tight_layout()
plt.savefig(pred_plot_path)
plt.close()

# -------------------------
# 10) Classification: convert regression outputs -> UP/DOWN/UNCHANGED
# -------------------------
# Relative changes whose magnitude is at most tol are labelled UNCHANGED.
tol = 0.001

# Each test target is compared with the close of the row just before it
# (row 0 is compared with itself because the index is floored at 0).
test_indices = seq_idx[test_mask]
prev_idx = np.clip(test_indices - 1, 0, None)
prev_close_vals = df[CLOSE_COL_NAME].iloc[prev_idx].to_numpy()

# The epsilon guards the division against a previous close of exactly zero.
_prev_close_safe = prev_close_vals + 1e-9
actual_rel = (y_test_inv - prev_close_vals) / _prev_close_safe
pred_rel = (y_test_pred_inv - prev_close_vals) / _prev_close_safe

def rel_to_label(rel_array, tol=tol):
    """Label each relative change as 'UP', 'DOWN' or 'UNCHANGED'.

    A value above ``tol`` is 'UP', a value below ``-tol`` is 'DOWN', and
    anything else is 'UNCHANGED'. The input is flattened first; the result
    is a NumPy array of strings.
    """
    def _classify(change):
        if change > tol:
            return 'UP'
        if change < -tol:
            return 'DOWN'
        return 'UNCHANGED'

    return np.array([_classify(change) for change in np.asarray(rel_array).flatten()])

actual_labels = rel_to_label(actual_rel, tol=tol)
pred_labels = rel_to_label(pred_rel, tol=tol)

# Class frequencies of the true and the predicted labels.
for _heading, _label_array in (("Actual label distribution:", actual_labels),
                               ("Predicted label distribution:", pred_labels)):
    unique, counts = np.unique(_label_array, return_counts=True)
    print(_heading, dict(zip(unique, counts)))

acc = accuracy_score(actual_labels, pred_labels)
prec_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
    actual_labels, pred_labels, average='macro', zero_division=0
)
prec_weighted, recall_weighted, f1_weighted, _ = precision_recall_fscore_support(
    actual_labels, pred_labels, average='weighted', zero_division=0
)

print("\n--- Direction classification metrics ---")
print(f"Accuracy: {acc:.4f}")
print(f"Macro Precision: {prec_macro:.4f}, Macro Recall: {recall_macro:.4f}, Macro F1: {f1_macro:.4f}")
print(f"Weighted Precision: {prec_weighted:.4f}, Weighted Recall: {recall_weighted:.4f}, Weighted F1: {f1_weighted:.4f}\n")

# Fixed class order shared by the per-class scores, the report and the confusion matrix.
_class_order = ['UP', 'DOWN', 'UNCHANGED']

per_class_prec, per_class_rec, per_class_f1, _ = precision_recall_fscore_support(
    actual_labels, pred_labels, labels=_class_order, zero_division=0
)
print("Per-class (labels = ['UP','DOWN','UNCHANGED']):")
for lab, p, r, f in zip(_class_order, per_class_prec, per_class_rec, per_class_f1):
    print(f"  {lab:9s} -> Precision: {p:.4f}, Recall: {r:.4f}, F1: {f:.4f}")

print("\nFull classification report:\n")
print(classification_report(actual_labels, pred_labels, labels=_class_order, zero_division=0))

cm = confusion_matrix(actual_labels, pred_labels, labels=_class_order)
print("Confusion matrix (rows=actual, cols=predicted):")
print(cm)

plt.figure(figsize=(6,4))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=_class_order, yticklabels=_class_order)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix (direction)')
plt.tight_layout()
plt.savefig(confusion_path)
plt.close()

# -------------------------
# 11) Predict next day
# -------------------------
# The model input is the last SEQ_LEN scaled rows of the whole dataset.
last_seq = scaled_all[-SEQ_LEN:]
# Explicit check rather than `assert`, which is stripped when Python runs with -O.
if last_seq.shape != (SEQ_LEN, len(FEATURES)):
    raise ValueError("Last sequence shape mismatch")

last_seq = np.expand_dims(last_seq, axis=0)   # add the batch axis
next_day_scaled = model.predict(last_seq).flatten()[0]
next_day_price = inverse_transform_close([next_day_scaled], scaler)[0]

last_close = df[CLOSE_COL_NAME].iloc[-1]
# Label the move with the same tolerance band used for the test-set labels.
# A strict float comparison would practically never yield "UNCHANGED" and
# would disagree with the reported classification metrics.
next_day_rel = (next_day_price - last_close) / (last_close + 1e-9)
movement = str(rel_to_label([next_day_rel], tol=tol)[0])

print("\n--- Next day prediction ---")
print(f"Last data date: {df['Date'].iloc[-1].date()}, last close: {last_close:.4f}")
print(f"Predicted next-day close (approx): {next_day_price:.4f}")
print(f"Predicted movement: {movement}")

# -------------------------
# 12) Save outputs & model artifacts (symbol-named)
# -------------------------
# Regression-only view of the test predictions.
out_df_reg = pd.DataFrame({
    'Date': df['Date'].iloc[test_indices].reset_index(drop=True),
    'Actual_Close': y_test_inv,
    'Pred_Close': y_test_pred_inv
}).reset_index(drop=True)
out_df_reg.to_csv(symbol_pred_csv, index=False)
print("Saved:", symbol_pred_csv)

# Same predictions plus previous close, relative changes and direction labels.
out_df = pd.DataFrame({
    'Date': df['Date'].iloc[test_indices].reset_index(drop=True),
    'Prev_Close': prev_close_vals,
    'Actual_Close': y_test_inv,
    'Pred_Close': y_test_pred_inv,
    'Actual_RelChange': actual_rel,
    'Pred_RelChange': pred_rel,
    'Actual_Label': actual_labels,
    'Pred_Label': pred_labels
}).reset_index(drop=True)
out_df.to_csv(symbol_pred_labels_csv, index=False)
print("Saved:", symbol_pred_labels_csv)

# The scaled window that was fed to the next-day prediction.
last_seq_scaled_df = pd.DataFrame(scaled_all[-SEQ_LEN:], columns=FEATURES)
last_seq_scaled_df.to_csv(symbol_last_seq_csv, index=False)
print("Saved last sequence (scaled):", symbol_last_seq_csv)

joblib.dump(scaler, symbol_scaler_path)
print(f"Saved scaler to {symbol_scaler_path}")

# Model saving is best-effort: a failure is reported but does not abort the run.
try:
    model.save(symbol_model_h5, include_optimizer=True)
    print(f"Saved HDF5 model to: {symbol_model_h5}")
except Exception as e:
    print("Warning: saving HDF5 model failed:", e)

try:
    model.save(symbol_savedmodel_dir, include_optimizer=True)
    print(f"Saved full SavedModel to {symbol_savedmodel_dir}")
except Exception as e:
    # Keras 3 refuses extension-less paths in model.save() and points to
    # model.export() for SavedModel output, so fall back to it when present.
    if hasattr(model, "export"):
        try:
            model.export(symbol_savedmodel_dir)
            print(f"Exported inference SavedModel to {symbol_savedmodel_dir}")
        except Exception as export_err:
            print("Warning: saving SavedModel failed:", export_err)
    else:
        print("Warning: saving SavedModel failed:", e)

# Minimal metadata needed to rebuild model inputs later.
meta = {
    "symbol": symbol,
    "FEATURES": FEATURES,
    "CLOSE_IDX": CLOSE_IDX,
    "SEQ_LEN": SEQ_LEN
}
with open(symbol_meta_json, "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2)
print("Saved meta JSON:", symbol_meta_json)

# -------------------------
# 13) Write consolidated JSON for DB ingestion
# -------------------------
def to_serializable(o):
    """Convert NumPy / pandas values into plain JSON-friendly Python objects.

    Intended as the ``default=`` hook of ``json.dump``.

    - ``np.ndarray``      -> nested list
    - any NumPy boolean   -> ``bool``
    - any NumPy integer   -> ``int``   (all widths, signed and unsigned)
    - any NumPy float     -> ``float`` (all widths)
    - ``pd.Timestamp``    -> ``str``

    Any other object is returned unchanged.
    """
    if isinstance(o, np.ndarray):
        return o.tolist()
    if isinstance(o, np.bool_):
        return bool(o)
    # Match the abstract scalar bases so every width is covered, not just 32/64-bit.
    if isinstance(o, np.integer):
        return int(o)
    if isinstance(o, np.floating):
        return float(o)
    if isinstance(o, pd.Timestamp):
        return str(o)
    return o

# One record per test sample; all zipped sequences have one entry per sample.
test_dates_all = df['Date'].iloc[test_indices].dt.strftime('%Y-%m-%d').tolist()
pred_records = [
    {
        "date": date_str,
        "prev_close": float(prev_c),
        "actual_close": float(actual_c),
        "pred_close": float(pred_c),
        "actual_relchange": float(actual_r),
        "pred_relchange": float(pred_r),
        "actual_label": str(actual_l),
        "pred_label": str(pred_l)
    }
    for date_str, prev_c, actual_c, pred_c, actual_r, pred_r, actual_l, pred_l in zip(
        test_dates_all, prev_close_vals, y_test_inv, y_test_pred_inv,
        actual_rel, pred_rel, actual_labels, pred_labels
    )
]

json_obj = {
    "symbol": symbol,
    # Timezone-aware replacement for the deprecated datetime.utcnow(); same format.
    "generated_at_utc": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "n_rows": int(n),
    # train_size / val_size are ROW counts of the split regions; test_size is
    # the number of test samples.
    "train_size": int(train_end),
    "val_size": int(val_within_train_n),
    "test_size": int(len(y_test_inv)),
    "seq_len": int(SEQ_LEN),
    "features": FEATURES,
    "regression_metrics": {"mae": float(mae), "rmse": float(rmse)},
    "classification_metrics": {
        "accuracy": float(acc),
        "macro_precision": float(prec_macro),
        "macro_recall": float(recall_macro),
        "macro_f1": float(f1_macro),
        "weighted_precision": float(prec_weighted),
        "weighted_recall": float(recall_weighted),
        "weighted_f1": float(f1_weighted),
        # Per-class arrays are ordered ['UP', 'DOWN', 'UNCHANGED'].
        "per_class": {
            "UP": {"precision": float(per_class_prec[0]), "recall": float(per_class_rec[0]), "f1": float(per_class_f1[0])},
            "DOWN": {"precision": float(per_class_prec[1]), "recall": float(per_class_rec[1]), "f1": float(per_class_f1[1])},
            "UNCHANGED": {"precision": float(per_class_prec[2]), "recall": float(per_class_rec[2]), "f1": float(per_class_f1[2])}
        },
        "confusion_matrix": cm.tolist()
    },
    "next_day_prediction": {"pred_price": float(next_day_price), "pred_movement": movement, "last_close": float(last_close), "last_date": str(df['Date'].iloc[-1].date())},
    "importance": {"sentiment_rmse_increase_pct": float(importance_sentiment) if importance_sentiment is not None else None},
    # Model artifacts are listed only if they exist on disk (saving them is best-effort).
    "artifacts": {
        "predictions_csv": os.path.abspath(symbol_pred_csv),
        "predictions_with_labels_csv": os.path.abspath(symbol_pred_labels_csv),
        "last_sequence_csv": os.path.abspath(symbol_last_seq_csv),
        "scaler": os.path.abspath(symbol_scaler_path),
        "model_h5": os.path.abspath(symbol_model_h5) if os.path.exists(symbol_model_h5) else None,
        "saved_model_dir": os.path.abspath(symbol_savedmodel_dir) if os.path.exists(symbol_savedmodel_dir) else None,
        "meta_json": os.path.abspath(symbol_meta_json),
        "plot_prediction": os.path.abspath(pred_plot_path),
        "plot_confusion": os.path.abspath(confusion_path),
        "plot_train_loss": os.path.abspath(loss_plot_path)
    },
    "predictions": pred_records
}

with open(json_path, "w", encoding="utf-8") as f:
    json.dump(json_obj, f, default=to_serializable, indent=2)
print("Wrote consolidated JSON:", json_path)

print(f"Model checkpoint (best) at: {ckpt_path}")
print("All done.")


Loaded rows: 3128
Symbol                    object
Date              datetime64[ns]
Open                     float64
High                     float64
Low                      float64
Close                    float64
Percent_Change           float64
Volume                   float64
Turnover                 float64
sentiment                float64
dtype: object
  Symbol       Date   Open   High    Low  Close  Percent_Change  Volume  \
0  NABIL 2012-01-01  873.0  879.0  851.0  879.0            0.69   799.0   
1  NABIL 2012-01-02  879.0  913.0  882.0  900.0            2.39  1002.0   
2  NABIL 2012-01-03  900.0  916.0  892.0  892.0           -0.89  2653.0   
3  NABIL 2012-01-04  892.0  875.0  842.0  865.0           -3.03   490.0   
4  NABIL 2012-01-05  865.0  875.0  858.0  858.0           -0.81   121.0   

    Turnover  sentiment  
0   684720.0        0.0  
1   897160.0        0.0  
2  2396445.0        0.0  
3   417289.0        0.0  
4   104175.0        0.0  

--- Basic EDA (raw columns pre

  super().__init__(**kwargs)


Epoch 1/100

Epoch 1: val_loss improved from None to 0.04715, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 13s - 167ms/step - loss: 0.0405 - val_loss: 0.0471 - learning_rate: 1.0000e-03
Epoch 2/100

Epoch 2: val_loss improved from 0.04715 to 0.03045, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0331 - val_loss: 0.0304 - learning_rate: 1.0000e-03
Epoch 3/100

Epoch 3: val_loss improved from 0.03045 to 0.01573, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0219 - val_loss: 0.0157 - learning_rate: 1.0000e-03
Epoch 4/100

Epoch 4: val_loss improved from 0.01573 to 0.01257, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0183 - val_loss: 0.0126 - learning_rate: 1.0000e-03
Epoch 5/100

Epoch 5: val_loss did not improve from 0.01257
78/78 - 5s - 61ms/step - loss: 0.0173 - val_loss: 0.0141 - learning_rate: 1.0000e-03
Epoch 6/100

Epoch 6: val_loss did not improve from 0.01257
78/78 - 5s - 61ms/step - loss: 0.0163 - val_loss: 0.0130 - learning_rate: 1.0000e-03
Epoch 7/100

Epoch 7: val_loss improved from 0.01257 to 0.01137, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0154 - val_loss: 0.0114 - learning_rate: 1.0000e-03
Epoch 8/100

Epoch 8: val_loss did not improve from 0.01137
78/78 - 5s - 61ms/step - loss: 0.0147 - val_loss: 0.0115 - learning_rate: 1.0000e-03
Epoch 9/100

Epoch 9: val_loss improved from 0.01137 to 0.01002, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0141 - val_loss: 0.0100 - learning_rate: 1.0000e-03
Epoch 10/100

Epoch 10: val_loss improved from 0.01002 to 0.00795, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0122 - val_loss: 0.0079 - learning_rate: 1.0000e-03
Epoch 11/100

Epoch 11: val_loss did not improve from 0.00795
78/78 - 5s - 61ms/step - loss: 0.0123 - val_loss: 0.0091 - learning_rate: 1.0000e-03
Epoch 12/100

Epoch 12: val_loss improved from 0.00795 to 0.00770, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0115 - val_loss: 0.0077 - learning_rate: 1.0000e-03
Epoch 13/100

Epoch 13: val_loss did not improve from 0.00770
78/78 - 5s - 62ms/step - loss: 0.0115 - val_loss: 0.0084 - learning_rate: 1.0000e-03
Epoch 14/100

Epoch 14: val_loss improved from 0.00770 to 0.00732, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0113 - val_loss: 0.0073 - learning_rate: 1.0000e-03
Epoch 15/100

Epoch 15: val_loss improved from 0.00732 to 0.00585, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0102 - val_loss: 0.0058 - learning_rate: 1.0000e-03
Epoch 16/100

Epoch 16: val_loss did not improve from 0.00585
78/78 - 5s - 61ms/step - loss: 0.0097 - val_loss: 0.0058 - learning_rate: 1.0000e-03
Epoch 17/100

Epoch 17: val_loss did not improve from 0.00585
78/78 - 5s - 61ms/step - loss: 0.0097 - val_loss: 0.0061 - learning_rate: 1.0000e-03
Epoch 18/100

Epoch 18: val_loss improved from 0.00585 to 0.00505, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0091 - val_loss: 0.0050 - learning_rate: 1.0000e-03
Epoch 19/100

Epoch 19: val_loss did not improve from 0.00505
78/78 - 5s - 61ms/step - loss: 0.0090 - val_loss: 0.0058 - learning_rate: 1.0000e-03
Epoch 20/100

Epoch 20: val_loss improved from 0.00505 to 0.00469, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0087 - val_loss: 0.0047 - learning_rate: 1.0000e-03
Epoch 21/100

Epoch 21: val_loss did not improve from 0.00469
78/78 - 5s - 62ms/step - loss: 0.0083 - val_loss: 0.0048 - learning_rate: 1.0000e-03
Epoch 22/100

Epoch 22: val_loss did not improve from 0.00469
78/78 - 5s - 62ms/step - loss: 0.0082 - val_loss: 0.0052 - learning_rate: 1.0000e-03
Epoch 23/100

Epoch 23: val_loss did not improve from 0.00469
78/78 - 5s - 62ms/step - loss: 0.0079 - val_loss: 0.0050 - learning_rate: 1.0000e-03
Epoch 24/100

Epoch 24: val_loss improved from 0.00469 to 0.00388, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0076 - val_loss: 0.0039 - learning_rate: 1.0000e-03
Epoch 25/100

Epoch 25: val_loss did not improve from 0.00388
78/78 - 5s - 61ms/step - loss: 0.0070 - val_loss: 0.0042 - learning_rate: 1.0000e-03
Epoch 26/100

Epoch 26: val_loss did not improve from 0.00388
78/78 - 5s - 61ms/step - loss: 0.0074 - val_loss: 0.0046 - learning_rate: 1.0000e-03
Epoch 27/100

Epoch 27: val_loss did not improve from 0.00388
78/78 - 5s - 61ms/step - loss: 0.0069 - val_loss: 0.0044 - learning_rate: 1.0000e-03
Epoch 28/100

Epoch 28: val_loss did not improve from 0.00388
78/78 - 5s - 61ms/step - loss: 0.0068 - val_loss: 0.0043 - learning_rate: 1.0000e-03
Epoch 29/100

Epoch 29: val_loss improved from 0.00388 to 0.00384, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0066 - val_loss: 0.0038 - learning_rate: 1.0000e-03
Epoch 30/100

Epoch 30: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 30: val_loss did not improve from 0.00384
78/78 - 5s - 61ms/step - loss: 0.0062 - val_loss: 0.0039 - learning_rate: 1.0000e-03
Epoch 31/100

Epoch 31: val_loss improved from 0.00384 to 0.00215, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0064 - val_loss: 0.0022 - learning_rate: 5.0000e-04
Epoch 32/100

Epoch 32: val_loss improved from 0.00215 to 0.00215, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0064 - val_loss: 0.0022 - learning_rate: 5.0000e-04
Epoch 33/100

Epoch 33: val_loss improved from 0.00215 to 0.00210, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0063 - val_loss: 0.0021 - learning_rate: 5.0000e-04
Epoch 34/100

Epoch 34: val_loss did not improve from 0.00210
78/78 - 5s - 62ms/step - loss: 0.0062 - val_loss: 0.0021 - learning_rate: 5.0000e-04
Epoch 35/100

Epoch 35: val_loss did not improve from 0.00210
78/78 - 5s - 62ms/step - loss: 0.0063 - val_loss: 0.0023 - learning_rate: 5.0000e-04
Epoch 36/100

Epoch 36: val_loss did not improve from 0.00210
78/78 - 5s - 61ms/step - loss: 0.0065 - val_loss: 0.0022 - learning_rate: 5.0000e-04
Epoch 37/100

Epoch 37: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 37: val_loss improved from 0.00210 to 0.00208, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0065 - val_loss: 0.0021 - learning_rate: 5.0000e-04
Epoch 38/100

Epoch 38: val_loss did not improve from 0.00208
78/78 - 5s - 61ms/step - loss: 0.0080 - val_loss: 0.0031 - learning_rate: 2.5000e-04
Epoch 39/100

Epoch 39: val_loss did not improve from 0.00208
78/78 - 5s - 61ms/step - loss: 0.0062 - val_loss: 0.0024 - learning_rate: 2.5000e-04
Epoch 40/100

Epoch 40: val_loss did not improve from 0.00208
78/78 - 5s - 62ms/step - loss: 0.0056 - val_loss: 0.0021 - learning_rate: 2.5000e-04
Epoch 41/100

Epoch 41: val_loss improved from 0.00208 to 0.00207, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0052 - val_loss: 0.0021 - learning_rate: 2.5000e-04
Epoch 42/100

Epoch 42: val_loss improved from 0.00207 to 0.00190, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0050 - val_loss: 0.0019 - learning_rate: 2.5000e-04
Epoch 43/100

Epoch 43: val_loss improved from 0.00190 to 0.00181, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0049 - val_loss: 0.0018 - learning_rate: 2.5000e-04
Epoch 44/100

Epoch 44: val_loss improved from 0.00181 to 0.00179, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0049 - val_loss: 0.0018 - learning_rate: 2.5000e-04
Epoch 45/100

Epoch 45: val_loss improved from 0.00179 to 0.00175, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0046 - val_loss: 0.0017 - learning_rate: 2.5000e-04
Epoch 46/100

Epoch 46: val_loss improved from 0.00175 to 0.00174, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0045 - val_loss: 0.0017 - learning_rate: 2.5000e-04
Epoch 47/100

Epoch 47: val_loss improved from 0.00174 to 0.00171, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0046 - val_loss: 0.0017 - learning_rate: 2.5000e-04
Epoch 48/100

Epoch 48: val_loss did not improve from 0.00171
78/78 - 5s - 62ms/step - loss: 0.0045 - val_loss: 0.0017 - learning_rate: 2.5000e-04
Epoch 49/100

Epoch 49: val_loss improved from 0.00171 to 0.00171, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0044 - val_loss: 0.0017 - learning_rate: 2.5000e-04
Epoch 50/100

Epoch 50: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 50: val_loss improved from 0.00171 to 0.00170, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0045 - val_loss: 0.0017 - learning_rate: 2.5000e-04
Epoch 51/100

Epoch 51: val_loss did not improve from 0.00170
78/78 - 5s - 61ms/step - loss: 0.0048 - val_loss: 0.0018 - learning_rate: 1.2500e-04
Epoch 52/100

Epoch 52: val_loss did not improve from 0.00170
78/78 - 5s - 62ms/step - loss: 0.0042 - val_loss: 0.0018 - learning_rate: 1.2500e-04
Epoch 53/100

Epoch 53: val_loss improved from 0.00170 to 0.00168, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0042 - val_loss: 0.0017 - learning_rate: 1.2500e-04
Epoch 54/100

Epoch 54: val_loss improved from 0.00168 to 0.00162, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 64ms/step - loss: 0.0040 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 55/100

Epoch 55: val_loss improved from 0.00162 to 0.00161, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0040 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 56/100

Epoch 56: val_loss did not improve from 0.00161
78/78 - 5s - 68ms/step - loss: 0.0042 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 57/100

Epoch 57: val_loss improved from 0.00161 to 0.00158, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0039 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 58/100

Epoch 58: val_loss improved from 0.00158 to 0.00157, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0040 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 59/100

Epoch 59: val_loss improved from 0.00157 to 0.00156, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0041 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 60/100

Epoch 60: val_loss improved from 0.00156 to 0.00156, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 69ms/step - loss: 0.0037 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 61/100

Epoch 61: val_loss did not improve from 0.00156
78/78 - 5s - 68ms/step - loss: 0.0040 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 62/100

Epoch 62: val_loss improved from 0.00156 to 0.00155, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0038 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 63/100

Epoch 63: val_loss did not improve from 0.00155
78/78 - 5s - 70ms/step - loss: 0.0040 - val_loss: 0.0016 - learning_rate: 1.2500e-04
Epoch 64/100

Epoch 64: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.

Epoch 64: val_loss improved from 0.00155 to 0.00154, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0038 - val_loss: 0.0015 - learning_rate: 1.2500e-04
Epoch 65/100

Epoch 65: val_loss improved from 0.00154 to 0.00152, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0040 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 66/100

Epoch 66: val_loss improved from 0.00152 to 0.00149, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0038 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 67/100

Epoch 67: val_loss did not improve from 0.00149
78/78 - 5s - 68ms/step - loss: 0.0038 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 68/100

Epoch 68: val_loss improved from 0.00149 to 0.00148, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 6s - 71ms/step - loss: 0.0037 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 69/100

Epoch 69: val_loss improved from 0.00148 to 0.00147, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 69ms/step - loss: 0.0035 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 70/100

Epoch 70: val_loss improved from 0.00147 to 0.00147, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 70ms/step - loss: 0.0037 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 71/100

Epoch 71: val_loss improved from 0.00147 to 0.00147, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 6s - 71ms/step - loss: 0.0035 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 72/100

Epoch 72: val_loss did not improve from 0.00147
78/78 - 5s - 69ms/step - loss: 0.0037 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 73/100

Epoch 73: val_loss did not improve from 0.00147
78/78 - 5s - 68ms/step - loss: 0.0037 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 74/100

Epoch 74: val_loss did not improve from 0.00147
78/78 - 5s - 64ms/step - loss: 0.0036 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 75/100

Epoch 75: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 75: val_loss improved from 0.00147 to 0.00146, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 63ms/step - loss: 0.0036 - val_loss: 0.0015 - learning_rate: 6.2500e-05
Epoch 76/100

Epoch 76: val_loss improved from 0.00146 to 0.00144, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0037 - val_loss: 0.0014 - learning_rate: 3.1250e-05
Epoch 77/100

Epoch 77: val_loss improved from 0.00144 to 0.00143, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0036 - val_loss: 0.0014 - learning_rate: 3.1250e-05
Epoch 78/100

Epoch 78: val_loss did not improve from 0.00143
78/78 - 5s - 62ms/step - loss: 0.0038 - val_loss: 0.0014 - learning_rate: 3.1250e-05
Epoch 79/100

Epoch 79: val_loss did not improve from 0.00143
78/78 - 5s - 61ms/step - loss: 0.0036 - val_loss: 0.0014 - learning_rate: 3.1250e-05
Epoch 80/100

Epoch 80: val_loss improved from 0.00143 to 0.00143, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0036 - val_loss: 0.0014 - learning_rate: 3.1250e-05
Epoch 81/100

Epoch 81: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.

Epoch 81: val_loss did not improve from 0.00143
78/78 - 5s - 61ms/step - loss: 0.0035 - val_loss: 0.0014 - learning_rate: 3.1250e-05
Epoch 82/100

Epoch 82: val_loss did not improve from 0.00143
78/78 - 5s - 62ms/step - loss: 0.0036 - val_loss: 0.0014 - learning_rate: 1.5625e-05
Epoch 83/100

Epoch 83: val_loss did not improve from 0.00143
78/78 - 5s - 61ms/step - loss: 0.0036 - val_loss: 0.0014 - learning_rate: 1.5625e-05
Epoch 84/100

Epoch 84: val_loss did not improve from 0.00143
78/78 - 5s - 61ms/step - loss: 0.0035 - val_loss: 0.0014 - learning_rate: 1.5625e-05
Epoch 85/100

Epoch 85: val_loss improved from 0.00143 to 0.00143, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0037 - val_loss: 0.0014 - learning_rate: 1.5625e-05
Epoch 86/100

Epoch 86: val_loss improved from 0.00143 to 0.00143, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 62ms/step - loss: 0.0035 - val_loss: 0.0014 - learning_rate: 1.5625e-05
Epoch 87/100

Epoch 87: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.

Epoch 87: val_loss did not improve from 0.00143
78/78 - 7s - 89ms/step - loss: 0.0035 - val_loss: 0.0014 - learning_rate: 1.5625e-05
Epoch 88/100

Epoch 88: val_loss did not improve from 0.00143
78/78 - 5s - 68ms/step - loss: 0.0036 - val_loss: 0.0014 - learning_rate: 7.8125e-06
Epoch 89/100

Epoch 89: val_loss did not improve from 0.00143
78/78 - 5s - 70ms/step - loss: 0.0035 - val_loss: 0.0014 - learning_rate: 7.8125e-06
Epoch 90/100

Epoch 90: val_loss did not improve from 0.00143
78/78 - 6s - 71ms/step - loss: 0.0034 - val_loss: 0.0014 - learning_rate: 7.8125e-06
Epoch 91/100

Epoch 91: val_loss did not improve from 0.00143
78/78 - 6s - 71ms/step - loss: 0.0034 - val_loss: 0.0014 - learning_rate: 7.8125e-06
Epoch 92/100

Epoch 92: val_loss did not improve from 0.00143
78/78 - 5s - 70ms/step - loss: 0.0034 - 



78/78 - 6s - 71ms/step - loss: 0.0036 - val_loss: 0.0014 - learning_rate: 3.9063e-06
Epoch 95/100

Epoch 95: val_loss did not improve from 0.00142
78/78 - 5s - 70ms/step - loss: 0.0035 - val_loss: 0.0014 - learning_rate: 3.9063e-06
Epoch 96/100

Epoch 96: val_loss did not improve from 0.00142
78/78 - 5s - 70ms/step - loss: 0.0034 - val_loss: 0.0014 - learning_rate: 3.9063e-06
Epoch 97/100

Epoch 97: val_loss improved from 0.00142 to 0.00142, saving model to outputs\NABIL\best_lstm_20251105T154306Z.h5




78/78 - 5s - 69ms/step - loss: 0.0033 - val_loss: 0.0014 - learning_rate: 3.9063e-06
Epoch 98/100

Epoch 98: val_loss did not improve from 0.00142
78/78 - 5s - 70ms/step - loss: 0.0036 - val_loss: 0.0014 - learning_rate: 3.9063e-06
Epoch 99/100

Epoch 99: ReduceLROnPlateau reducing learning rate to 1.9531250927684596e-06.

Epoch 99: val_loss did not improve from 0.00142
78/78 - 5s - 70ms/step - loss: 0.0035 - val_loss: 0.0014 - learning_rate: 3.9063e-06
Epoch 100/100

Epoch 100: val_loss did not improve from 0.00142
78/78 - 6s - 71ms/step - loss: 0.0035 - val_loss: 0.0014 - learning_rate: 1.9531e-06
Restoring model weights from the end of the best epoch: 97.
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 126ms/step
Test MAE: 18.0065, RMSE: 22.9498
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Sentiment importance (RMSE increase % if shuffled): -0.59%
Actual label distr




--- Next day prediction ---
Last data date: 2025-09-18, last close: 483.0000
Predicted next-day close (approx): 522.9658
Predicted movement: UP
Saved: outputs\NABIL\NABIL_test_predictions.csv
Saved: outputs\NABIL\NABIL_test_predictions_with_labels.csv
Saved last sequence (scaled): outputs\NABIL\NABIL_last_sequence_features_scaled.csv
Saved scaler to outputs\NABIL\NABIL_scaler.joblib
Saved HDF5 model to: outputs\NABIL\NABIL_model.h5
Saved meta JSON: outputs\NABIL\NABIL_meta.json
Wrote consolidated JSON: outputs\NABIL\NABIL.json
Model checkpoint (best) at: outputs\NABIL\best_lstm_20251105T154306Z.h5
All done.
