In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from datetime import datetime
from datetime import timedelta

In [None]:
# The dataset has been split into batches by "split_csv.ipynb"; each batch contains the CSV files for 10 stocks.
# Point DATASET_PATH at a different batch folder to train models and generate predictions for other stocks.

# --- Filesystem locations (relative to the notebook's working directory) ---
TRAINING_MODEL_PATH = '../trainingModel'
DATASET_PATH = '../trainingDataset'
LOG_DIR = '../trainingLogs'

# --- Windowing ---
# WINDOW_SIZE is handed to forecast_and_save as the length of the input window.
WINDOW_SIZE = 32

# --- Remaining hyperparameters ---
# NOTE(review): none of these are referenced in the cells visible here;
# presumably they mirror the training notebook -- confirm before removing.
ACCURACY_THRESHOLD = 90
MAX_RETRIES = 3
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 1000
SPLIT_TIME = 3000

In [None]:
def normalize_feature(data):
    """Standardize a feature to zero mean and unit variance (z-score).

    Args:
        data: Numeric array (or array-like) holding one feature.

    Returns:
        tuple: ``(normalized_data, (mean, scale))`` where ``normalized_data``
        equals ``(data - mean) / scale``. ``scale`` is the population standard
        deviation of ``data``, except for a constant feature (standard
        deviation exactly 0), where it is 1.0 so the result is all zeros
        instead of NaN/inf. ``normalized * scale + mean`` always recovers the
        input.
    """
    mean = np.mean(data)
    std = np.std(data)
    # A constant series has zero spread; dividing by it would fill the feature
    # with NaN (0/0) and silently poison every downstream prediction.
    # A NaN std (e.g. NaN in the input) is deliberately left untouched.
    scale = 1.0 if std == 0 else std
    normalized_data = (data - mean) / scale
    return normalized_data, (mean, scale)

In [None]:
def denormalize_data(data, stats):
    """Undo z-score normalization using per-feature statistics.

    Args:
        data: Normalized values whose last axis broadcasts against the number
            of rows in ``stats``.
        stats: Sequence of ``(mean, std)`` pairs, one per feature.

    Returns:
        ``data * std + mean``, evaluated feature-wise via NumPy broadcasting.
    """
    stats_table = np.asarray(stats)
    # Column 0 holds the means, column 1 the standard deviations.
    offsets, scales = stats_table[:, 0], stats_table[:, 1]
    return data * scales + offsets

In [None]:
def parse_data_from_file(filename):
    """Read a price CSV and return normalized low/high/close features.

    The first row is skipped as a header and columns 1, 2 and 3 are read as
    low, high and close respectively. Each column is normalized on its own
    with ``normalize_feature``.

    Args:
        filename: Path to the comma-separated dataset file.

    Returns:
        tuple: ``(times, features, stats_low, stats_high, stats_close)`` where
        ``times`` is ``0..n-1``, ``features`` stacks the three normalized
        columns along axis 1, and each ``stats_*`` is whatever
        ``normalize_feature`` returned as statistics for that column.
    """
    raw = np.loadtxt(filename, delimiter=',', skiprows=1, usecols=(1, 2, 3))

    scaled_columns = []
    column_stats = []
    # Column order in `raw` is low, high, close.
    for column_index in range(3):
        scaled, stats = normalize_feature(raw[:, column_index])
        scaled_columns.append(scaled)
        column_stats.append(stats)

    features = np.stack(scaled_columns, axis=1)
    times = np.arange(len(raw))
    stats_low, stats_high, stats_close = column_stats

    return times, features, stats_low, stats_high, stats_close

In [None]:
def format_decimal(value):
    """Render a number as fixed-point text with no decimal places.

    The value is rounded to the nearest integer by the ``.0f`` format spec,
    e.g. ``1234.6`` becomes ``"1235"``.
    """
    return f"{value:.0f}"

In [None]:
def forecast_and_save(models_path, dataset_path, output_path, window_size, forecast_steps=15):
    """
    Load each saved model, forecast ahead autoregressively, and save the result to CSV.

    For every ``*.h5`` file in ``models_path`` the matching dataset is
    ``<prefix>.csv`` inside ``dataset_path``, where ``<prefix>`` is the model
    file name up to its first underscore. The last ``window_size`` normalized
    rows of that dataset seed the model; each prediction is appended to the
    window and the oldest row dropped before the next step. The denormalized
    forecast is written to ``<model name>_forecast.csv`` in ``output_path``
    with one row per consecutive calendar day after the dataset's last
    ``timestamp``.

    Args:
        models_path: Directory containing the ``.h5`` model files.
        dataset_path: Directory containing the per-stock CSV datasets.
        output_path: Existing directory that receives the forecast CSV files.
        window_size: Number of trailing rows fed to the model as input.
        forecast_steps: Number of future rows to predict and write. Defaults
            to 15, the number of predictions the loop has always computed.

    Raises:
        ValueError: If ``forecast_steps`` is smaller than 1.
    """
    if forecast_steps < 1:
        raise ValueError("forecast_steps must be at least 1")

    # Sorted so the processing (and log) order is the same on every run.
    for model_file in sorted(os.listdir(models_path)):
        if not model_file.endswith('.h5'):
            continue

        # Resolve the dataset before loading the model so that models without
        # data are skipped cheaply.
        dataset_name = model_file.split('_')[0] + '.csv'
        dataset_file = os.path.join(dataset_path, dataset_name)

        if not os.path.exists(dataset_file):
            print(f"Dataset file for {model_file} not found. Skipping...")
            continue

        # Load and preprocess the dataset
        _, features, stats_low, stats_high, stats_close = parse_data_from_file(dataset_file)
        stats = [stats_low, stats_high, stats_close]

        # A short file would give the model a window of the wrong length.
        if len(features) < window_size:
            print(f"Dataset {dataset_name} has fewer than {window_size} rows. Skipping...")
            continue

        model = tf.keras.models.load_model(os.path.join(models_path, model_file))
        print(f"Forecasting for model {model_file} using dataset {dataset_name}...")

        # Seed window: the last `window_size` rows plus a leading batch axis.
        input_data = features[-window_size:][np.newaxis, :]

        # Autoregressive rollout.
        # NOTE(review): assumes each predict() call returns a single
        # (low, high, close) row for the window -- confirm against the model.
        forecast = []
        for _ in range(forecast_steps):
            pred = model.predict(input_data)
            forecast.append(pred.squeeze())
            # np.roll returns a copy, so `features` itself is never modified.
            input_data = np.roll(input_data, -1, axis=1)
            input_data[0, -1] = pred

        # Denormalize the forecast
        forecast_denorm = denormalize_data(np.array(forecast), stats)

        # Load the original dataset to get the last date
        original_data = pd.read_csv(dataset_file)
        last_date = pd.to_datetime(original_data['timestamp'].iloc[-1])

        # One date per forecast row. The previous range(1, 15) produced only
        # 14 dates for 15 predictions, so zip() silently dropped the last row.
        forecast_dates = [last_date + timedelta(days=offset) for offset in range(1, forecast_steps + 1)]

        # Format the forecast and save to CSV
        output_file = os.path.join(output_path, f"{os.path.splitext(model_file)[0]}_forecast.csv")
        with open(output_file, 'w') as f:
            f.write('timestamp,low,high,close\n')
            for date, row in zip(forecast_dates, forecast_denorm):
                formatted_row = [date.strftime('%Y-%m-%d')] + [format_decimal(value) for value in row]
                f.write(','.join(formatted_row) + '\n')

        print(f"Saved forecast to {output_file}")

In [None]:
# Forecast CSVs are written here; the folder is created on first run.
OUTPUT_PATH = '../forecastResults'
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Run the forecast for every model found in TRAINING_MODEL_PATH.
forecast_and_save(
    models_path=TRAINING_MODEL_PATH,
    dataset_path=DATASET_PATH,
    output_path=OUTPUT_PATH,
    window_size=WINDOW_SIZE,
)