In [1]:
# Impor pustaka yang dibutuhkan
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping
import glob

In [2]:
# Setel seed acak untuk reproduktibilitas
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

In [3]:
# Tentukan jalur ke data dan ambil semua file CSV
data_path = "C:/D/projects/BPP PROJECT/bpp-prediction/backend/datasets/"
csv_files = glob.glob(data_path + "*.csv")

In [4]:
# Fungsi untuk memuat dan membersihkan data (menangani nilai yang hilang)
def load_and_clean_data(file_path):
    df = pd.read_csv(file_path, delimiter=';')
    df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
    df.set_index('Tanggal', inplace=True)
    
    # Menangani data yang hilang
    df = df.fillna(method='ffill')  # Menggunakan pengisian maju untuk menangani data yang hilang
    
    return df

In [5]:
# Memuat semua dataset
datasets = {file.split("\\")[-1]: load_and_clean_data(file) for file in csv_files}


ValueError: time data "2021-04-01" doesn't match format "%d/%m/%Y", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [6]:
# Menampilkan informasi dataset
for name, df in datasets.items():
    print(f"\nDataset: {name}")
    display(df.head())
# Kamus untuk menyimpan scaler untuk setiap dataset
scalers = {}

NameError: name 'datasets' is not defined

In [None]:
# Fungsi untuk menormalkan data
def normalize_data(df, dataset_name):
    """Menormalkan data dan menyimpan scaler untuk digunakan nanti"""
    scalers[dataset_name] = MinMaxScaler(feature_range=(0, 1))
    df_scaled = scalers[dataset_name].fit_transform(df[['Harga']].values)
    return df_scaled

In [None]:
# Menormalkan semua dataset
scaled_datasets = {}
for file, df in datasets.items():
    print(f"\nMenormalkan dataset: {file}")
    scaled_datasets[file] = normalize_data(df, file)
    
    # Menampilkan sampel nilai asli dan nilai terormalisasi
    print("Nilai asli:", df['Harga'].head().values)
    print("Nilai terormalisasi:", scaled_datasets[file][:5].flatten())


In [None]:
import joblib

# Simpan semua scaler ke dalam file
scaler_path = "C:/D/projects/BPP PROJECT/bpp-prediction/backend/scalers_100/"
os.makedirs(scaler_path, exist_ok=True)

for dataset_name, scaler in scalers.items():
    joblib.dump(scaler, f"{scaler_path}{dataset_name}_scaler.pkl")

print("Semua scaler telah disimpan.")


In [None]:
# Fungsi untuk membuat urutan data untuk LSTM
def create_dataset(data, time_step=60):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

In [None]:
# Membuat urutan untuk setiap dataset
datasets_X_y = {}
for file, scaled_data in scaled_datasets.items():
    print(f"\nMembuat dataset untuk {file}")
    X, y = create_dataset(scaled_data)
    datasets_X_y[file] = (X, y)
    print(f"X shape: {X.shape}, y shape: {y.shape}")


In [None]:
# Fungsi untuk membagi data
def split_data(X, y, train_size=0.8):
    train_len = int(len(X) * train_size)
    X_train, X_test = X[:train_len], X[train_len:]
    y_train, y_test = y[:train_len], y[train_len:]
    return X_train, X_test, y_train, y_test


In [None]:
# Membagi data untuk setiap dataset
split_datasets = {}
for file, (X, y) in datasets_X_y.items():
    print(f"\nMembagi data untuk {file}")
    X_train, X_test, y_train, y_test = split_data(X, y)
    split_datasets[file] = (X_train, X_test, y_train, y_test)
    print(f"Bentuk set pelatihan: {X_train.shape}")
    print(f"Bentuk set pengujian: {X_test.shape}")


In [None]:
# Fungsi untuk membangun model LSTM
def build_model(input_shape):
    model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=1)
    ])
    model.compile(optimizer=Adam(), loss='mean_squared_error')
    return model


In [None]:
# Melatih model untuk setiap dataset
model_results = {}
for file, (X_train, X_test, y_train, y_test) in split_datasets.items():
    print(f"\nMelatih model untuk {file}")
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    
    model = build_model((X_train.shape[1], 1))

    # Penghentian dini untuk mencegah overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

    history = model.fit(
        X_train, y_train,
        epochs=80,
        batch_size=32,
        validation_split=0.1,
        callbacks=[early_stopping],
        verbose=1
    )
    
    # Menyimpan model
    model.save(f"C:/D/projects/BPP PROJECT/bpp-prediction/backend/models_100/{file}_model.h5")
    model_results[file] = model
    
    #  # Menampilkan Hidden State dan Cell State pada timestep terakhir
    # print("\nEvaluasi pada data uji (X_test):")
    # output, state_h, state_c = model.predict(X_test)
    # print("Hidden State terakhir (state_h):", state_h)
    # print("Cell State terakhir (state_c):", state_c)
    
    # Plot riwayat pelatihan
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['loss'], label='Loss Pelatihan')
    plt.plot(history.history['val_loss'], label='Loss Validasi')
    plt.title(f'Loss Model untuk {file}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
# Fungsi untuk evaluasi
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    return rmse, mae

def denormalize_data(scaled_data, dataset_name):
    return scalers[dataset_name].inverse_transform(scaled_data)


In [None]:
# Evaluasi model dan membuat prediksi
evaluations = {}
predictions = {}

for file, model in model_results.items():
    X_test = split_datasets[file][1].reshape(-1, 60, 1)
    y_test = split_datasets[file][3]
    
    # Membuat prediksi
    y_pred_scaled = model.predict(X_test)
    
    # Denormalisasi
    y_pred_rescaled = denormalize_data(y_pred_scaled, file)
    y_test_rescaled = denormalize_data(y_test.reshape(-1, 1), file)
    
    # Menghitung metrik
    rmse, mae = evaluate_model(model, X_test, y_test)
    evaluations[file] = {'RMSE': rmse, 'MAE': mae}
    
    predictions[file] = {
        'y_pred': y_pred_rescaled,
        'y_test': y_test_rescaled
    }


In [None]:
# Fungsi untuk memplot prediksi
def plot_predictions(file, predictions, evaluations):
    y_pred = predictions[file]['y_pred']
    y_test = predictions[file]['y_test']
    
    plt.figure(figsize=(12, 6))
    plt.plot(y_test, label='Harga Aktual', linewidth=2)
    plt.plot(y_pred, label='Harga Prediksi', linewidth=2)
    plt.title(f'Prediksi Harga BPP untuk {file}\nRMSE: {evaluations[file]["RMSE"]:.2f}, MAE: {evaluations[file]["MAE"]:.2f}')
    plt.xlabel('Waktu')
    plt.ylabel('Harga (Rupiah)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [None]:
# Plot hasil untuk setiap dataset
for file in predictions.keys():
    plot_predictions(file, predictions, evaluations)

# Menampilkan hasil evaluasi akhir
print("\nHasil Evaluasi Akhir:")
for name, metrics in evaluations.items():
    print(f"\nMetrik untuk {name}:")
    print(f"RMSE: {metrics['RMSE']:.4f}")
    print(f"MAE: {metrics['MAE']:.4f}")

In [None]:

model.summary()