In [1]:
import pandas as pd
import numpy as np

In [20]:
df = pd.read_csv("df_train.csv")  # Sesuaikan dengan nama file
df['date'] = pd.to_datetime(df['date'])  # Pastikan kolom date dalam format datetime
df = df.sort_values(by='date')  # Urutkan berdasarkan tanggal

In [21]:
# Cek tanggal tertua
oldest_date = df['date'].min()
print(f"Tanggal tertua: {oldest_date}")

# Cek duplikat
duplicate_dates = df['date'].duplicated().any()
print(f"Apakah ada duplikat pada kolom 'date'? {'Ya' if duplicate_dates else 'Tidak'}")

Tanggal tertua: 2020-08-01 00:00:00
Apakah ada duplikat pada kolom 'date'? Ya


In [22]:
df.columns


Index(['date', 'warehouse', 'total_orders', 'sales', 'sell_price_main',
       'total_discount', 'shops_closed', 'total_holidays'],
      dtype='object')

In [3]:
def create_sequences(data, lookback=60, horizon=14):
    X, y = [], []
    for i in range(len(data) - lookback - horizon):
        X.append(data[i:i+lookback])  # Ambil 60 hari sebelumnya
        y.append(data[i+lookback:i+lookback+horizon, -1])  # Ambil 14 hari ke depan (kolom sales)
    return np.array(X), np.array(y)

In [4]:
df.columns

Index(['date', 'warehouse', 'total_orders', 'sales', 'sell_price_main',
       'total_discount', 'shops_closed', 'total_holidays'],
      dtype='object')

In [5]:
feature_cols = ['warehouse', 'total_orders', 'sell_price_main', 'total_discount', 'shops_closed', 'total_holidays']
target_col = ["sales"]

In [6]:
# Ambil hanya fitur yang diperlukan + target
data_values = df[feature_cols + target_col].values  

# Ubah ke dalam format time step
X, y = create_sequences(data_values, lookback=60, horizon=14)

print(f"Shape X: {X.shape}")  # (samples, 60, fitur)
print(f"Shape y: {y.shape}")  # (samples, 14) -> memprediksi 14 hari ke depa

Shape X: (4007345, 60, 7)
Shape y: (4007345, 14)


In [7]:
from sklearn.model_selection import train_test_split

# Membagi data menjadi 80% train dan 20% validation
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)

# Cek ukuran hasil split
print(f"Training Data Shape: X_train={X_train.shape}, y_train={y_train.shape}")
print(f"Validation Data Shape: X_val={X_val.shape}, y_val={y_val.shape}")


Training Data Shape: X_train=(3205876, 60, 7), y_train=(3205876, 14)
Validation Data Shape: X_val=(801469, 60, 7), y_val=(801469, 14)


In [8]:
num_features = X_train.shape[2]  # Harus 7
time_steps = X_train.shape[1]

In [17]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(time_steps, num_features)),  
    Dropout(0.2),
    Dense(32),
    Dropout(0.2),
    LSTM(32, return_sequences=False),  
    Dense(14)  # Output untuk 14 hari ke depan
])
model.summary()

In [18]:
from tensorflow.keras.optimizers import Adam
Adam = Adam(learning_rate=0.001)
model.compile(optimizer=Adam, loss='mse')

In [19]:
history = model.fit(X_train, y_train, epochs=10, batch_size=6000, 
                    validation_data=(X_val, y_val),
                    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)])


Epoch 1/10
[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1094s[0m 2s/step - loss: 134923.2500 - val_loss: 159655.8750
Epoch 2/10
[1m398/535[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m3:56[0m 2s/step - loss: 131723.3750

KeyboardInterrupt: 

In [25]:
df_test = pd.read_csv("df_test.csv")

In [26]:
# Ambil tanggal pertama di test set
start_test_date = df_test['date'].min()

# Ambil 60 hari terakhir sebelum tanggal pertama di test
lookback = 60
last_60_days = df[df['date'] < start_test_date].tail(lookback)

# Konversi ke format LSTM
X_test = last_60_days[feature_cols].values.reshape(1, lookback, len(feature_cols))

In [27]:
# Prediksi menggunakan model
predicted_sales = model.predict(X_test)  # Output shape (1, 14)


ValueError: Exception encountered when calling LSTMCell.call().

[1mDimensions must be equal, but are 6 and 7 for '{{node sequential_5_1/lstm_10_1/lstm_cell_1/MatMul}} = MatMul[T=DT_FLOAT, grad_a=false, grad_b=false, transpose_a=false, transpose_b=false](sequential_5_1/lstm_10_1/strided_slice_1, sequential_5_1/lstm_10_1/lstm_cell_1/Cast/ReadVariableOp)' with input shapes: [1,6], [7,256].[0m

Arguments received by LSTMCell.call():
  • inputs=tf.Tensor(shape=(1, 6), dtype=float32)
  • states=('tf.Tensor(shape=(1, 64), dtype=float32)', 'tf.Tensor(shape=(1, 64), dtype=float32)')
  • training=False