In [1]:
pip install numpy pandas matplotlib scikit-learn pmdarima prophet xgboost tensorflow optuna



In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping
import optuna
from google.colab import drive

In [3]:
drive.mount('/content/drive')

DATA_PATH = '/content/drive/MyDrive/projek/dataset/dataInflasi/inflation_clean_20250622_183819.csv'
assert os.path.exists(DATA_PATH), "File CSV tidak ditemukan!"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
df = pd.read_csv(DATA_PATH, parse_dates=['date'], index_col='date').sort_index()
df = df.rename(columns={'inflation':'y'})

In [5]:
for lag in [1,3,6]:
    df[f'lag_{lag}'] = df['y'].shift(lag)
for win in [3,6]:
    df[f'roll_{win}'] = df['y'].shift(1).rolling(win).mean()
df_ml = df.dropna().copy()

In [6]:
n_hold = 6
train_ml, test_ml = df_ml.iloc[:-n_hold], df_ml.iloc[-n_hold:]
y_train, y_test = train_ml['y'], test_ml['y']
X_train, X_test = train_ml.drop(columns='y'), test_ml.drop(columns='y')

In [7]:
def metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return mae, rmse

In [8]:
def rf_objective(trial):
    params = {'n_estimators': trial.suggest_int('n_estimators', 50, 300), 'max_depth': trial.suggest_int('max_depth', 3, 15), 'min_samples_split': trial.suggest_int('min_samples_split', 2, 10), 'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5), 'random_state': 42}
    model = RandomForestRegressor(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_train)
    return mean_absolute_error(y_train, preds)

study = optuna.create_study(direction='minimize')
study.optimize(rf_objective, n_trials=30)
best_rf_params = study.best_params
print("Best RF params:", best_rf_params)

rf = RandomForestRegressor(**best_rf_params, random_state=42)
rf.fit(X_train, y_train)
pred_rf = rf.predict(X_test)
mae_rf, rmse_rf = metrics(y_test, pred_rf)
print("RF to MAE:", mae_rf, "RMSE:", rmse_rf)

[I 2025-06-22 17:58:47,681] A new study created in memory with name: no-name-835083e4-9f4d-4886-ad1a-109af1f28dc8
[I 2025-06-22 17:58:47,928] Trial 0 finished with value: 0.5028194279487137 and parameters: {'n_estimators': 110, 'max_depth': 3, 'min_samples_split': 3, 'min_samples_leaf': 5}. Best is trial 0 with value: 0.5028194279487137.
[I 2025-06-22 17:58:48,042] Trial 1 finished with value: 0.3554826857411624 and parameters: {'n_estimators': 59, 'max_depth': 9, 'min_samples_split': 10, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.3554826857411624.
[I 2025-06-22 17:58:48,430] Trial 2 finished with value: 0.31097018575534624 and parameters: {'n_estimators': 168, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.31097018575534624.
[I 2025-06-22 17:58:49,013] Trial 3 finished with value: 0.37316157736677313 and parameters: {'n_estimators': 258, 'max_depth': 12, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 2 with val

Best RF params: {'n_estimators': 229, 'max_depth': 11, 'min_samples_split': 2, 'min_samples_leaf': 1}
RF to MAE: 0.6123586851043191 RMSE: 0.7853047316981917


In [9]:
#ARIMA (univariate)
model_arima = ARIMA(y_train, order=(1,1,1)).fit()
pred_arima = model_arima.forecast(steps=n_hold)
mae_arima, rmse_arima = metrics(y_test, pred_arima)
print("ARIMA to MAE:", mae_arima, "RMSE:", rmse_arima)

ARIMA to MAE: 0.5713552773486761 RMSE: 0.7757616633724


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [10]:
#Facebook Prophet
regs = ['lag_1','lag_3','lag_6','roll_3','roll_6']
df_prop = df_ml.reset_index().rename(columns={'date':'ds','y':'y'})
prop_train = df_prop.iloc[:-n_hold]
m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
for col in regs:
    m.add_regressor(col)
m.fit(prop_train)

future = m.make_future_dataframe(periods=n_hold, freq='M')
future = future.merge(df_prop[['ds']+regs], on='ds', how='left')
last_vals = df_prop[regs].iloc[-1]
for col in regs:
    future[col].fillna(last_vals[col], inplace=True)

fcst = m.predict(future)
pred_prop = fcst.set_index('ds')['yhat'].iloc[-n_hold:]
mae_prop, rmse_prop = metrics(y_test, pred_prop)
print("Prophet to MAE:", mae_prop, "RMSE:", rmse_prop)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpc4cxftco/goe9sgpj.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpc4cxftco/zpjne_3f.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=58050', 'data', 'file=/tmp/tmpc4cxftco/goe9sgpj.json', 'init=/tmp/tmpc4cxftco/zpjne_3f.json', 'output', 'file=/tmp/tmpc4cxftco/prophet_modelsjxtros0/prophet_model-20250622175906.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:59:06 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:59:06 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


Prophet to MAE: 0.796903164126029 RMSE: 1.0599748139152356


  dates = pd.date_range(
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  future[col].fillna(last_vals[col], inplace=True)


In [11]:
#XGBoostRegressor
xgb = XGBRegressor(n_estimators=200, max_depth=4, learning_rate=0.1, random_state=42)
xgb.fit(X_train, y_train)
pred_xgb = xgb.predict(X_test)
mae_xgb, rmse_xgb = metrics(y_test, pred_xgb)
print("XGBoost to MAE:", mae_xgb, "RMSE:", rmse_xgb)

XGBoost to MAE: 0.612481013139089 RMSE: 0.7854452425191203


In [12]:
#Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
pred_lr = lr.predict(X_test)
mae_lr, rmse_lr = metrics(y_test, pred_lr)
print("LinReg to MAE:", mae_lr, "RMSE:", rmse_lr)

LinReg to MAE: 0.6614442245000506 RMSE: 0.7337662383129492


In [13]:
#LSTM Neural Network
series = df['y'].values.reshape(-1,1)
scaler = MinMaxScaler()
scaled = scaler.fit_transform(series)

def create_dataset(ds, time_step=12):
    X, y = [], []
    for i in range(len(ds)-time_step):
        X.append(ds[i:i+time_step,0])
        y.append(ds[i+time_step,0])
    return np.array(X), np.array(y)

time_step = 12
X_l, y_l = create_dataset(scaled, time_step)
X_l = X_l.reshape(X_l.shape[0], X_l.shape[1], 1)
train_size = int(len(X_l)*0.8)
Xl_train, Xl_test = X_l[:train_size], X_l[train_size:]
yl_train, yl_test = y_l[:train_size], y_l[train_size:]

model_lstm = Sequential([LSTM(64, return_sequences=True, input_shape=(time_step,1)), Dropout(0.2), LSTM(32), Dropout(0.2), Dense(1)])
model_lstm.compile(optimizer='adam', loss='mse')
es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model_lstm.fit(Xl_train, yl_train, validation_data=(Xl_test, yl_test),epochs=100, batch_size=16, callbacks=[es], verbose=1)

yl_pred = model_lstm.predict(Xl_test)
yl_pred_inv = scaler.inverse_transform(yl_pred)
yl_test_inv = scaler.inverse_transform(yl_test.reshape(-1,1))
mae_lstm, rmse_lstm = metrics(yl_test_inv.flatten(), yl_pred_inv.flatten())
print("LSTM to MAE:", mae_lstm, "RMSE:", rmse_lstm)

  super().__init__(**kwargs)


Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step - loss: 0.0642 - val_loss: 0.0138
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0219 - val_loss: 0.0046
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0224 - val_loss: 0.0090
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0212 - val_loss: 0.0053
Epoch 5/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0199 - val_loss: 0.0056
Epoch 6/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0173 - val_loss: 0.0051
Epoch 7/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0163 - val_loss: 0.0045
Epoch 8/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0127 - val_loss: 0.0051
Epoch 9/100
[1m13/13[0m [32m━━━━━━━━━

In [14]:
results = pd.DataFrame({'model': ['ARIMA','Prophet','RandomForest','XGBoost','LinearReg','LSTM'], 'MAE': [mae_arima, mae_prop, mae_rf, mae_xgb, mae_lr, mae_lstm], 'RMSE': [rmse_arima, rmse_prop, rmse_rf, rmse_xgb, rmse_lr, rmse_lstm]})
results = results.sort_values(by='MAE').reset_index(drop=True)
print(results)

          model       MAE      RMSE
0          LSTM  0.435759  0.574462
1         ARIMA  0.571355  0.775762
2  RandomForest  0.612359  0.785305
3       XGBoost  0.612481  0.785445
4     LinearReg  0.661444  0.733766
5       Prophet  0.796903  1.059975


In [15]:
horizon = 5
last_date = df.index[-1]
future_dates = [last_date + pd.DateOffset(months=i) for i in range(1, horizon+1)]

preds_arima = model_arima.predict(n_periods=horizon)

fut = m.make_future_dataframe(periods=horizon, freq='M')
fut = fut.merge(df_prop[['ds'] + regs], on='ds', how='left')
for col in regs:
    fut[col] = fut[col].fillna(last_vals[col])

fc2 = m.predict(fut)
preds_prop = fc2['yhat'].iloc[-horizon:].values

history = df['y'].tolist()
preds_rf, preds_xgb, preds_lr = [], [], []
for _ in range(horizon):
    feat = {'lag_1': history[-1], 'lag_3': history[-3], 'lag_6': history[-6], 'roll_3': pd.Series(history).shift(1).rolling(3).mean().iloc[-1], 'roll_6': pd.Series(history).shift(1).rolling(6).mean().iloc[-1]}
    Xn = pd.DataFrame([feat])
    p_rf  = rf.predict(Xn)[0]
    p_xgb = xgb.predict(Xn)[0]
    p_lr  = lr.predict(Xn)[0]
    preds_rf.append(p_rf); preds_xgb.append(p_xgb); preds_lr.append(p_lr)
    history.append(p_rf)

X_future = scaled[-time_step:].reshape(1, time_step, 1)
preds_lstm = []
for _ in range(horizon):
    p = model_lstm.predict(X_future)[0,0]
    preds_lstm.append(p)
    X_future = np.roll(X_future, -1)
    X_future[0, -1, 0] = p
preds_lstm = scaler.inverse_transform(np.array(preds_lstm).reshape(-1,1)).flatten()

pred_df = pd.DataFrame({'Model': ['Prophet','ARIMA','RandomForest','XGBoost','LinearReg','LSTM']})
for i, dt in enumerate(future_dates):
    col = dt.strftime('%Y-%m')
    pred_df[col] = [preds_prop[i], preds_arima[i], preds_rf[i], preds_xgb[i], preds_lr[i], preds_lstm[i]]
# Format persen
for col in pred_df.columns[1:]:
    pred_df[col] = pred_df[col].map(lambda x: f"{x:.2f}%")

print(pred_df)


  dates = pd.date_range(


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
          Model 2025-06 2025-07 2025-08 2025-09 2025-10
0       Prophet   2.01%   2.06%   2.02%   1.49%   1.99%
1         ARIMA   0.00%   6.98%   6.13%   6.62%   6.24%
2  RandomForest   1.60%   1.58%   1.64%   1.61%   1.62%
3       XGBoost   1.60%   1.65%   1.59%   1.59%   1.68%
4     LinearReg   2.15%   2.13%   1.70%   1.93%   1.84%
5          LSTM   1.33%   1.39%   1.41%   1.41%   1.40%


  pred_df[col] = [preds_prop[i], preds_arima[i], preds_rf[i], preds_xgb[i], preds_lr[i], preds_lstm[i]]
  pred_df[col] = [preds_prop[i], preds_arima[i], preds_rf[i], preds_xgb[i], preds_lr[i], preds_lstm[i]]
  pred_df[col] = [preds_prop[i], preds_arima[i], preds_rf[i], preds_xgb[i], preds_lr[i], preds_lstm[i]]
  pred_df[col] = [preds_prop[i], preds_arima[i], preds_rf[i], preds_xgb[i], preds_lr[i], preds_lstm[i]]
  pred_df[col] = [preds_prop[i], preds_arima[i], preds_rf[i], preds_xgb[i], preds_lr[i], preds_lstm[i]]
