In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings("ignore")

In [8]:
# Load dataset
df = pd.read_csv("avg_combined_dataset.csv")
df['year'] = pd.to_datetime(df['year'], format="%Y")
df = df.sort_values(by=['place_name', 'year'])

In [9]:
predictions = []

In [10]:
# Loop per tempat wisata
for place in df['place_name'].unique():
    df_place = df[df['place_name'] == place].copy()

    if len(df_place) < 3:
        print(f"Tempat '{place}' dilewati karena jumlah data < 3.")
        continue

    data_ts = df_place[['total_visitor']].values
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data_ts)

    # ===== LSTM SECTION =====
    def create_sequences(data, n_steps=2):
        X, y = [], []
        for i in range(len(data) - n_steps):
            X.append(data[i:i+n_steps])
            y.append(data[i+n_steps])
        return np.array(X), np.array(y)

    n_steps = 2
    X, y = create_sequences(data_scaled, n_steps)

    if len(X) == 0:
        print(f"Tempat '{place}' dilewati karena data kurang untuk sequence.")
        continue

    X = X.reshape((X.shape[0], X.shape[1], 1))

    model = Sequential()
    model.add(LSTM(64, activation='relu', input_shape=(n_steps, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(X, y, epochs=200, verbose=0)

    last_sequence = data_scaled[-n_steps:].reshape((1, n_steps, 1))
    next_pred_scaled = model.predict(last_sequence)
    next_pred_lstm = scaler.inverse_transform(next_pred_scaled)[0][0]

    y_pred_lstm = scaler.inverse_transform(model.predict(X))
    y_true = scaler.inverse_transform(y)

    mae_lstm = mean_absolute_error(y_true, y_pred_lstm)
    rmse_lstm = np.sqrt(mean_squared_error(y_true, y_pred_lstm))
    mape_lstm = np.mean(np.abs((y_true - y_pred_lstm) / y_true)) * 100

    # ===== ARIMA SECTION =====
    try:
        arima_model = ARIMA(df_place['total_visitor'], order=(2, 1, 0))
        arima_fit = arima_model.fit()
        forecast_arima = arima_fit.forecast(steps=1)
        next_pred_arima = forecast_arima.iloc[0]

        y_pred_arima = arima_fit.predict(start=1, end=len(df_place)-1, typ='levels')
        y_true_arima = df_place['total_visitor'].values[1:]

        mae_arima = mean_absolute_error(y_true_arima, y_pred_arima)
        rmse_arima = np.sqrt(mean_squared_error(y_true_arima, y_pred_arima))
        mape_arima = np.mean(np.abs((y_true_arima - y_pred_arima) / y_true_arima)) * 100
    except:
        print(f"ARIMA gagal untuk tempat '{place}'.")
        next_pred_arima = np.nan
        mae_arima = rmse_arima = mape_arima = np.nan

    year_next = df_place['year'].dt.year.max() + 1
    avg_rating = df_place["avg_rating"].mean()

    predictions.append({
        'place_name': place,
        'predicted_year': year_next,
        'predicted_visitors_lstm': int(next_pred_lstm),
        'predicted_visitors_arima': int(next_pred_arima) if not np.isnan(next_pred_arima) else None,
        'avg_rating': round(avg_rating, 2),
        'MAE_LSTM': round(mae_lstm, 2),
        'RMSE_LSTM': round(rmse_lstm, 2),
        'MAPE_LSTM': round(mape_lstm, 2),
        'MAE_ARIMA': round(mae_arima, 2) if not np.isnan(mae_arima) else None,
        'RMSE_ARIMA': round(rmse_arima, 2) if not np.isnan(rmse_arima) else None,
        'MAPE_ARIMA': round(mape_arima, 2) if not np.isnan(mape_arima) else None,
    })


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
Tempat 'desa wisata duren sari sawahan' dilewati karena jumlah data < 3.
Tempat 'desa wisata pandean dongko' dilewati karena jumlah data < 3.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [11]:
# Konversi ke DataFrame
df_pred = pd.DataFrame(predictions)

In [13]:

# Normalisasi skor visitor dan rating berdasarkan prediksi LSTM
scaler = MinMaxScaler()
df_pred["scaled_visitors"] = scaler.fit_transform(df_pred[["predicted_visitors_lstm"]])
df_pred["scaled_rating"] = scaler.fit_transform(df_pred[["avg_rating"]])

In [14]:
# Hitung skor gabungan (bobot 70% visitor, 30% rating)
df_pred["popularity_score"] = 0.7 * df_pred["scaled_visitors"] + 0.3 * df_pred["scaled_rating"]

In [15]:
# Urutkan dari skor terbaik
df_ranked = df_pred.sort_values(by="popularity_score", ascending=False)

In [21]:
# Ambil 5 besar
top5 = df_ranked.head(5)

print("\nTop 5 Tempat Wisata Berdasarkan Prediksi (LSTM) dan Rating:")
print(top5[["place_name", "predicted_visitors_lstm", "predicted_visitors_arima",
            "avg_rating", "MAE_LSTM", "RMSE_LSTM", "MAPE_LSTM",
            "MAE_ARIMA", "RMSE_ARIMA", "MAPE_ARIMA", "popularity_score"]])


Top 5 Tempat Wisata Berdasarkan Prediksi (LSTM) dan Rating:
                   place_name  predicted_visitors_lstm  \
14         taman impian ancol                 11011633   
1                 dufan ancol                  2362699   
12             seaworld ancol                  1271066   
6         pantai karanggongso                   606511   
7   pantai mutiara trenggalek                   733784   

    predicted_visitors_arima  avg_rating     MAE_LSTM    RMSE_LSTM  MAPE_LSTM  \
14                   5841634        4.60 1,154,734.33 1,258,509.31      11.89   
1                    1896715        4.56    82,403.37    99,440.08       3.72   
12                    642718        4.58   131,544.71   147,080.38      10.91   
6                     505751        4.54       867.66       867.66       0.22   
7                     892842        4.47       661.12       661.12       0.15   

      MAE_ARIMA   RMSE_ARIMA  MAPE_ARIMA  popularity_score  
14 3,137,674.08 4,574,118.55       48.19  