# PERBANDINGAN METODE KLASTER K-MEDOIDS DAN K-MEANS TERHADAP HASIL PERAMALAN KEMISKINAN DI INDONESIA MENGGUNAKAN BPNN
### by Riansyah Fazar Ramadhan

## Import Dependencies

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.ar_model import AutoReg

## Data Preprocessing

In [None]:
# Load the pooled panel from the "data" sheet. The path is written as a raw
# string so the Windows backslashes are never parsed as escape sequences
# ("\G", "\s" and "\p" are invalid escapes that newer Python versions warn about).
data = pd.read_excel(r'E:\GitHub\sherlock-final-project\pooled_data.xlsx', sheet_name="data")
# Keep only the columns at positions 1, 2 and 3 (0-based); position 0 is dropped.
data = data.iloc[:, 1:4]
# NumPy array version of the selected columns.
data_ar = np.array(data)

## ACF-PACF

In [None]:
# Draw every column as its own line chart, stacked vertically in one figure.
num_col = len(data.columns)

plt.figure(figsize=(10, 8))

for position, column in enumerate(data.columns, start=1):
    # Panel `position` of a (num_col x 1) grid; subplot indices are 1-based.
    plt.subplot(num_col, 1, position)
    plt.plot(data[column])
    plt.title(column)

# Tighten the layout so the panel titles do not overlap, then render.
plt.tight_layout()
plt.show()

In [None]:
# For each column, show its ACF (left) and PACF (right) up to lag 67.
for column in data.columns:
    series = data[column]
    fig, (acf_axis, pacf_axis) = plt.subplots(nrows=1, ncols=2, figsize=(16, 4))

    # Autocorrelation on the left panel
    plot_acf(series, ax=acf_axis, lags=67)
    acf_axis.set_title(f'ACF of {column}')

    # Partial autocorrelation on the right panel
    plot_pacf(series, ax=pacf_axis, lags=67)
    pacf_axis.set_title(f'PACF of {column}')

    # One figure per column
    plt.tight_layout()
    plt.show()

## 5. Terasvirta Linearity Test
#### Performed in R; see terasvirtatest.R

## 6. Standardization

In [None]:
from sklearn.preprocessing import StandardScaler

def standardize_data(data, *, return_scaler=False):
    """Standardize every column of ``data`` with a ``StandardScaler``.

    Args:
        data: 2-D array-like (for example a DataFrame) accepted by
            ``StandardScaler.fit_transform``.
        return_scaler: When true, also return the fitted scaler so values in
            standardized units can later be mapped back with
            ``scaler.inverse_transform``. Defaults to False, which keeps the
            original single-value return.

    Returns:
        The standardized array, or the tuple ``(standardized array, fitted
        scaler)`` when ``return_scaler`` is true.
    """
    scaler = StandardScaler()
    data_norm = scaler.fit_transform(data)
    if return_scaler:
        return data_norm, scaler
    return data_norm


# Keep the fitted scaler alongside the standardized data; without it the
# standardized values cannot be converted back to the original scale.
data_norm, scaler = standardize_data(data, return_scaler=True)

## 7. Data Splitting

In [None]:
def windowed_dataset(series, batch_size, n_past, n_future, shift):
    """Build a batched ``tf.data`` pipeline of (input, target) windows.

    Args:
        series: Sequence sliced element-wise by ``from_tensor_slices``.
        batch_size: Number of (input, target) pairs per emitted batch.
        n_past: Length of the input part of each window.
        n_future: Length of the target part of each window.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A dataset yielding ``(window[:n_past], window[n_past:])`` pairs,
        batched by ``batch_size`` and prefetched one batch ahead. Windows
        shorter than ``n_past + n_future`` are dropped.
    """
    window_len = n_past + n_future

    def to_tensor(window):
        # Each window is itself a dataset; batching it whole yields one tensor.
        return window.batch(window_len)

    def split_window(window):
        # Leading n_past values are the input, the rest is the target.
        return window[:n_past], window[n_past:]

    windows = tf.data.Dataset.from_tensor_slices(series).window(
        size=window_len, shift=shift, drop_remainder=True
    )
    pairs = windows.flat_map(to_tensor).map(split_window)
    return pairs.batch(batch_size).prefetch(1)

In [None]:
# Settings for windowed_dataset() and the train/test split below.
BATCH_SIZE = 1     # passed as batch_size: (input, target) pairs per batch
N_PAST = 68        # passed as n_past: length of the input part of each window
N_FUTURE = 34      # passed as n_future: length of the target part of each window
SHIFT = 1          # passed as shift: offset between consecutive window starts
SPLIT_TIME = 306   # row index of the split: rows before it train, the rest test

In [None]:
# Split each standardized column at row SPLIT_TIME: rows before it form the
# training series, the remaining rows form the test series.
x1_train, x1_test = data_norm[:SPLIT_TIME, 0], data_norm[SPLIT_TIME:, 0]
x2_train, x2_test = data_norm[:SPLIT_TIME, 1], data_norm[SPLIT_TIME:, 1]
x3_train, x3_test = data_norm[:SPLIT_TIME, 2], data_norm[SPLIT_TIME:, 2]

# Windowing settings shared by every series.
window_kwargs = dict(batch_size=BATCH_SIZE, n_past=N_PAST,
                     n_future=N_FUTURE, shift=SHIFT)

# Each train/test pair is built from its OWN column. Previously sets 2 and 3
# were windowed from x1_train / x1_test, so all three pairs were identical
# copies of the first series.
train_set1 = windowed_dataset(series=x1_train, **window_kwargs)
test_set1 = windowed_dataset(series=x1_test, **window_kwargs)

train_set2 = windowed_dataset(series=x2_train, **window_kwargs)
test_set2 = windowed_dataset(series=x2_test, **window_kwargs)

train_set3 = windowed_dataset(series=x3_train, **window_kwargs)
test_set3 = windowed_dataset(series=x3_test, **window_kwargs)

## 8. Neural Network Architecture

In [None]:
def mape(y_true, y_pred):
    """Mean absolute percentage error, returned as a fraction (not x100).

    Args:
        y_true: Tensor (or array-like) of observed values.
        y_pred: Tensor (or array-like) of predicted values.

    Returns:
        Scalar tensor: mean of ``|y_true - y_pred| / max(|y_true|, epsilon)``.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Align dtypes so the subtraction below cannot fail on float64 vs float32.
    y_true = tf.cast(y_true, y_pred.dtype)

    # Guard only the denominator, and only by magnitude. Clamping y_true
    # itself with tf.maximum(y_true, epsilon) would overwrite every negative
    # or zero observation with epsilon, which corrupts the error whenever the
    # targets can be negative (e.g. standardized data).
    epsilon = tf.keras.backend.epsilon()
    denominator = tf.maximum(tf.abs(y_true), epsilon)

    return tf.reduce_mean(tf.abs(y_true - y_pred) / denominator)

In [None]:
def modelling(train_dataset, test_dataset, epoch, loss_function, metrics, optimizer, activation, hidden_node,
              input_dim=5, output_dim=1):
    """Train a single-hidden-layer dense network and return its loss and weights.

    Args:
        train_dataset: Training data passed to ``model.fit``.
        test_dataset: Data passed to ``model.fit`` as ``validation_data``.
        epoch: Number of training epochs.
        loss_function: Loss passed to ``model.compile``.
        metrics: A single metric; it is wrapped in a list for ``compile``.
        optimizer: Optimizer passed to ``model.compile``.
        activation: Activation of the hidden layer.
        hidden_node: Number of units in the hidden layer; also used in the
            saved model's file name.
        input_dim: Width of the input layer. Defaults to 5, the previously
            hard-coded value; set it to the length of each input sample.
        output_dim: Number of output units. Defaults to 1, the previously
            hard-coded value; set it to the length of each target sample.

    Returns:
        Tuple ``(final_loss, hl_weights, hl_bias, ol_weights, ol_bias)``:
        the training loss of the last epoch followed by the kernel and bias
        of the hidden layer and of the output layer.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(hidden_node, activation=activation),
        tf.keras.layers.Dense(output_dim),
    ])
    model.compile(loss=loss_function, optimizer=optimizer, metrics=[metrics])
    history = model.fit(train_dataset, epochs=epoch, validation_data=test_dataset)

    # Persist the model only when this code runs as the main module
    # (not when the function is imported from elsewhere).
    if __name__ == '__main__':
        model.save(f"model_{hidden_node}.h5")

    final_loss = history.history['loss'][-1]
    # Two Dense layers -> exactly four arrays: kernel and bias for each layer.
    hl_weights, hl_bias, ol_weights, ol_bias = model.get_weights()
    return final_loss, hl_weights, hl_bias, ol_weights, ol_bias

In [None]:
# Sweep the hidden-layer size from 1 to 20 units, training one network per
# size and recording its final training loss and learned weights.
# NOTE(review): `train_dataset` and `test_dataset` are not assigned in the
# cells visible above (the windowed sets are named train_set1..3 and
# test_set1..3) -- confirm which datasets this sweep is meant to use.
model_sum = pd.DataFrame(columns=['mse', 'hl_weights', 'hl_bias', 'ol_weights', 'ol_bias'])
for n in range(1, 21):
    run_summary = modelling(
        train_dataset=train_dataset,
        test_dataset=test_dataset,
        epoch=100,
        loss_function='mse',
        metrics='mae',
        # Fresh optimizer instance for every model
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
        activation='sigmoid',
        hidden_node=n,
    )
    # Row label = number of hidden units; values follow the column order above.
    model_sum.loc[n] = list(run_summary)

In [None]:
# Write the per-model summary table (one row per hidden-layer size) to Excel.
model_sum.to_excel('model_train_sumary.xlsx')

In [None]:
import tensorflow as tf
import numpy as np

# Load the trained model
model = tf.keras.models.load_model('model_5.h5')

# Forecast configuration. `data_norm` is the standardized series.
N_PAST = 34               # observations fed to the model per forecast step
N_FUTURE = 34             # observations produced per forecast step
num_future_periods = 136  # total number of future observations to predict

# Take the feature count from the data itself: the reshape below only succeeds
# when it equals the number of columns in data_norm. (N_FEATURES was previously
# never assigned, so this cell failed with a NameError.)
N_FEATURES = data_norm.shape[1]

# Start from the last N_PAST standardized observations, as a batch of one.
current_input = data_norm[-N_PAST:].reshape((1, N_PAST, N_FEATURES))

# One prediction block per forecast step
future_predictions = []

for _ in range(num_future_periods // N_FUTURE):
    # Forecast the next N_FUTURE periods.
    # NOTE(review): assumes model.predict returns an array shaped
    # (1, N_FUTURE, N_FEATURES) so it can be joined along axis 1 -- confirm
    # against the architecture of the saved model.
    prediction = model.predict(current_input)
    future_predictions.append(prediction)

    # Slide the window: drop the oldest N_FUTURE steps, append the forecast.
    current_input = np.concatenate(
        [current_input[:, N_FUTURE:, :], prediction], axis=1
    )

# Join the per-step blocks along the time axis into a single array.
future_predictions = np.concatenate(future_predictions, axis=1)

# The predictions are in standardized units; to report them on the original
# scale, inverse-transform them with the scaler fitted during standardization.
print(future_predictions)
