# PERBANDINGAN METODE KLASTER K-MEDOIDS DAN K-MEANS TERHADAP HASIL PERAMALAN KEMISKINAN DI INDONESIA MENGGUNAKAN BPNN
### by Riansyah Fazar Ramadhan

## 1. Import Dependencies

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.ar_model import AutoReg




## 2. Data Preprocessing

In [2]:
# Load the four worksheets ("y", "x1", "x2", "x3") of the source workbook.
# The path is kept in one place so it only has to be edited once.
DATA_PATH = "E:/AKTUARIA FINAL SEASON/DATA/fpdata.xlsx"
SHEET_NAMES = ["y", "x1", "x2", "x3"]

# Passing a list of sheet names makes pandas open the workbook a single time
# and return a dict {sheet name: DataFrame}; the first column of every sheet
# is used as the index.
sheets = pd.read_excel(DATA_PATH, sheet_name=SHEET_NAMES, index_col=0)
data1, data2, data3, data4 = (sheets[name] for name in SHEET_NAMES)

# 2-D array copies of each sheet.
y = np.array(data1)
x1 = np.array(data2)
x2 = np.array(data3)
x3 = np.array(data4)

# Each sheet collapsed to a 1-D vector so the four variables can be pooled
# side by side as columns of a single table.
data1_flat = data1.values.flatten()
data2_flat = data2.values.flatten()
data3_flat = data3.values.flatten()
data4_flat = data4.values.flatten()

In [3]:
# Pool the four flattened series into one table, one column per variable.
pooled_columns = {
    'y': data1_flat,
    'x1': data2_flat,
    'x2': data3_flat,
    'x3': data4_flat,
}
data = pd.DataFrame(pooled_columns)
data  # last expression of the cell: shows the table in the notebook

Unnamed: 0,y,x1,x2,x3
0,17.08,-1.970,7.73,0.334
1,10.53,4.990,6.39,0.336
2,7.31,5.625,5.99,0.342
3,8.42,-1.035,6.72,0.364
4,8.86,4.555,2.73,0.361
...,...,...,...,...
573,11.49,4.775,3.04,0.351
574,16.42,5.780,6.08,0.288
575,6.46,19.460,4.60,0.300
576,20.49,3.020,5.53,0.370


## 3. Descriptive Stats

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
# of the pooled table.
desc_stat = data.describe()
# Last expression of the cell: shows the summary table in the notebook.
desc_stat

Unnamed: 0,y,x1,x2,x3
count,578.0,578.0,578.0,578.0
mean,10.814758,4.499152,5.055052,0.353097
std,5.638554,4.086967,1.805693,0.039766
min,3.42,-21.3,0.88,0.236
25%,6.3875,3.32125,3.72,0.325
50%,9.09,5.075,4.68,0.351
75%,13.845,5.77375,6.17,0.382
max,28.54,25.86,10.95,0.459


In [6]:
# Export the pooled data and its summary statistics to one workbook, each on
# its own sheet. The writer is created only inside the `with` statement so it
# is saved and closed when the block exits; no second, never-closed writer is
# left holding the same output file.
with pd.ExcelWriter('pooled_data.xlsx', engine='xlsxwriter') as writer:
    data.to_excel(writer, sheet_name='data', index=False)
    desc_stat.to_excel(writer, sheet_name='desc_stat')

## 4. Terasvirta Linearity Test
#### Performed in R; see terasvirtatest.R

## 5. Augmented Dickey-Fuller (ADF) Stationarity Test

In [9]:
import statsmodels.api as sm

# Run the Augmented Dickey-Fuller test on the flattened "y" series, using the
# default settings of adfuller.
adf_test = sm.tsa.adfuller(data1_flat)

# Name the six fields of the result tuple in the order they are returned.
(
    adf_statistic,
    p_value,
    used_lag,
    n_obs,
    critical_values,
    aic_value,
) = adf_test

# Labelled report of the test outcome.
print("Augmented Dickey-Fuller Test Results:")
print(f"ADF Statistic: {adf_statistic}")
print(f"p-value: {p_value}")
print(f"Number of Lags Used: {used_lag}")
print(f"Number of Observations Used: {n_obs}")
print("Critical Values:")
# critical_values maps a significance level (e.g. "1%") to its threshold.
for key, value in critical_values.items():
    print(f"    {key}: {value}")
print(f"AIC Value: {aic_value}")


Augmented Dickey-Fuller Test Results:
ADF Statistic: -9.216696012414147
p-value: 1.825734467645251e-15
Number of Lags Used: 19
Number of Observations Used: 558
Critical Values:
    1%: -3.4421235439968862
    5%: -2.866733577794069
    10%: -2.569536010842615
AIC Value: 3096.3850121272844


## 6. Min-max Normalization

In [None]:
from sklearn.preprocessing import MinMaxScaler

def standardize_data(data):
    """Rescale *data* to the range [-1, 1] with a min-max scaler.

    Args:
        data: 2-D array-like accepted by ``MinMaxScaler`` (the callers in
            this notebook pass single-column arrays).

    Returns:
        A ``(scaled_data, scaler)`` tuple. The fitted scaler is returned so
        that the transformation can be reused or inverted later.
    """
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(data)
    return scaler.transform(data), scaler

# Scale each x-series independently; every series gets its own fitted scaler.
# Each flat vector is reshaped to a single column because the scaler works on
# 2-D input.
(
    (x1_norm, x1_scaler),
    (x2_norm, x2_scaler),
    (x3_norm, x3_scaler),
) = [
    standardize_data(series.reshape(-1, 1))
    for series in (data2_flat, data3_flat, data4_flat)
]

## 7. Data Splitting

In [11]:
N_FEATURES = 3      # number of input columns fed to the network
SPLIT_TIME = 442    # rows [0, SPLIT_TIME) are training data, the rest is test data
BATCH_SIZE = 32     # mini-batch size; matches Keras' default for array inputs

FEATURE_COLUMNS = ['x1', 'x2', 'x3']
TARGET_COLUMN = 'y'

# Ordered split: no shuffling, the first SPLIT_TIME rows are used for training.
data_train = data[:SPLIT_TIME]
data_test = data[SPLIT_TIME:]


def _to_dataset(frame):
    """Turn a slice of the pooled table into a batched (inputs, target) dataset.

    ``model.fit`` needs a dataset whose elements are ``(inputs, targets)``
    batches. Slicing the whole DataFrame instead would yield un-batched
    4-element rows with the target mixed in among the inputs, which does not
    match a network that takes ``N_FEATURES`` inputs.
    """
    features = frame[FEATURE_COLUMNS].to_numpy(dtype='float32')
    # Double brackets keep the target 2-D, shape (rows, 1), like the model output.
    target = frame[[TARGET_COLUMN]].to_numpy(dtype='float32')
    return tf.data.Dataset.from_tensor_slices((features, target)).batch(BATCH_SIZE)


# NOTE(review): these datasets are built from the raw columns of `data`; the
# min-max scaled arrays (x1_norm, x2_norm, x3_norm) are not used here, exactly
# as in the original cell. Confirm whether the scaled inputs were intended.
train_dataset = _to_dataset(data_train)
test_dataset = _to_dataset(data_test)




## 8. Neural Network Architecture

In [None]:
def modelling(train_dataset, test_dataset, epoch, loss_function, metrics, optimizer, activation, hidden_node):
    """Train a one-hidden-layer network and report its final loss and weights.

    The network takes 3 inputs, has one hidden dense layer of ``hidden_node``
    units with the given ``activation``, and a single linear output unit.

    Args:
        train_dataset: Data passed to ``model.fit`` for training.
        test_dataset: Data passed to ``model.fit`` as ``validation_data``.
        epoch: Number of training epochs.
        loss_function: Loss passed to ``model.compile``.
        metrics: A single metric; it is wrapped in a list for ``compile``.
        optimizer: Optimizer passed to ``model.compile``.
        activation: Activation of the hidden layer.
        hidden_node: Number of units in the hidden layer.

    Returns:
        A 5-tuple: the training loss of the last epoch, followed by the first
        four weight arrays of the model (hidden-layer weights, hidden-layer
        bias, output-layer weights, output-layer bias).
    """
    layers = [
        tf.keras.layers.Input(shape=(3,)),
        tf.keras.layers.Dense(hidden_node, activation=activation),
        tf.keras.layers.Dense(1),
    ]
    model = tf.keras.models.Sequential(layers)
    model.compile(loss=loss_function, optimizer=optimizer, metrics=[metrics])
    history = model.fit(train_dataset, epochs=epoch, validation_data=test_dataset)

    # The trained model is written to disk only when this code runs as the
    # main module; one file per hidden-layer size.
    if __name__ == '__main__':
        model.save(f"model_{hidden_node}.h5")

    final_loss = history.history['loss'][-1]
    hl_weights, hl_bias, ol_weights, ol_bias = model.get_weights()[:4]
    return final_loss, hl_weights, hl_bias, ol_weights, ol_bias

In [None]:
# One row per hidden-layer size (1 to 20 units): final training loss plus the
# weights and biases of the hidden and output layers.
summary_columns = ['mse', 'hl_weights', 'hl_bias', 'ol_weights', 'ol_bias']
model_sum = pd.DataFrame(columns=summary_columns)

for n in range(1, 21):
    # A new optimizer is created on every iteration, so each model is trained
    # with its own optimizer instance.
    rmsprop = tf.keras.optimizers.RMSprop(learning_rate=0.001)
    run_result = modelling(
        train_dataset=train_dataset,
        test_dataset=test_dataset,
        epoch=50,
        loss_function='mse',
        metrics='mae',
        optimizer=rmsprop,
        activation='relu',
        hidden_node=n,
    )
    # The row label is the hidden-layer size.
    model_sum.loc[n] = list(run_result)