In [8]:
import time
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle

# Announce the stage and record when pre-processing started.
print("...............Reading the Dataset and Dataset Pre-Processing ................")
start_time = time.time()

# Read the spreadsheet, shuffle its rows, and discard the date column
# ("Tgl/bln/thn") so it is not part of the features below.
dataset = shuffle(pd.read_excel("dataset_ta.xlsx")).drop(columns=["Tgl/bln/thn"])

# Target: the "curah_hujan" (Indonesian for rainfall) column, selected with a
# list so it stays a single-column DataFrame. Features: every other column.
y = dataset[["curah_hujan"]]
x = dataset.drop(columns=["curah_hujan"])

# One min-max scaler per side, so features and target are normalized
# independently of each other.
# NOTE(review): both scalers are fitted on the full dataset here, before the
# train/test split performed later in this file, so held-out rows influence
# the scaling ranges.
scaler_x, scaler_y = MinMaxScaler(), MinMaxScaler()
x_scaled = scaler_x.fit_transform(x)
y_scaled = scaler_y.fit_transform(y)  # normalized target

# Implementing a simple neural network with numpy

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp``, which is mathematically identical to the textbook
    formula but never evaluates ``exp`` on a large positive number. The naive
    form overflows (and emits a RuntimeWarning) for large-magnitude negative
    inputs.

    Args:
        x: Scalar or array-like of values to squash.

    Returns:
        The sigmoid of *x*, with values in [0, 1]; arrays keep their shape.
    """
    # log(1 + exp(-x)), evaluated without overflow for any finite x.
    log_denominator = np.logaddexp(0.0, np.negative(x))
    return np.exp(-log_denominator)

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid's slope written in terms of its output.

    *x* is treated as an already-activated value (the callers in this file
    pass sigmoid outputs), not as a pre-activation.
    """
    complement = 1 - x
    return x * complement

def _forward_pass(inputs, hidden_weights, hidden_bias, output_weights, output_bias):
    """Run *inputs* through both sigmoid layers; return (hidden output, prediction)."""
    hidden_out = sigmoid(np.dot(inputs, hidden_weights) + hidden_bias)
    predicted = sigmoid(np.dot(hidden_out, output_weights) + output_bias)
    return hidden_out, predicted


def train_neural_network(X_train, y_train, X_test, y_test, hidden_units, learning_rate, epochs,
                         random_state=None):
    """Train a one-hidden-layer sigmoid network and return its RMSE on the test data.

    The network has ``hidden_units`` sigmoid hidden neurons and a single
    sigmoid output neuron, so predictions lie in (0, 1). It is trained with
    full-batch gradient steps for ``epochs`` iterations.

    Args:
        X_train: 2-D array-like of training features, shape (n_samples, n_features).
        y_train: Training targets; any shape with n_samples elements. They are
            coerced to a column vector so a 1-D input cannot broadcast against
            the (n_samples, 1) predictions into an (n_samples, n_samples) error.
        X_test: 2-D array-like of test features.
        y_test: Test targets, coerced to a column vector like ``y_train``.
        hidden_units: Number of neurons in the hidden layer.
        learning_rate: Multiplier applied to every weight and bias update.
        epochs: Number of full-batch training iterations.
        random_state: Optional seed for the weight initialisation. With the
            default ``None`` the global ``np.random`` state is used, exactly
            as before this parameter existed.

    Returns:
        Root mean squared error between ``y_test`` and the network's
        predictions on ``X_test``.
    """
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    y_train = np.asarray(y_train, dtype=float).reshape(-1, 1)
    y_test = np.asarray(y_test, dtype=float).reshape(-1, 1)

    # np.random (module) and RandomState both expose .uniform, so the default
    # path draws from the global generator in the same order as always.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    n_inputs = X_train.shape[1]
    n_outputs = 1

    # Draw order (hidden W, hidden b, output W, output b) is kept stable so
    # seeded runs are reproducible.
    hidden_weights = rng.uniform(size=(n_inputs, hidden_units))
    hidden_bias = rng.uniform(size=(1, hidden_units))
    output_weights = rng.uniform(size=(hidden_units, n_outputs))
    output_bias = rng.uniform(size=(1, n_outputs))

    for _ in range(epochs):
        hidden_out, predicted = _forward_pass(
            X_train, hidden_weights, hidden_bias, output_weights, output_bias
        )

        # Backpropagation: deltas are computed with the pre-update weights.
        error = y_train - predicted
        delta_output = error * sigmoid_derivative(predicted)
        delta_hidden = delta_output.dot(output_weights.T) * sigmoid_derivative(hidden_out)

        # NOTE(review): updates use gradients summed (not averaged) over all
        # rows, so the effective step size grows with the training set size.
        output_weights += hidden_out.T.dot(delta_output) * learning_rate
        output_bias += np.sum(delta_output, axis=0, keepdims=True) * learning_rate
        hidden_weights += X_train.T.dot(delta_hidden) * learning_rate
        hidden_bias += np.sum(delta_hidden, axis=0, keepdims=True) * learning_rate

    # Evaluate the trained network on the held-out data.
    _, test_predictions = _forward_pass(
        X_test, hidden_weights, hidden_bias, output_weights, output_bias
    )
    rmse = np.sqrt(mean_squared_error(y_test, test_predictions))
    return rmse

# Function for finding the best parameters
def find_best_params_numpy(X, y, test_size, hidden_units_grid=(10, 20),
                           learning_rates=(0.01, 0.001), epochs_grid=(50, 100)):
    """Grid-search the numpy network's hyperparameters for one train/test ratio.

    The data is split once (``random_state=42``), then one network is trained
    per combination of hidden units, learning rate and epoch count; the
    combination with the lowest test RMSE wins. Combinations are visited with
    hidden units varying slowest and epochs fastest.

    Args:
        X: Feature matrix, passed to ``train_test_split``.
        y: Target values aligned with ``X``.
        test_size: Test fraction forwarded to ``train_test_split``.
        hidden_units_grid: Candidate hidden-layer sizes.
        learning_rates: Candidate learning rates.
        epochs_grid: Candidate epoch counts.

    Returns:
        ``(best_rmse, best_params)`` where ``best_params`` is the winning
        ``(hidden_units, learning_rate, epochs)`` tuple. If no combination
        yields an RMSE below infinity (for example, an empty grid), the result
        is ``(inf, None)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    best_rmse = float('inf')
    best_params = None

    # NOTE(review): the winner is chosen by RMSE on the test split itself, so
    # the reported best RMSE is an optimistic estimate.
    for hidden_units, learning_rate, epochs in product(hidden_units_grid, learning_rates, epochs_grid):
        rmse = train_neural_network(X_train, y_train, X_test, y_test, hidden_units, learning_rate, epochs)
        if rmse < best_rmse:
            best_rmse = rmse
            best_params = (hidden_units, learning_rate, epochs)

    return best_rmse, best_params

# Check the best parameters for the 70:30, 80:20 and 90:10 train/test ratios
results_numpy = {}
for test_size in [0.3, 0.2, 0.1]:
    rmse, params = find_best_params_numpy(x_scaled, y_scaled.reshape(-1, 1), test_size)

    # round() instead of int(): a fraction times 100 can land just below the
    # intended integer in floating point (e.g. 0.29 * 100), and int() would
    # truncate it. The train share is taken as the complement so the label
    # always sums to 100.
    test_pct = round(test_size * 100)
    train_pct = 100 - test_pct

    results_numpy[f"Train/Test Split {train_pct}:{test_pct}"] = {
        'Best RMSE': rmse,
        'Best Parameters': params
    }

# Convert results to DataFrame (one column per split) and display
results_df = pd.DataFrame(results_numpy)
print("Neural Network Parameter Tuning Results:")
print(results_df)

...............Reading the Dataset and Dataset Pre-Processing ................
Neural Network Parameter Tuning Results:
                Train/Test Split 70:30 Train/Test Split 80:20  \
Best RMSE                     0.122633                0.12739   
Best Parameters        (10, 0.001, 50)        (10, 0.001, 50)   

                Train/Test Split 90:10  
Best RMSE                     0.121947  
Best Parameters       (10, 0.001, 100)  
