In [2]:
# Show the DataFrame's column labels so later column selections can be verified
column_labels = df.columns
print(column_labels)

Index(['Suhu', 'Kelembaban Udara', 'Tekanan Udara', 'Arah Angin',
       'Kecepatan Angin', 'curah_hujan'],
      dtype='object')


# 1. Manual Tuning

In [15]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import os

# Manual hyper-parameter sweep: for every train/test split ratio, fit one
# MLPRegressor per parameter combination and record its hold-out RMSE
# (expressed in the original units of the target).

# Load the dataset from the Excel workbook
df = pd.read_excel("dataset_ta.xlsx")

# Print the column names so the selections below can be checked against them
print(df.columns)

# Drop the date column (update the column name if needed)
df = df.drop(columns=["Tgl/bln/thn"])

# Separate the features (X) from the target (y)
# (update column names based on the actual columns in the DataFrame)
X = df[['suhu', 'kelembaban_udara', 'kecepatan_angin', 'arah_angin', 'tekanan_udara']]
y = df['curah_hujan']

# Min-max normalise the data; the target gets its own scaler so predictions
# can be mapped back to the original units afterwards.
# NOTE(review): both scalers are fitted on the full dataset *before* the
# train/test split, so test-set min/max values leak into training. Consider
# fitting the scalers on the training split only.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_normalized = scaler_X.fit_transform(X)
y_normalized = scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten()

# Output directory for the result files; create it if it does not exist yet
path_hasil = "Path Hasil"
os.makedirs(path_hasil, exist_ok=True)

# Parameter grid to sweep
param_grid = {
    'hidden_layer_sizes': [(3,), (4, 5), (6, 7, 8)],
    'learning_rate_init': [1, 0.75, 0.5, 0.25, 0.01, 0.001],
    'max_iter': [500, 1000, 1500, 2000, 2500, 3000]
}

# Expand the grid into the full list of parameter combinations
parameter_combinations = list(ParameterGrid(param_grid))

# Random seed shared by the data split and the network initialisation
random_seed = 42

# Test-set fractions to evaluate
split_ratios = [0.1, 0.2, 0.3]
results = []

for split_ratio in split_ratios:
    X_train, X_test, y_train, y_test = train_test_split(
        X_normalized, y_normalized, test_size=split_ratio, random_state=random_seed
    )

    # The denormalised ground truth only depends on the split, not on the
    # model parameters, so compute it once per split.
    y_test_denormalized = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()

    for params in parameter_combinations:
        # Seed the network as well: without random_state the weight
        # initialisation differs on every run and the comparison between
        # parameter combinations is not reproducible.
        model_bpnn = MLPRegressor(random_state=random_seed, **params)
        model_bpnn.fit(X_train, y_train)

        predictions = model_bpnn.predict(X_test)

        # Denormalise the predictions so the error is in the target's units
        predictions_denormalized = scaler_y.inverse_transform(predictions.reshape(-1, 1)).flatten()

        # Root mean squared error on the hold-out set
        error = np.sqrt(mean_squared_error(y_test_denormalized, predictions_denormalized))

        # Record the outcome of this experiment
        result = {
            'split_ratio': split_ratio,
            'hidden_layer_sizes': params['hidden_layer_sizes'],
            'learning_rate_init': params['learning_rate_init'],
            'max_iter': params['max_iter'],
            'error': error
        }
        results.append(result)

        print(f"Split Ratio: {split_ratio} | Hidden Layer Sizes: {params['hidden_layer_sizes']} | Learning Rate: {params['learning_rate_init']} | Max Iter: {params['max_iter']} | Error: {error}")

# Save all results
result_df = pd.DataFrame(results)
result_df.to_csv(os.path.join(path_hasil, "tuning_results.csv"), index=False, sep=',')

# Pick the row (split ratio + parameters) with the smallest hold-out RMSE.
# NOTE(review): the winner is selected on the same test data used to report
# its error, so the reported best error is an optimistic estimate.
best_result = result_df.loc[result_df['error'].idxmin()]
print("\nSplit Ratio Terbaik Berdasarkan Error Terkecil:")
print(best_result)

Index(['Tgl/bln/thn', 'suhu', 'kelembaban_udara', 'tekanan_udara',
       'arah_angin', 'kecepatan_angin', 'curah_hujan'],
      dtype='object')
Split Ratio: 0.1 | Hidden Layer Sizes: (3,) | Learning Rate: 1 | Max Iter: 500 | Error: 10.66109276712809
Split Ratio: 0.1 | Hidden Layer Sizes: (3,) | Learning Rate: 1 | Max Iter: 1000 | Error: 10.670441947593082
Split Ratio: 0.1 | Hidden Layer Sizes: (3,) | Learning Rate: 1 | Max Iter: 1500 | Error: 10.668774907097172
Split Ratio: 0.1 | Hidden Layer Sizes: (3,) | Learning Rate: 1 | Max Iter: 2000 | Error: 10.702687653846214
Split Ratio: 0.1 | Hidden Layer Sizes: (3,) | Learning Rate: 1 | Max Iter: 2500 | Error: 10.698948031533401
Split Ratio: 0.1 | Hidden Layer Sizes: (3,) | Learning Rate: 1 | Max Iter: 3000 | Error: 10.654511874648652
Split Ratio: 0.1 | Hidden Layer Sizes: (3,) | Learning Rate: 0.75 | Max Iter: 500 | Error: 10.68352090869787
Split Ratio: 0.1 | Hidden Layer Sizes: (3,) | Learning Rate: 0.75 | Max Iter: 1000 | Error: 10.82996

# 2. GridSearch CV

In [10]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import os
import joblib

# GridSearchCV sweep: for every train/test split ratio, cross-validate the
# whole parameter grid on the training part, then refit each candidate on the
# full training part and record its hold-out RMSE (in the target's units).

# Random seed shared by the data split and the network initialisation
random_seed = 42

# Load the dataset from the Excel workbook
df = pd.read_excel("dataset_ta.xlsx")

# Print the column names so the selections below can be checked against them
print(df.columns)

# Drop the date column (update the column name if needed)
df = df.drop(columns=["Tgl/bln/thn"])

# Separate the features (X) from the target (y)
# (update column names based on the actual columns in the DataFrame)
X = df[['suhu', 'kelembaban_udara', 'kecepatan_angin', 'arah_angin', 'tekanan_udara']]
y = df['curah_hujan']

# Min-max normalise the data; the target gets its own scaler so predictions
# can be mapped back to the original units afterwards.
# NOTE(review): both scalers are fitted on the full dataset *before* the
# train/test split (and before the CV folds are formed), so held-out min/max
# values leak into training. Consider fitting the scalers on training data
# only, e.g. via a Pipeline.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_normalized = scaler_X.fit_transform(X)
y_normalized = scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten()

# Output directory for the result files; create it if it does not exist yet
path_hasil = "Path Hasil"
os.makedirs(path_hasil, exist_ok=True)

# Parameter grid to search
param_grid = {
    'hidden_layer_sizes': [(3,), (4, 5), (6, 7, 8)],
    'learning_rate_init': [1, 0.75, 0.5, 0.25, 0.01, 0.001],
    'max_iter': [500, 1000, 1500, 2000, 2500, 3000]
}

# Test-set fractions to evaluate
split_ratios = [0.1, 0.2, 0.3]
results = []

for split_ratio in split_ratios:
    X_train, X_test, y_train, y_test = train_test_split(
        X_normalized, y_normalized, test_size=split_ratio, random_state=random_seed
    )

    # The denormalised ground truth only depends on the split, not on the
    # model parameters, so compute it once per split.
    y_test_denormalized = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()

    # Base estimator whose parameters the grid search varies
    mlp = MLPRegressor(solver='adam', random_state=random_seed)

    # 3-fold cross-validated grid search scored by negative MSE
    grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, scoring='neg_mean_squared_error', cv=3, verbose=2)

    # Run the search on the training part only
    grid_search.fit(X_train, y_train)

    # Walk over every candidate together with its cross-validation scores
    cv_results = grid_search.cv_results_
    for params, mean_test_score, std_test_score in zip(cv_results['params'], cv_results['mean_test_score'], cv_results['std_test_score']):
        # Refit this candidate on the full training part with the SAME solver
        # and seed as the estimator the grid search evaluated; otherwise the
        # hold-out error is non-deterministic and belongs to a differently
        # initialised model than the CV scores stored next to it.
        model_bpnn = MLPRegressor(solver='adam', random_state=random_seed, **params)
        model_bpnn.fit(X_train, y_train)
        predictions = model_bpnn.predict(X_test)

        # Denormalise the predictions so the error is in the target's units
        predictions_denormalized = scaler_y.inverse_transform(predictions.reshape(-1, 1)).flatten()

        # Root mean squared error on the hold-out set
        error = np.sqrt(mean_squared_error(y_test_denormalized, predictions_denormalized))

        # Record the outcome of this experiment
        result = {
            'split_ratio': split_ratio,
            'hidden_layer_sizes': params['hidden_layer_sizes'],
            'learning_rate_init': params['learning_rate_init'],
            'max_iter': params['max_iter'],
            'mean_test_score': mean_test_score,
            'std_test_score': std_test_score,
            'error': error
        }
        results.append(result)
        print(f"Split Ratio: {split_ratio} | Hidden Layer Sizes: {params['hidden_layer_sizes']} | Learning Rate: {params['learning_rate_init']} | Max Iter: {params['max_iter']} | Error: {error}")

# Save all results
result_df = pd.DataFrame(results)
all_results_csv_path = os.path.join(path_hasil, "tuning_results_all.csv")
result_df.to_csv(all_results_csv_path, index=False, sep=',')

# Pick the row (split ratio + parameters) with the smallest hold-out RMSE
# and store it in its own file.
# NOTE(review): the winner is selected on the same test data used to report
# its error, so the reported best error is an optimistic estimate.
best_result = result_df.loc[result_df['error'].idxmin()]
best_result_df = pd.DataFrame([best_result])
best_result_csv_path = os.path.join(path_hasil, "best_tuning_result.csv")
best_result_df.to_csv(best_result_csv_path, index=False, sep=',')

# Show the tuning results
print("\nHasil Tuning untuk setiap Split Ratio:")
print(result_df)

print("\nSplit Ratio Terbaik Berdasarkan Error Terkecil:")
print(best_result)

Index(['Tgl/bln/thn', 'suhu', 'kelembaban_udara', 'tekanan_udara',
       'arah_angin', 'kecepatan_angin', 'curah_hujan'],
      dtype='object')
Fitting 3 folds for each of 108 candidates, totalling 324 fits
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=500; total time=   0.0s
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=500; total time=   0.0s
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=500; total time=   0.0s
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=1000; total time=   0.0s
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=1000; total time=   0.0s
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=1000; total time=   0.0s
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=1500; total time=   0.0s
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=1500; total time=   0.0s
[CV] END hidden_layer_sizes=(3,), learning_rate_init=1, max_iter=1500; tota