<a href="https://colab.research.google.com/github/Alfikriangelo/MachineLearningTasks/blob/main/10thWeekTask/RegressionMLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [59]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import KNNImputer


In [60]:
# Definisikan MLP untuk regresi
class MLPRegression(nn.Module):
    def __init__(self, input_size, hidden_layers, activation_function):
        super(MLPRegression, self).__init__()

        # Daftar untuk layer-layer MLP
        layers = []
        prev_size = input_size
        for h in hidden_layers:
            layers.append(nn.Linear(prev_size, h))
            if activation_function == 'relu':
                layers.append(nn.ReLU())
            elif activation_function == 'sigmoid':
                layers.append(nn.Sigmoid())
            elif activation_function == 'tanh':
                layers.append(nn.Tanh())
            elif activation_function == 'softmax':
                layers.append(nn.Softmax(dim=1))
            prev_size = h
        layers.append(nn.Linear(prev_size, 1))  # Output layer

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


In [61]:
# Load dan preprocess data
dataset = pd.read_csv("sample_data/beijing-pm25.csv")

In [62]:
# Melihat missing values
missing_values = dataset.isnull().sum()
print("Jumlah missing value di setiap kolom:")
print(missing_values)


Jumlah missing value di setiap kolom:
No          0
year        0
month       0
day         0
hour        0
pm2.5    2067
DEWP        0
TEMP        0
PRES        0
cbwd        0
Iws         0
Is          0
Ir          0
dtype: int64


In [63]:
# Mengisi missing values pada kolom 'pm2.5' menggunakan KNN Imputer
imputer = KNNImputer(n_neighbors=3, weights='uniform')
dataset[['pm2.5']] = imputer.fit_transform(dataset[['pm2.5']])

In [64]:
# Memastikan bahwa missing value sudah diisi
missing_values = dataset.isnull().sum()
print("Jumlah missing value di setiap kolom:")
print(missing_values)


Jumlah missing value di setiap kolom:
No       0
year     0
month    0
day      0
hour     0
pm2.5    0
DEWP     0
TEMP     0
PRES     0
cbwd     0
Iws      0
Is       0
Ir       0
dtype: int64


In [65]:
# Memilih fitur dan target
X = dataset[['year', 'month', 'day', 'hour', 'DEWP', 'TEMP', 'PRES', 'Iws', 'Is', 'Ir']].values
y = dataset['pm2.5'].values

In [66]:
# Normalisasi fitur
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

In [67]:
# Split data menjadi train dan test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [68]:
# Konversi ke tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [69]:
# Daftar parameter yang akan dicoba
hidden_layers_options = [
    (16, 32, 64)
]
activation_functions = ['relu', 'sigmoid']
epochs = [ 100, 250]
learning_rates = [ 1,0.1,0.01]
batch_sizes = [128,256, 512]


In [70]:
# Hasil eksperimen
results = []
iteration = 1

In [71]:
# Loop untuk mencocokkan semua kombinasi
for hidden_layers in hidden_layers_options:
    for activation_function in activation_functions:
        for epoch in epochs:
            for lr in learning_rates:
                for batch_size in batch_sizes:
                    print(f"Iteration {iteration}: Training with hidden_layers={hidden_layers}, activation_function={activation_function}, epoch={epoch}, lr={lr}, batch_size={batch_size}")
                    iteration += 1

                    # Membuat model
                    model = MLPRegression(X_train.shape[1], hidden_layers, activation_function)
                    optimizer = optim.Adam(model.parameters(), lr=lr)
                    criterion = nn.MSELoss()

                    # Train model
                    model.train()
                    for e in range(epoch):
                        for i in range(0, len(X_train_tensor), batch_size):
                            # Batch slicing
                            inputs = X_train_tensor[i:i+batch_size]
                            targets = y_train_tensor[i:i+batch_size]

                            # Forward pass
                            optimizer.zero_grad()
                            outputs = model(inputs)
                            loss = criterion(outputs, targets)

                            # Backward pass and optimization
                            loss.backward()
                            optimizer.step()

                    # Evaluasi model
                    model.eval()
                    with torch.no_grad():
                        test_predictions = model(X_test_tensor).numpy()
                        y_test_actual = y_test_tensor.numpy()

                    # Menghitung R2 dan MSE
                    r2 = r2_score(y_test_actual, test_predictions)
                    mse = mean_squared_error(y_test_actual, test_predictions)

                    # Menyimpan hasil
                    results.append({
                        'hidden_layers': hidden_layers,
                        'activation_function': activation_function,
                        'epoch': epoch,
                        'learning_rate': lr,
                        'batch_size': batch_size,
                        'R2': r2,
                        'MSE': mse
                    })

Iteration 1: Training with hidden_layers=(16, 32, 64), activation_function=relu, epoch=100, lr=1, batch_size=128
Iteration 2: Training with hidden_layers=(16, 32, 64), activation_function=relu, epoch=100, lr=1, batch_size=256
Iteration 3: Training with hidden_layers=(16, 32, 64), activation_function=relu, epoch=100, lr=1, batch_size=512
Iteration 4: Training with hidden_layers=(16, 32, 64), activation_function=relu, epoch=100, lr=0.1, batch_size=128
Iteration 5: Training with hidden_layers=(16, 32, 64), activation_function=relu, epoch=100, lr=0.1, batch_size=256
Iteration 6: Training with hidden_layers=(16, 32, 64), activation_function=relu, epoch=100, lr=0.1, batch_size=512
Iteration 7: Training with hidden_layers=(16, 32, 64), activation_function=relu, epoch=100, lr=0.01, batch_size=128
Iteration 8: Training with hidden_layers=(16, 32, 64), activation_function=relu, epoch=100, lr=0.01, batch_size=256
Iteration 9: Training with hidden_layers=(16, 32, 64), activation_function=relu, epo

In [72]:
# Menampilkan hasil eksperimen
results_df = pd.DataFrame(results)
print(results_df)

   hidden_layers activation_function  epoch  learning_rate  batch_size  \
0   (16, 32, 64)                relu    100           1.00         128   
1   (16, 32, 64)                relu    100           1.00         256   
2   (16, 32, 64)                relu    100           1.00         512   
3   (16, 32, 64)                relu    100           0.10         128   
4   (16, 32, 64)                relu    100           0.10         256   
5   (16, 32, 64)                relu    100           0.10         512   
6   (16, 32, 64)                relu    100           0.01         128   
7   (16, 32, 64)                relu    100           0.01         256   
8   (16, 32, 64)                relu    100           0.01         512   
9   (16, 32, 64)                relu    250           1.00         128   
10  (16, 32, 64)                relu    250           1.00         256   
11  (16, 32, 64)                relu    250           1.00         512   
12  (16, 32, 64)                relu  

In [73]:
# Find the best configuration
best_config = max(results, key=lambda x: x['R2'])
print("Best Configuration:", best_config)


Best Configuration: {'hidden_layers': (16, 32, 64), 'activation_function': 'relu', 'epoch': 250, 'learning_rate': 0.01, 'batch_size': 128, 'R2': 0.6941537857055664, 'MSE': 2427.8652}
