<a href="https://colab.research.google.com/github/Alfikriangelo/MachineLearningTasks/blob/main/10thWeekTask/ClassificationMLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [56]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.impute import KNNImputer

In [57]:
data = pd.read_csv("sample_data/beijing-pm25.csv")


In [58]:
# Melihat missing values
missing_values = data.isnull().sum()
print("Jumlah missing value di setiap kolom:")
print(missing_values)


Jumlah missing value di setiap kolom:
No          0
year        0
month       0
day         0
hour        0
pm2.5    2067
DEWP        0
TEMP        0
PRES        0
cbwd        0
Iws         0
Is          0
Ir          0
dtype: int64


In [59]:
# Mengisi missing values pada kolom 'pm2.5' menggunakan KNN Imputer
imputer = KNNImputer(n_neighbors=3, weights='uniform')
data[['pm2.5']] = imputer.fit_transform(data[['pm2.5']])

In [60]:
# Memastikan bahwa missing value sudah diisi
missing_values = data.isnull().sum()
print("Jumlah missing value di setiap kolom:")
print(missing_values)


Jumlah missing value di setiap kolom:
No       0
year     0
month    0
day      0
hour     0
pm2.5    0
DEWP     0
TEMP     0
PRES     0
cbwd     0
Iws      0
Is       0
Ir       0
dtype: int64


In [61]:
# Create target column (classification based on pm2.5 levels)
def categorize_pm25(value):
    if value <= 35:
        return 0  # Low
    elif value <= 75:
        return 1  # Moderate
    else:
        return 2  # High

data['target'] = data['pm2.5'].apply(categorize_pm25)

In [62]:
# Preprocess data
X = data.drop(columns=['No', 'year', 'month', 'day', 'hour', 'pm2.5', 'cbwd', 'target'])
# Drop columns that are not features (adjust as needed)
y = data['target']

In [63]:
# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [64]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [65]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

In [66]:
# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

In [67]:
# Function to define MLP model
def create_mlp(input_size, hidden_layers, activation_fn, num_classes):
    layers = []
    in_features = input_size

    # Add hidden layers
    for hidden_units in hidden_layers:
        layers.append(nn.Linear(in_features, hidden_units))
        layers.append(activation_fn())
        in_features = hidden_units

    # Add output layer
    layers.append(nn.Linear(in_features, num_classes))
    return nn.Sequential(*layers)

In [68]:
# Hyperparameters
input_size = X_train.shape[1]
hidden_layer_configs = [[16, 32, 64]]
activation_fns = [nn.ReLU, nn.Sigmoid]
epochs_list = [100, 250]
learning_rates = [1, 0.1,0.01]
batch_sizes = [128,256, 512]
num_classes = len(np.unique(y))

In [69]:
# Experiment loop
results = []
iteration = 1

for hidden_layers in hidden_layer_configs:
    for activation_fn in activation_fns:
        for epochs in epochs_list:
            for lr in learning_rates:
                for batch_size in batch_sizes:
                    print(f"Iteration {iteration}: Training with hidden_layers={hidden_layers}, activation_function={activation_fn.__name__}, epoch={epochs}, lr={lr}, batch_size={batch_size}")
                    iteration += 1

                    # Create model
                    model = create_mlp(input_size, hidden_layers, activation_fn, num_classes)
                    criterion = nn.CrossEntropyLoss()
                    optimizer = optim.Adam(model.parameters(), lr=lr)

                    # DataLoader
                    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

                    # Training loop
                    model.train()
                    for epoch in range(epochs):
                        for X_batch, y_batch in train_loader:
                            optimizer.zero_grad()
                            outputs = model(X_batch)
                            loss = criterion(outputs, y_batch)
                            loss.backward()
                            optimizer.step()

                    # Evaluation
                    model.eval()
                    with torch.no_grad():
                        y_pred = model(X_test_tensor)
                        y_pred_labels = torch.argmax(y_pred, axis=1)
                        acc = accuracy_score(y_test_tensor, y_pred_labels)

                    # Save results
                    results.append({
                        'hidden_layers': hidden_layers,
                        'activation_fn': activation_fn.__name__,
                        'epochs': epochs,
                        'learning_rate': lr,
                        'batch_size': batch_size,
                        'accuracy': acc
                    })


Iteration 1: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epoch=100, lr=1, batch_size=128
Iteration 2: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epoch=100, lr=1, batch_size=256
Iteration 3: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epoch=100, lr=1, batch_size=512
Iteration 4: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epoch=100, lr=0.1, batch_size=128
Iteration 5: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epoch=100, lr=0.1, batch_size=256
Iteration 6: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epoch=100, lr=0.1, batch_size=512
Iteration 7: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epoch=100, lr=0.01, batch_size=128
Iteration 8: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epoch=100, lr=0.01, batch_size=256
Iteration 9: Training with hidden_layers=[16, 32, 64], activation_function=ReLU, epo

In [71]:
# Menampilkan hasil eksperimen
results_df = pd.DataFrame(results)
print(results_df)

   hidden_layers activation_fn  epochs  learning_rate  batch_size  accuracy
0   [16, 32, 64]          ReLU     100           1.00         128  0.514204
1   [16, 32, 64]          ReLU     100           1.00         256  0.514204
2   [16, 32, 64]          ReLU     100           1.00         512  0.514204
3   [16, 32, 64]          ReLU     100           0.10         128  0.613805
4   [16, 32, 64]          ReLU     100           0.10         256  0.640274
5   [16, 32, 64]          ReLU     100           0.10         512  0.645636
6   [16, 32, 64]          ReLU     100           0.01         128  0.651683
7   [16, 32, 64]          ReLU     100           0.01         256  0.652139
8   [16, 32, 64]          ReLU     100           0.01         512  0.651797
9   [16, 32, 64]          ReLU     250           1.00         128  0.514204
10  [16, 32, 64]          ReLU     250           1.00         256  0.514204
11  [16, 32, 64]          ReLU     250           1.00         512  0.514204
12  [16, 32,

In [70]:
# Find the best configuration
best_config = max(results, key=lambda x: x['accuracy'])
print("Best Configuration:", best_config)


Best Configuration: {'hidden_layers': [16, 32, 64], 'activation_fn': 'Sigmoid', 'epochs': 250, 'learning_rate': 0.01, 'batch_size': 128, 'accuracy': 0.6592127780946948}
