In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score

Segmen ini mengimpor pustaka-pustaka penting yang dibutuhkan untuk proyek machine learning:

- torch: Pustaka utama untuk deep learning di Python
- pandas: Digunakan untuk manipulasi data
- numpy: Untuk operasi numerik
- sklearn: Menyediakan alat untuk pembagian data, pra-pemrosesan, dan evaluasi model
- matplotlib: Untuk visualisasi data dan grafik

In [8]:
# Colab-only cell: mounts the user's Google Drive at /content/drive.
# NOTE(review): the dataset path passed to run_experiments in the main cell is
# under /content/sample_data, not under this mount point — confirm the mount is
# still required.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Data preprocessing
def preprocess_data(data):
    """Label-encode the categorical columns, split off the target and standardize the features.

    The input frame is left untouched: all work happens on a derived frame, so
    the caller keeps the raw categories and the function can be re-run on the
    same frame.

    Args:
        data: DataFrame containing at least the columns 'job', 'marital',
            'education', 'default', 'housing', 'loan', 'contact', 'month',
            'pdays', 'previous', 'poutcome' and the target 'y'. Any other
            column is passed to the scaler as-is, so it must be numeric.

    Returns:
        A tuple ``(X_scaled, y)`` where ``X_scaled`` is the array returned by
        ``StandardScaler.fit_transform`` for every column except 'y', 'pdays',
        'previous' and 'poutcome', and ``y`` is the label-encoded 'y' column
        as a Series.

    Raises:
        KeyError: if one of the expected columns is missing.

    Note:
        The scaler is fit on every row of ``data``. A caller that splits the
        result into train and test sets afterwards will therefore have
        test-row statistics in the scaling.
    """
    dropped_columns = ['pdays', 'previous', 'poutcome']  # Dropping less relevant columns
    # 'poutcome' is dropped above, so there is no point in encoding it first.
    categorical_columns = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'y']

    # DataFrame.drop returns a new frame; assigning into it does not touch `data`.
    encoded = data.drop(columns=dropped_columns)

    # Convert categorical variables to numeric
    le = LabelEncoder()
    for col in categorical_columns:
        encoded[col] = le.fit_transform(encoded[col])

    # Split features and target
    X = encoded.drop(columns=['y'])
    y = encoded['y']

    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y

Fungsi ini melakukan pra-pemrosesan data dengan beberapa langkah kunci:

- Mengonversi kolom kategorikal menjadi numerik menggunakan LabelEncoder
- Memisahkan fitur (X) dari variabel target (y)
- Menghapus beberapa kolom yang dianggap kurang relevan (`pdays`, `previous`, `poutcome`)
- Melakukan penskalaan fitur menggunakan StandardScaler agar semua fitur berada pada skala yang sama

In [4]:
# MLP Model
class MLP(nn.Module):
    """Multilayer perceptron with equally wide hidden layers and a single output unit.

    Args:
        input_dim: number of input features.
        hidden_layers: number of hidden layers; values below 1 still produce
            one hidden layer because the first linear layer is always created.
        neurons_per_layer: width of every hidden layer.
        activation: 'relu', 'sigmoid' or 'tanh'; any other value (e.g.
            'linear') selects the identity function.
    """

    def __init__(self, input_dim, hidden_layers, neurons_per_layer, activation):
        super().__init__()

        # Layers are instantiated in network order (first, middle, last) so that
        # parameter initialisation consumes the RNG in a fixed sequence.
        first = nn.Linear(input_dim, neurons_per_layer)
        middle = [
            nn.Linear(neurons_per_layer, neurons_per_layer)
            for _ in range(hidden_layers - 1)
        ]
        last = nn.Linear(neurons_per_layer, 1)
        self.layers = nn.ModuleList([first, *middle, last])

        # Name -> module class; unknown names fall back to the identity (linear).
        known_activations = {
            'relu': nn.ReLU,
            'sigmoid': nn.Sigmoid,
            'tanh': nn.Tanh,
        }
        self.activation = known_activations.get(activation, nn.Identity)()

    def forward(self, x):
        """Apply every layer but the last with the activation, then the raw output layer."""
        *body, head = self.layers
        for linear in body:
            x = self.activation(linear(x))
        # No activation on output layer for regression
        return head(x)

Kelas MLP mendefinisikan arsitektur jaringan saraf:

- Menerima parameter untuk dimensi input, jumlah lapisan tersembunyi, jumlah neuron per lapisan, dan fungsi aktivasi
- Membuat lapisan-lapisan linear dengan jumlah neuron yang sama di setiap lapisan tersembunyi
- Mendukung berbagai fungsi aktivasi: ReLU, Sigmoid, Tanh, atau linear (tanpa aktivasi)
- Metode `forward` menentukan bagaimana data mengalir melalui jaringan; lapisan output tidak diberi aktivasi

In [5]:
def train_model(X_train, y_train, X_test, y_test, hidden_layers, neurons, activation, epochs, lr, batch_size, seed=None):
    """Train an MLP with MSE loss and Adam, and report train/test metrics.

    Args:
        X_train, X_test: 2-D array-likes of features (NumPy array, DataFrame, ...).
        y_train, y_test: 1-D array-likes of targets (NumPy array, Series, ...).
        hidden_layers: number of hidden layers passed to ``MLP``.
        neurons: width of each hidden layer passed to ``MLP``.
        activation: activation name passed to ``MLP``.
        epochs: number of passes over the training set.
        lr: learning rate for Adam.
        batch_size: mini-batch size of the shuffled training loader.
        seed: optional integer passed to ``torch.manual_seed`` before the model
            and the loader are created, making weight initialisation and batch
            shuffling repeatable. ``None`` (default) leaves the RNG untouched.

    Returns:
        dict with the final 'train_r2', 'test_r2', 'train_mse' and 'test_mse',
        plus 'train_losses' and 'test_losses': lists holding the full-dataset
        MSE measured after every epoch.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Convert data to float32 tensors; np.asarray accepts arrays, Series and frames alike.
    X_train = torch.tensor(np.asarray(X_train, dtype=np.float32))
    y_train = torch.tensor(np.asarray(y_train, dtype=np.float32)).reshape(-1, 1)
    X_test = torch.tensor(np.asarray(X_test, dtype=np.float32))
    y_test = torch.tensor(np.asarray(y_test, dtype=np.float32)).reshape(-1, 1)

    # Create data loader
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Initialize model
    model = MLP(X_train.shape[1], hidden_layers, neurons, activation)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training loop
    train_losses = []
    test_losses = []

    for epoch in range(epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()

        # Evaluate on the full train and test sets with the weights reached this epoch
        model.eval()
        with torch.no_grad():
            train_loss = criterion(model(X_train), y_train).item()
            test_loss = criterion(model(X_test), y_test).item()
        train_losses.append(train_loss)
        test_losses.append(test_loss)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

    # Final evaluation
    model.eval()
    with torch.no_grad():
        train_pred = model(X_train).numpy()
        test_pred = model(X_test).numpy()

    # Hand plain NumPy arrays to the sklearn metrics rather than torch tensors.
    y_train_np = y_train.numpy()
    y_test_np = y_test.numpy()

    return {
        'train_r2': r2_score(y_train_np, train_pred),
        'test_r2': r2_score(y_test_np, test_pred),
        'train_mse': mean_squared_error(y_train_np, train_pred),
        'test_mse': mean_squared_error(y_test_np, test_pred),
        'train_losses': train_losses,
        'test_losses': test_losses
    }

Fungsi ini melakukan pelatihan model dengan fitur-fitur penting:

- Konversi data ke tensor PyTorch
- Membuat data loader untuk batch training
- Inisialisasi model, loss function (MSE), dan optimizer (Adam)
- Loop pelatihan dengan:
  - Proses training per batch
  - Evaluasi loss di data training dan testing
  - Pencetakan progress setiap 10 epoch
- Menghitung metrik evaluasi seperti R-squared dan Mean Squared Error

In [6]:
# Experiment function
def run_experiments(data_path, parameters=('hidden_layers',)):
    """Run one-at-a-time hyperparameter sweeps around a fixed base configuration.

    For every requested hyperparameter, each candidate value replaces the
    corresponding entry of the base configuration while all other entries stay
    at their base values, and a model is trained with ``train_model``.

    Args:
        data_path: path of the ';'-separated CSV file to load.
        parameters: name, or iterable of names, of the hyperparameters to
            sweep. Valid names are 'hidden_layers', 'neurons', 'activation',
            'epochs', 'lr' and 'batch_size'. Defaults to sweeping only
            'hidden_layers'.

    Returns:
        list of dicts, one per training run, with keys 'parameter' (the swept
        hyperparameter name), 'value' (the value used) and 'metrics' (the dict
        returned by ``train_model``).

    Raises:
        ValueError: if ``parameters`` contains an unknown hyperparameter name.
    """
    # Hyperparameter configurations
    search_space = {
        'hidden_layers': [1, 2, 3],
        'neurons': [4, 8, 16, 32, 64],
        'activation': ['linear', 'sigmoid', 'relu', 'tanh'],
        'epochs': [1, 10, 25, 50, 100, 250],
        'lr': [10, 1, 0.1, 0.01, 0.001, 0.0001],
        'batch_size': [16, 32, 64, 128, 256, 512],
    }

    # Base configuration
    base_config = {
        'hidden_layers': 2,
        'neurons': 32,
        'activation': 'relu',
        'epochs': 50,
        'lr': 0.001,
        'batch_size': 64
    }

    # A bare string would otherwise be iterated character by character.
    if isinstance(parameters, str):
        parameters = (parameters,)
    parameters = list(parameters)

    # Validate before loading data so a typo fails immediately.
    unknown = [name for name in parameters if name not in search_space]
    if unknown:
        raise ValueError(
            f'Unknown hyperparameter(s) {unknown}; expected a subset of {sorted(search_space)}'
        )

    # Load and preprocess data
    data = pd.read_csv(data_path, sep=';')
    X_scaled, y = preprocess_data(data)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    results = []

    # Test each hyperparameter individually
    for name in parameters:
        for value in search_space[name]:
            config = {**base_config, name: value}
            metrics = train_model(X_train, y_train, X_test, y_test, **config)
            results.append({
                'parameter': name,
                'value': value,
                'metrics': metrics
            })

    return results

Fungsi ini merancang eksperimen untuk menguji berbagai hyperparameter:

- Memuat, memproses, dan membagi data menjadi data latih dan data uji
- Mendefinisikan rentang hyperparameter untuk pengujian
- Membuat konfigurasi dasar sebagai titik awal
- Menjalankan eksperimen dengan mengubah satu hyperparameter pada satu waktu (saat ini hanya `hidden_layers` yang dijalankan)
- Menyimpan hasil untuk analisis lebih lanjut

In [7]:
# Function to plot results
def plot_results(results):
    """Visualize the experiment results, one figure per swept hyperparameter.

    Each figure has three panels: the per-epoch train/test loss curves of every
    run, the final train/test MSE per value, and the final train/test R-squared
    per value.

    Args:
        results: list of dicts with keys 'parameter', 'value' and 'metrics',
            where 'metrics' holds 'train_losses', 'test_losses', 'train_mse',
            'test_mse', 'train_r2' and 'test_r2'. An empty list draws nothing.

    Returns:
        None. Figures are displayed with ``plt.show()``.
    """
    if not results:
        return

    # Group runs by the hyperparameter that was varied, keeping run order.
    runs_by_parameter = {}
    for run in results:
        runs_by_parameter.setdefault(run['parameter'], []).append(run)

    bar_width = 0.4
    for parameter, runs in runs_by_parameter.items():
        fig, (curve_ax, mse_ax, r2_ax) = plt.subplots(1, 3, figsize=(18, 5))
        fig.suptitle(f'Effect of {parameter}')

        # Panel 1: loss curves; train is dashed, test is solid, same colour per run.
        for run in runs:
            metrics = run['metrics']
            label = f"{parameter}={run['value']}"
            train_curve = metrics['train_losses']
            test_curve = metrics['test_losses']
            train_line = curve_ax.plot(
                range(1, len(train_curve) + 1), train_curve,
                linestyle='--', marker='.', label=f'{label} (train)'
            )[0]
            curve_ax.plot(
                range(1, len(test_curve) + 1), test_curve,
                color=train_line.get_color(), marker='.', label=f'{label} (test)'
            )
        curve_ax.set(title='Loss per epoch', xlabel='Epoch', ylabel='MSE loss')
        curve_ax.legend(fontsize='small')

        # Panels 2 and 3: final metrics as grouped bars, one group per value.
        positions = list(range(len(runs)))
        tick_labels = [str(run['value']) for run in runs]
        panels = (
            (mse_ax, 'train_mse', 'test_mse', 'Final MSE'),
            (r2_ax, 'train_r2', 'test_r2', 'Final R-squared'),
        )
        for ax, train_key, test_key, title in panels:
            ax.bar(
                [p - bar_width / 2 for p in positions],
                [run['metrics'][train_key] for run in runs],
                width=bar_width, label='Train'
            )
            ax.bar(
                [p + bar_width / 2 for p in positions],
                [run['metrics'][test_key] for run in runs],
                width=bar_width, label='Test'
            )
            ax.set_xticks(positions)
            ax.set_xticklabels(tick_labels)
            ax.set(title=title, xlabel=parameter)
            ax.legend()

        fig.tight_layout()
        plt.show()

In [9]:
# Main execution: run the experiments on the bank dataset and visualize them.
if __name__ == "__main__":
    DATA_PATH = '/content/sample_data/bank-full.csv'
    results = run_experiments(DATA_PATH)
    plot_results(results)

Epoch [10/50], Train Loss: 0.0761, Test Loss: 0.0793
Epoch [20/50], Train Loss: 0.0742, Test Loss: 0.0775
Epoch [30/50], Train Loss: 0.0737, Test Loss: 0.0772
Epoch [40/50], Train Loss: 0.0724, Test Loss: 0.0765
Epoch [50/50], Train Loss: 0.0719, Test Loss: 0.0761
Epoch [10/50], Train Loss: 0.0724, Test Loss: 0.0766
Epoch [20/50], Train Loss: 0.0698, Test Loss: 0.0754
Epoch [30/50], Train Loss: 0.0682, Test Loss: 0.0757
Epoch [40/50], Train Loss: 0.0683, Test Loss: 0.0778
Epoch [50/50], Train Loss: 0.0655, Test Loss: 0.0760
Epoch [10/50], Train Loss: 0.0707, Test Loss: 0.0759
Epoch [20/50], Train Loss: 0.0658, Test Loss: 0.0742
Epoch [30/50], Train Loss: 0.0636, Test Loss: 0.0749
Epoch [40/50], Train Loss: 0.0617, Test Loss: 0.0767
Epoch [50/50], Train Loss: 0.0593, Test Loss: 0.0762
