<a href="https://colab.research.google.com/github/Dusein/MachineLearningTask/blob/main/14thWeekTask/Markov_model_dan_Hidden_Markov_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive inside the Colab runtime so that files stored there can be
# read through the /content/drive mount point used by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the BRFSS 2015 diabetes CSV on the mounted Google Drive;
# adjust this path to wherever the file lives in your own Drive.
file_path = '/content/drive/My Drive/Dataset/diabetes_012_health_indicators_BRFSS2015.csv'

# Load the CSV contents into a DataFrame.
data = pd.read_csv(filepath_or_buffer=file_path)

# Print the first five rows as a quick sanity check of the loaded table.
print(data.head(n=5))

   Diabetes_012  HighBP  HighChol  CholCheck   BMI  Smoker  Stroke  \
0           0.0     1.0       1.0        1.0  40.0     1.0     0.0   
1           0.0     0.0       0.0        0.0  25.0     1.0     0.0   
2           0.0     1.0       1.0        1.0  28.0     0.0     0.0   
3           0.0     1.0       0.0        1.0  27.0     0.0     0.0   
4           0.0     1.0       1.0        1.0  24.0     0.0     0.0   

   HeartDiseaseorAttack  PhysActivity  Fruits  ...  AnyHealthcare  \
0                   0.0           0.0     0.0  ...            1.0   
1                   0.0           1.0     0.0  ...            0.0   
2                   0.0           0.0     1.0  ...            1.0   
3                   0.0           1.0     1.0  ...            1.0   
4                   0.0           1.0     1.0  ...            1.0   

   NoDocbcCost  GenHlth  MentHlth  PhysHlth  DiffWalk  Sex   Age  Education  \
0          0.0      5.0      18.0      15.0       1.0  0.0   9.0        4.0   
1     

In [None]:
# Dataset wrapper used for training and evaluation
class DiabetesDataset(Dataset):
    """Expose a tabular frame as (features, label) samples.

    The first column of ``data`` is taken as the label and every remaining
    column as a feature. Both are kept as array attributes (``X`` and ``y``)
    and converted to tensors one sample at a time on access.
    """

    def __init__(self, data):
        label_column = data.iloc[:, 0]
        feature_columns = data.iloc[:, 1:]
        self.X = feature_columns.values  # features
        self.y = label_column.values  # labels

    def __len__(self):
        # One label per sample, so the label array length is the dataset size.
        return self.y.shape[0]

    def __getitem__(self, idx):
        features = torch.tensor(self.X[idx], dtype=torch.float32)
        # Labels are cast to integer class indices.
        label = torch.tensor(self.y[idx], dtype=torch.long)
        return features, label

# Split the full table into training and held-out test partitions (80/20),
# with a fixed random_state so the split itself is repeatable.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

# Wrap each partition so individual samples can be indexed as tensors.
train_dataset, test_dataset = (
    DiabetesDataset(split) for split in (train_data, test_data)
)

# Batch the samples; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

print("Dataset untuk pelatihan dan pengujian telah disiapkan")

Dataset untuk pelatihan dan pengujian telah disiapkan


In [None]:
# RNN model architecture
class RNNModel(nn.Module):
    """Vanilla RNN classifier: RNN -> pooling over dim 1 -> linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the RNN hidden state.
        output_size: number of values produced by the final linear layer.
        num_layers: number of stacked RNN layers.
        pooling: "avg" or "max"; how the per-step RNN outputs are reduced
            along dim 1 before the linear layer.

    Raises:
        ValueError: if ``pooling`` is neither "max" nor "avg".
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, pooling="avg"):
        super().__init__()

        # Validate up front so no layers are allocated for an invalid configuration.
        if pooling not in ("max", "avg"):
            raise ValueError("Pooling harus 'max' atau 'avg'")

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.pooling = pooling

        # Recurrent layer; batch_first=True means inputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

        # Output (linear) layer
        self.fc = nn.Linear(hidden_size, output_size)

    def pool(self, x):
        """Reduce ``x`` along dim 1 using the configured pooling mode.

        Implemented as a method instead of a lambda stored on the instance:
        lambda attributes cannot be pickled, which breaks saving the whole
        module object.
        """
        if self.pooling == "max":
            return torch.max(x, dim=1).values
        return torch.mean(x, dim=1)

    def forward(self, x):
        """Run the network on ``x`` of shape (batch, seq, input_size)."""
        # Initial hidden state, created directly on the input's device and dtype
        # rather than on the CPU followed by a copy.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )

        # Forward pass through the RNN; the final hidden state is not needed.
        out, _ = self.rnn(x, h0)

        # Pool the per-step outputs, then project with the linear layer.
        out = self.pool(out)
        return self.fc(out)

# Model parameters
input_size = train_dataset.X.shape[1]  # number of feature columns
hidden_sizes = [32, 64, 128]  # hidden sizes to compare
# Number of classes. CrossEntropyLoss expects class-index targets in [0, C), so C
# is derived from the largest label rather than from the count of distinct labels:
# the count is too small whenever some label value is absent from the training split.
output_size = int(np.max(train_dataset.y)) + 1
optimizers = ["SGD", "RMSProp", "Adam"]  # optimizers to compare


In [None]:
# Training routine with early stopping
def train_model_with_early_stopping(model, optimizer, criterion, train_loader, test_loader, epochs=50, patience=5):
    """Train ``model`` and stop once the validation loss stops improving.

    Each batch of inputs is given an extra axis at position 1 before being fed
    to the model. After every epoch the model is evaluated on ``test_loader``;
    note that this same loader drives early stopping, so the reported numbers
    are validation figures rather than an untouched test score.

    Args:
        model: the network to train; it is moved to CUDA when available.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: loss callable taking ``(outputs, labels)``.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        test_loader: iterable of ``(inputs, labels)`` evaluation batches.
        epochs: maximum number of epochs.
        patience: number of consecutive epochs without validation-loss
            improvement after which training stops.

    Returns:
        list of dict: one entry per completed epoch with the keys ``"epoch"``,
        ``"train_loss"``, ``"val_loss"`` and ``"accuracy"``. On return the
        model holds the weights of the epoch with the lowest validation loss.

    Raises:
        ValueError: if either loader yields no batches (or no samples).
    """
    import copy  # local import so this cell does not depend on an extra import cell

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # Scheduler that multiplies the learning rate by 0.1 every 10 epochs
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    best_loss = float('inf')
    best_state = None
    patience_counter = 0
    history = []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        train_batches = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs.unsqueeze(1))
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_batches += 1

        if train_batches == 0:
            raise ValueError("train_loader yielded no batches")

        # Report the mean per-batch loss so it is on the same scale as the
        # validation loss (the raw sum grows with the number of batches).
        train_loss = running_loss / train_batches

        scheduler.step()

        # Evaluate on the held-out loader
        model.eval()
        validation_loss = 0.0
        val_batches = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs.unsqueeze(1))
                loss = criterion(outputs, labels)
                validation_loss += loss.item()
                val_batches += 1

                # Predicted class = index of the largest output per sample
                _, predicted = torch.max(outputs, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if val_batches == 0 or total == 0:
            raise ValueError("test_loader yielded no samples")

        validation_loss /= val_batches
        accuracy = 100 * correct / total
        print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {validation_loss:.4f}, Accuracy: {accuracy:.2f}%")

        history.append({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": validation_loss,
            "accuracy": accuracy,
        })

        # Early stopping bookkeeping
        if validation_loss < best_loss:
            best_loss = validation_loss
            patience_counter = 0
            # Snapshot the best weights so they can be restored afterwards.
            best_state = copy.deepcopy(model.state_dict())
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print("Early stopping triggered")
            break

    # Leave the model at its best validation epoch rather than the last one.
    if best_state is not None:
        model.load_state_dict(best_state)

    return history

In [None]:
# Compare every (hidden size, optimizer) combination, each trained with early stopping.

# Dispatch table from the names listed in `optimizers` to optimizer classes.
optimizer_factories = {
    "SGD": optim.SGD,
    "RMSProp": optim.RMSprop,
    "Adam": optim.Adam,
}

# Fail before any training starts if a name has no matching optimizer; an
# if/elif chain without a fallback would silently reuse the optimizer from
# the previous iteration, which is bound to a different model's parameters.
unknown_optimizers = [name for name in optimizers if name not in optimizer_factories]
if unknown_optimizers:
    raise ValueError(f"Unknown optimizer name(s): {unknown_optimizers}")

# The loss holds no per-run state, so a single instance serves every configuration.
criterion = nn.CrossEntropyLoss()

for hidden_size in hidden_sizes:
    for opt_name in optimizers:
        print(f"\nTraining model dengan hidden size: {hidden_size} dan optimizer: {opt_name}")

        # Re-seed before building each model so that weight initialisation and
        # batch shuffling are reproducible and identical across the optimizers
        # being compared.
        torch.manual_seed(42)
        model = RNNModel(input_size, hidden_size, output_size, pooling="avg")

        optimizer = optimizer_factories[opt_name](model.parameters(), lr=0.01)

        train_model_with_early_stopping(model, optimizer, criterion, train_loader, test_loader, epochs=50, patience=5)


Training model dengan hidden size: 32 dan optimizer: SGD
Epoch 1/50, Training Loss: 1376.1208, Validation Loss: 0.4177, Accuracy: 84.40%
Epoch 2/50, Training Loss: 1307.6033, Validation Loss: 0.4060, Accuracy: 84.46%
Epoch 3/50, Training Loss: 1296.3906, Validation Loss: 0.4080, Accuracy: 84.47%
Epoch 4/50, Training Loss: 1292.9697, Validation Loss: 0.4003, Accuracy: 84.56%
Epoch 5/50, Training Loss: 1288.6856, Validation Loss: 0.4018, Accuracy: 84.78%
Epoch 6/50, Training Loss: 1287.5206, Validation Loss: 0.3991, Accuracy: 84.55%
Epoch 7/50, Training Loss: 1285.9557, Validation Loss: 0.3979, Accuracy: 84.75%
Epoch 8/50, Training Loss: 1284.1900, Validation Loss: 0.3970, Accuracy: 84.74%
Epoch 9/50, Training Loss: 1283.2235, Validation Loss: 0.4010, Accuracy: 84.44%
Epoch 10/50, Training Loss: 1281.1611, Validation Loss: 0.3996, Accuracy: 84.68%
Epoch 11/50, Training Loss: 1264.6771, Validation Loss: 0.3957, Accuracy: 84.92%
Epoch 12/50, Training Loss: 1263.8520, Validation Loss: 0.39