<a href="https://colab.research.google.com/github/Dusein/MachineLearningTask/blob/main/14thWeekTask/RNN_dan_Deep_RNN_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Path to the diabetes health-indicators CSV on the mounted Google Drive;
# change it to wherever your copy of the file lives
file_path = '/content/drive/My Drive/Dataset/diabetes_012_health_indicators_BRFSS2015.csv'

# Read the CSV file into a DataFrame
data = pd.read_csv(file_path)

# Show the first few rows of the DataFrame
print(data.head())

   Diabetes_012  HighBP  HighChol  CholCheck   BMI  Smoker  Stroke  \
0           0.0     1.0       1.0        1.0  40.0     1.0     0.0   
1           0.0     0.0       0.0        0.0  25.0     1.0     0.0   
2           0.0     1.0       1.0        1.0  28.0     0.0     0.0   
3           0.0     1.0       0.0        1.0  27.0     0.0     0.0   
4           0.0     1.0       1.0        1.0  24.0     0.0     0.0   

   HeartDiseaseorAttack  PhysActivity  Fruits  ...  AnyHealthcare  \
0                   0.0           0.0     0.0  ...            1.0   
1                   0.0           1.0     0.0  ...            0.0   
2                   0.0           0.0     1.0  ...            1.0   
3                   0.0           1.0     1.0  ...            1.0   
4                   0.0           1.0     1.0  ...            1.0   

   NoDocbcCost  GenHlth  MentHlth  PhysHlth  DiffWalk  Sex   Age  Education  \
0          0.0      5.0      18.0      15.0       1.0  0.0   9.0        4.0   
1     

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate features and target
X = data.drop('Diabetes_012', axis=1)
y = data['Diabetes_012']

# Split FIRST (70% train, 15% val, 15% test, stratified on the label) so that
# held-out rows cannot influence any preprocessing statistics.
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_val_raw, X_test_raw, y_val, y_test = train_test_split(
    X_temp_raw, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Fit the scaler on the training split only, then apply the same mean/std to
# validation and test data. Fitting on the full dataset would leak information
# from the evaluation splits into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)

# Names kept for any cell that still refers to them; both are scaled with the
# training-split statistics.
X_temp = scaler.transform(X_temp_raw)
X_normalized = scaler.transform(X)

# Display shapes of the datasets
X_train.shape, X_val.shape, X_test.shape, y_train.shape, y_val.shape, y_test.shape


((177576, 21), (38052, 21), (38052, 21), (177576,), (38052,), (38052,))

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Turn every split into tensors: float32 feature matrices and int64 class labels
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.tensor(labels.values, dtype=torch.long)
    for labels in (y_train, y_val, y_test)
)

# Pair features with labels, one dataset per split
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batch iterators: only the training data is reshuffled each epoch
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
val_loader, test_loader = (
    DataLoader(dataset, shuffle=False, batch_size=64)
    for dataset in (val_dataset, test_dataset)
)

class RNNModel(nn.Module):
    """Classifier made of an ``nn.RNN`` stack followed by a linear output layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Either ``(batch, input_size)`` tabular rows, which are treated as
                sequences of length 1, or ``(batch, seq_len, input_size)``
                sequences.

        Raises:
            ValueError: If ``x`` is neither 2-D nor 3-D.
        """
        if x.dim() == 2:
            # Tabular input: add a sequence-length dimension of 1
            x = x.unsqueeze(1)
        elif x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, features) or (batch, seq_len, features), "
                f"got {tuple(x.shape)}"
            )

        # No explicit h0: nn.RNN defaults the initial hidden state to zeros that
        # match the input's device and dtype, so this also works for models
        # cast to float64/half, where a hand-built float32 h0 would mismatch.
        out, _ = self.rnn(x)

        # Classify from the output of the last time step
        return self.fc(out[:, -1, :])

In [None]:
# Define training and evaluation functions
def _evaluate(model, loader, criterion, device):
    """Return ``(mean per-batch loss, accuracy in percent)`` of ``model`` on ``loader``.

    Puts the model in eval mode and runs without gradient tracking.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    num_batches = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            loss_sum += criterion(outputs, y_batch).item()
            num_batches += 1

            # Predicted class = index of the largest logit
            predicted = outputs.argmax(dim=1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    if total == 0:
        raise ValueError("validation loader yielded no samples")
    return loss_sum / num_batches, 100 * correct / total


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` and report validation metrics after every epoch.

    Args:
        model: Network to train; moved to ``device`` in place.
        train_loader: Iterable of ``(features, labels)`` training batches.
        val_loader: Iterable of ``(features, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that must have been built from ``model.parameters()``.
        num_epochs: Number of passes over ``train_loader``. With a value below 1
            no training happens and the model is only evaluated.
        device: Device on which batches and the model are placed.

    Returns:
        Tuple ``(val_accuracy, val_loss)`` measured after the last epoch:
        accuracy in percent and the mean per-batch validation loss.

    Raises:
        ValueError: If ``train_loader`` or ``val_loader`` yields no data.
    """
    model.to(device)
    val_accuracy = None
    val_loss = None

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            # Forward pass
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches")

        # Validation phase
        val_loss, val_accuracy = _evaluate(model, val_loader, criterion, device)

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {running_loss/num_batches:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

    if val_accuracy is None:
        # No epoch ran; still return well-defined metrics for the model as given
        val_loss, val_accuracy = _evaluate(model, val_loader, criterion, device)

    return val_accuracy, val_loss

In [None]:
# Seed PyTorch's RNG so weight initialisation and DataLoader shuffling are
# repeatable across "Restart & Run All" (same value as the split's random_state)
SEED = 42
torch.manual_seed(SEED)

# Model parameters
input_size = X_train.shape[1]      # number of feature columns
hidden_size = 32
num_layers = 1
num_classes = len(y.unique())      # one logit per distinct label value

# Initialize the model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RNNModel(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model; the returned (val_accuracy, val_loss) is shown as the cell output
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5, device=device)

Epoch [1/5], Train Loss: 0.4156, Val Loss: 0.3971, Val Accuracy: 84.88%
Epoch [2/5], Train Loss: 0.3964, Val Loss: 0.3939, Val Accuracy: 84.99%
Epoch [3/5], Train Loss: 0.3947, Val Loss: 0.3939, Val Accuracy: 84.91%
Epoch [4/5], Train Loss: 0.3942, Val Loss: 0.3925, Val Accuracy: 84.97%
Epoch [5/5], Train Loss: 0.3937, Val Loss: 0.3919, Val Accuracy: 85.03%


(85.02838221381268, 0.3918576860878648)

In [None]:
# Hidden-size sweep: train a fresh RNNModel for each width and keep the
# validation accuracy/loss that train_model returns after its last epoch.
hidden_sizes = [32, 64, 128]
results = {}
# NOTE(review): the loop variable rebinds the notebook-level `hidden_size`, so it
# is left at 128 after this cell — confirm that later cells expect that value.
for hidden_size in hidden_sizes:
    # New model and an optimizer built from that model's parameters for every run
    model = RNNModel(input_size, hidden_size, num_layers, num_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    val_accuracy, val_loss = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5, device=device)  # Train for 5 epochs for each hidden size
    results[hidden_size] = {'accuracy': val_accuracy, 'loss': val_loss}

print(results)

Epoch [1/5], Train Loss: 0.4143, Val Loss: 0.3974, Val Accuracy: 84.84%
Epoch [2/5], Train Loss: 0.3960, Val Loss: 0.3948, Val Accuracy: 84.96%
Epoch [3/5], Train Loss: 0.3947, Val Loss: 0.3934, Val Accuracy: 84.91%
Epoch [4/5], Train Loss: 0.3940, Val Loss: 0.3937, Val Accuracy: 85.02%
Epoch [5/5], Train Loss: 0.3935, Val Loss: 0.3947, Val Accuracy: 84.86%
Epoch [1/5], Train Loss: 0.4122, Val Loss: 0.3948, Val Accuracy: 85.00%
Epoch [2/5], Train Loss: 0.3964, Val Loss: 0.3951, Val Accuracy: 84.94%
Epoch [3/5], Train Loss: 0.3947, Val Loss: 0.3925, Val Accuracy: 84.98%
Epoch [4/5], Train Loss: 0.3942, Val Loss: 0.3921, Val Accuracy: 85.04%
Epoch [5/5], Train Loss: 0.3937, Val Loss: 0.3936, Val Accuracy: 85.04%
Epoch [1/5], Train Loss: 0.4078, Val Loss: 0.3975, Val Accuracy: 84.85%
Epoch [2/5], Train Loss: 0.3975, Val Loss: 0.3946, Val Accuracy: 84.87%
Epoch [3/5], Train Loss: 0.3958, Val Loss: 0.3957, Val Accuracy: 84.91%
Epoch [4/5], Train Loss: 0.3951, Val Loss: 0.3938, Val Accuracy:

In [None]:
# Optimizer comparison: train an identically configured, freshly initialised
# model with each optimizer and record its final validation metrics.
#
# Each optimizer must be constructed from the parameters of the model it is
# going to train. Building all optimizers up front over a previously existing
# `model` leaves every new model in the loop untouched, and the loss never moves.
optimizer_classes = [optim.SGD, optim.RMSprop, optim.Adam]
optimizer_names = ['SGD', 'RMSprop', 'Adam']
optimizers = []  # the optimizer instances actually used, in run order
results = {}
for optimizer_cls, optimizer_name in zip(optimizer_classes, optimizer_names):
    model = RNNModel(input_size, hidden_size, num_layers, num_classes).to(device)
    optimizer = optimizer_cls(model.parameters(), lr=0.001)
    optimizers.append(optimizer)
    val_accuracy, val_loss = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5, device=device)
    results[optimizer_name] = {'accuracy': val_accuracy, 'loss': val_loss}

print(results)

Epoch [1/5], Train Loss: 1.0626, Val Loss: 1.0627, Val Accuracy: 48.24%
Epoch [2/5], Train Loss: 1.0626, Val Loss: 1.0627, Val Accuracy: 48.24%
Epoch [3/5], Train Loss: 1.0626, Val Loss: 1.0627, Val Accuracy: 48.24%
Epoch [4/5], Train Loss: 1.0626, Val Loss: 1.0627, Val Accuracy: 48.24%
Epoch [5/5], Train Loss: 1.0626, Val Loss: 1.0627, Val Accuracy: 48.24%
Epoch [1/5], Train Loss: 1.0908, Val Loss: 1.0898, Val Accuracy: 37.20%
Epoch [2/5], Train Loss: 1.0908, Val Loss: 1.0898, Val Accuracy: 37.20%
Epoch [3/5], Train Loss: 1.0908, Val Loss: 1.0898, Val Accuracy: 37.20%
Epoch [4/5], Train Loss: 1.0908, Val Loss: 1.0898, Val Accuracy: 37.20%
Epoch [5/5], Train Loss: 1.0908, Val Loss: 1.0898, Val Accuracy: 37.20%
Epoch [1/5], Train Loss: 1.0545, Val Loss: 1.0550, Val Accuracy: 54.90%
Epoch [2/5], Train Loss: 1.0545, Val Loss: 1.0550, Val Accuracy: 54.90%
Epoch [3/5], Train Loss: 1.0545, Val Loss: 1.0550, Val Accuracy: 54.90%
Epoch [4/5], Train Loss: 1.0545, Val Loss: 1.0550, Val Accuracy:

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Fresh model and an optimizer built from ITS parameters, so the scheduler and
# the training steps below act on the model that is being validated.
model = RNNModel(input_size, hidden_size, num_layers, num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3)  # Reduce LR if val loss plateaus
best_val_loss = float('inf')
best_state = None  # copy of the weights that achieved best_val_loss
patience = 5  # Number of epochs to wait for improvement
epochs_without_improvement = 0

for epoch in range(350):  # Max epochs
    # Training phase: one pass over the training data
    model.train()
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
    val_loss /= len(val_loader) # Calculate average validation loss

    scheduler.step(val_loss)  # Update learning rate

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
        # Clone the tensors: state_dict() returns references that later
        # optimizer steps would overwrite in place.
        best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print("Early stopping triggered")
            break

# Leave `model` holding the weights with the lowest validation loss
if best_state is not None:
    model.load_state_dict(best_state)

Early stopping triggered
