<a href="https://colab.research.google.com/github/Alfikriangelo/DeepLearningTasks/blob/main/2ndWeekTask/Covertdataset_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
#import libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [12]:
# Load dataset
df = pd.read_csv("sample_data/compressed_data.csv")

In [13]:
# Hapus baris dengan NaN di target
df = df.dropna(subset=['Cover_Type'])

In [14]:
# Pisahkan fitur (X) dan target (y)
X = df.drop(columns=["Cover_Type"])
y = df["Cover_Type"] - 1  # Mengurangi 1 agar kelas mulai dari 0 (karena PyTorch membutuhkan label 0-based)

In [15]:
# Bagi dataset menjadi data latih dan uji
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [16]:
# Normalisasi fitur
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
# Konversi ke Tensor PyTorch
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_train_torch = torch.tensor(y_train.values, dtype=torch.long)
y_test_torch = torch.tensor(y_test.values, dtype=torch.long)

In [18]:
# Definisi Model PyTorch
class MLPModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(MLPModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [19]:
# Inisialisasi model
input_size = X_train.shape[1]
hidden_size = 128
num_classes = len(np.unique(y))
model_torch = MLPModel(input_size, hidden_size, num_classes)

In [20]:
# Definisi loss function dan optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_torch.parameters(), lr=0.001)

In [21]:
# Early stopping manual PyTorch
early_stop_patience = 5
best_loss = float("inf")
patience = 0

In [22]:
# Training model PyTorch
for epoch in range(200):
    optimizer.zero_grad()
    outputs = model_torch(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()

    # Validasi
    with torch.no_grad():
        val_outputs = model_torch(X_test_torch)
        val_loss = criterion(val_outputs, y_test_torch)

    print(f"Epoch {epoch+1}, Loss: {loss.item()}, Val Loss: {val_loss.item()}")

    # Early stopping
    if val_loss.item() < best_loss:
        best_loss = val_loss.item()
        patience = 0
    else:
        patience += 1
    if patience >= early_stop_patience:
        print("Early stopping!")
        break

Epoch 1, Loss: 2.020599126815796, Val Loss: 1.977095127105713
Epoch 2, Loss: 1.9768424034118652, Val Loss: 1.9345585107803345
Epoch 3, Loss: 1.934304118156433, Val Loss: 1.8932808637619019
Epoch 4, Loss: 1.893025517463684, Val Loss: 1.853272557258606
Epoch 5, Loss: 1.8530049324035645, Val Loss: 1.8145248889923096
Epoch 6, Loss: 1.8142304420471191, Val Loss: 1.7770183086395264
Epoch 7, Loss: 1.7766848802566528, Val Loss: 1.7407276630401611
Epoch 8, Loss: 1.740343451499939, Val Loss: 1.7056175470352173
Epoch 9, Loss: 1.7051739692687988, Val Loss: 1.671643853187561
Epoch 10, Loss: 1.6711351871490479, Val Loss: 1.6387608051300049
Epoch 11, Loss: 1.6381800174713135, Val Loss: 1.606918215751648
Epoch 12, Loss: 1.606260061264038, Val Loss: 1.5760653018951416
Epoch 13, Loss: 1.5753259658813477, Val Loss: 1.5461527109146118
Epoch 14, Loss: 1.5453307628631592, Val Loss: 1.5171350240707397
Epoch 15, Loss: 1.5162297487258911, Val Loss: 1.4889711141586304
Epoch 16, Loss: 1.487981915473938, Val Loss

In [23]:
# Evaluasi PyTorch
y_pred_torch = torch.argmax(model_torch(X_test_torch), axis=1).numpy()
print("Akurasi:", accuracy_score(y_test, y_pred_torch))
print("Presisi:", precision_score(y_test, y_pred_torch, average='weighted'))
print("Recall:", recall_score(y_test, y_pred_torch, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred_torch, average='weighted'))

Akurasi: 0.7278813799987952
Presisi: 0.7237352966208289
Recall: 0.7278813799987952
F1 Score: 0.7114607853748885


In [24]:
# Konversi ke TensorFlow
X_train_tf = tf.convert_to_tensor(X_train, dtype=tf.float32)
X_test_tf = tf.convert_to_tensor(X_test, dtype=tf.float32)
y_train_tf = tf.convert_to_tensor(y_train, dtype=tf.int32)
y_test_tf = tf.convert_to_tensor(y_test, dtype=tf.int32)

In [25]:
# Definisi Model TensorFlow
model_tf = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(input_size,)),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
# Kompilasi model
model_tf.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [27]:
# Early stopping TensorFlow
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [28]:
# Training model TensorFlow
model_tf.fit(X_train_tf, y_train_tf, epochs=30, validation_data=(X_test_tf, y_test_tf), callbacks=[early_stopping])

Epoch 1/30
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3ms/step - accuracy: 0.7341 - loss: 0.6235 - val_accuracy: 0.7852 - val_loss: 0.5009
Epoch 2/30
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3ms/step - accuracy: 0.7902 - loss: 0.4882 - val_accuracy: 0.8025 - val_loss: 0.4681
Epoch 3/30
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 2ms/step - accuracy: 0.8077 - loss: 0.4540 - val_accuracy: 0.8189 - val_loss: 0.4369
Epoch 4/30
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 2ms/step - accuracy: 0.8186 - loss: 0.4326 - val_accuracy: 0.8217 - val_loss: 0.4292
Epoch 5/30
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 3ms/step - accuracy: 0.8253 - loss: 0.4175 - val_accuracy: 0.8310 - val_loss: 0.4141
Epoch 6/30
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3ms/step - accuracy: 0.8307 - loss: 0.4081 - val_accuracy: 0.8382 - val_loss: 0.399

<keras.src.callbacks.history.History at 0x7cb2a613b390>

In [29]:
# Evaluasi TensorFlow
y_pred_tf = np.argmax(model_tf.predict(X_test_tf), axis=1)
print("Akurasi:", accuracy_score(y_test, y_pred_tf))
print("Presisi:", precision_score(y_test, y_pred_tf, average='weighted'))
print("Recall:", recall_score(y_test, y_pred_tf, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred_tf, average='weighted'))

[1m3632/3632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Akurasi: 0.8599950087347142
Presisi: 0.8606025732474466
Recall: 0.8599950087347142
F1 Score: 0.8581858088796285


## 📌 **Penjelasan Metrik Evaluasi**

### **1. Akurasi**  
**Pengertian**:  
Akurasi mengukur seberapa sering model memprediksi dengan benar dari keseluruhan dataset.  

**Persamaan**:  
$$ Akurasi = \frac{TP + TN}{TP + TN + FP + FN} $$  

---

### **2. Presisi**  
**Pengertian**:  
Presisi mengukur seberapa banyak prediksi positif yang benar dibandingkan dengan total prediksi positif.  

**Persamaan**:  
$$ Presisi = \frac{TP}{TP + FP} $$  

---

### **3. Recall (Sensitivitas / True Positive Rate)**  
**Pengertian**:  
Recall mengukur seberapa banyak data positif yang berhasil diklasifikasikan dengan benar.  

**Persamaan**:  
$$ Recall = \frac{TP}{TP + FN} $$  

---

### **4. F1 Score**  
**Pengertian**:  
F1 Score adalah rata-rata harmonik antara **Presisi** dan **Recall**.  

**Persamaan**:  
$$ F1 Score = 2 \times \frac{Presisi \times Recall}{Presisi + Recall} $$  

---

## 🏆 **Kesimpulan dari Hasil Evaluasi**  

| Model        | Akurasi | Presisi | Recall | F1 Score |
|-------------|--------|---------|--------|----------|
| **PyTorch**  | 0.728  | 0.724   | 0.728  | 0.711    |
| **TensorFlow** | 0.860  | 0.861   | 0.860  | 0.858    |

✅ **TensorFlow lebih akurat, lebih presisi, lebih sensitif terhadap data positif, dan lebih seimbang dalam prediksi.**  

💡 **Semoga membantu! 🚀**
