In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Cek apakah CUDA tersedia
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load dataset
df = pd.read_csv("/content/Infrared.csv")

# Mengisi nilai yang hilang dengan median
df["Distance"].fillna(df["Distance"].median(), inplace=True)

# Encoding variabel kategorikal
label_encoders = {}
categorical_cols = ["Gender", "Age", "Ethnicity"]
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Normalisasi fitur numerik
scaler = StandardScaler()
numerical_cols = df.columns.difference(["aveOralM"])  # Semua fitur kecuali target
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

# Memisahkan fitur dan target
X = torch.tensor(df[numerical_cols].values, dtype=torch.float32).to(device)
y = torch.tensor(df["aveOralM"].values, dtype=torch.float32).view(-1, 1).to(device)

# Membagi data menjadi training (80%) dan testing (20%)
train_size = int(0.8 * len(df))
test_size = len(df) - train_size
dataset = TensorDataset(X, y)
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Membuat DataLoader untuk batch processing
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Definisi model Deep Learning
class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Inisialisasi model, loss function, dan optimizer
input_size = X.shape[1]
model = NeuralNet(input_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training model
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    if (epoch + 1) % 10 == 0:  # Print setiap 10 epoch
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")

# Evaluasi model
model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        predictions = model(batch_X)
        y_true.extend(batch_y.cpu().numpy())
        y_pred.extend(predictions.cpu().numpy())

# Hitung metrik evaluasi
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_pred)

print(f"\nEvaluasi Model:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-Squared (R²): {r2:.4f}")


Using device: cpu


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Distance"].fillna(df["Distance"].median(), inplace=True)


Epoch [10/50], Loss: 180.9966
Epoch [20/50], Loss: 12.3951
Epoch [30/50], Loss: 4.9354
Epoch [40/50], Loss: 2.6665
Epoch [50/50], Loss: 1.7335

Evaluasi Model:
Mean Squared Error (MSE): 2.9286
Root Mean Squared Error (RMSE): 1.7113
R-Squared (R²): -9.7971


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
df = pd.read_csv("/content/Infrared.csv")

# Mengisi nilai yang hilang dengan median
df["Distance"].fillna(df["Distance"].median(), inplace=True)

# Encoding variabel kategorikal
label_encoders = {}
categorical_cols = ["Gender", "Age", "Ethnicity"]
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Normalisasi fitur numerik
scaler = StandardScaler()
numerical_cols = df.columns.difference(["aveOralM"])  # Semua fitur kecuali target
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

# Memisahkan fitur dan target
X = df[numerical_cols].values
y = df["aveOralM"].values

# Membagi data menjadi training (80%) dan testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Membangun model dengan TensorFlow (Keras)
model = keras.Sequential([
    keras.layers.Dense(64, activation="relu", input_shape=(X_train.shape[1],)),
    keras.layers.Dense(32, activation="relu"),
    keras.layers.Dense(1)  # Output layer untuk regresi
])

# Kompilasi model
model.compile(optimizer="adam", loss="mse", metrics=["mse"])

# Training model
num_epochs = 50
history = model.fit(X_train, y_train, epochs=num_epochs, batch_size=32, validation_split=0.1, verbose=1)

# Evaluasi model
y_pred = model.predict(X_test)

# Hitung metrik evaluasi
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"\nEvaluasi Model:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-Squared (R²): {r2:.4f}")


Epoch 1/50


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Distance"].fillna(df["Distance"].median(), inplace=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 1352.0593 - mse: 1352.0593 - val_loss: 1236.2085 - val_mse: 1236.2085
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1173.6807 - mse: 1173.6807 - val_loss: 983.8384 - val_mse: 983.8384
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 915.6106 - mse: 915.6106 - val_loss: 627.9178 - val_mse: 627.9178
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 603.3255 - mse: 603.3255 - val_loss: 474.0075 - val_mse: 474.0075
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 476.3400 - mse: 476.3400 - val_loss: 459.0061 - val_mse: 459.0061
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 441.5398 - mse: 441.5398 - val_loss: 397.2764 - val_mse: 397.2764
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load dataset
df = pd.read_csv("/content/income (1).csv")

# Tampilkan beberapa baris pertama
print(df.head())

# Cek informasi dataset
print(df.info())

# Hapus missing values
df = df.dropna()

# Encode categorical features
categorical_columns = df.select_dtypes(include=['object']).columns
encoder = LabelEncoder()

for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

# Normalisasi fitur numerik
scaler = StandardScaler()
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Split dataset menjadi train-test
X = df.drop(columns=['income'])  # Asumsi target adalah 'income'
y = df['income']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


   age         workclass  fnlwgt  education  education-num  \
0   39         State-gov   77516  Bachelors             13   
1   50  Self-emp-not-inc   83311  Bachelors             13   
2   38           Private  215646    HS-grad              9   
3   53           Private  234721       11th              7   
4   28           Private  338409  Bachelors             13   

       marital-status         occupation   relationship   race     sex  \
0       Never-married       Adm-clerical  Not-in-family  White    Male   
1  Married-civ-spouse    Exec-managerial        Husband  White    Male   
2            Divorced  Handlers-cleaners  Not-in-family  White    Male   
3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   
4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   

   capital-gain  capital-loss  hours-per-week native-country income  
0          2174             0              40  United-States  <=50K  
1             0             0             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = encoder.fit_transform(df[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = encoder.fit_transform(df[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = encoder.fit_transform(df[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[

In [19]:
# 1️⃣ Import Libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# 2️⃣ Load Dataset (Keep NaN)
file_path = "/content/income (1).csv"
df = pd.read_csv(file_path)

# Remove extra spaces from column names
df.columns = df.columns.str.strip()

# Convert target 'income' to 0 and 1
df["income"] = df["income"].str.strip()
df["income"] = df["income"].map({"<=50K": 0, ">50K": 1})

# Encode categorical features
for col in df.select_dtypes(include=["object"]).columns:
    df[col] = LabelEncoder().fit_transform(df[col])

# Split dataset
X = df.drop(columns=["income"])
y = df["income"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize data (Keep NaN)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# =====================================
# 🟢  PyTorch Section 🟢
# =====================================

# Convert to PyTorch Tensors (Keep NaN but handle it for training)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Replace NaN in tensors with zeros for training
X_train_tensor = torch.nan_to_num(X_train_tensor, nan=0.0)
y_train_tensor = torch.nan_to_num(y_train_tensor, nan=0.0)

# Define PyTorch Model
class IncomeModel(nn.Module):
    def __init__(self):
        super(IncomeModel, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize PyTorch Model (Ensure float32)
model = IncomeModel().float()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training PyTorch Model (Ignore NaN in Loss)
print("\n🟢 Training PyTorch Model...")
epochs = 50
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)  # Ensure float32

    # Clamp outputs to prevent log(0) issue
    outputs = torch.clamp(outputs, min=1e-7, max=1 - 1e-7)

    loss = criterion(outputs, y_train_tensor)

    # Ignore NaN values in loss
    if torch.isnan(loss):
        print(f"Epoch [{epoch+1}/{epochs}] - Loss contains NaN, replacing with 0.0")
        loss = torch.nan_to_num(loss, nan=0.0)

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], PyTorch Loss: {loss.item():.6f}")

# Evaluate PyTorch Model
y_pred_tensor = torch.sigmoid(model(X_test_tensor)).detach().numpy()
y_pred_pytorch = (y_pred_tensor > 0.5).astype(int)

# =====================================
# 🔵  TensorFlow (Keras) Section 🔵
# =====================================

# Define TensorFlow Model
model_tf = keras.Sequential([
    layers.Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compile TensorFlow Model
model_tf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Custom Callback to Handle NaN Loss in TensorFlow
class CustomCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if np.isnan(logs["loss"]):
            print(f"Epoch [{epoch+1}] - Loss is NaN, replacing with 0.0")
            logs["loss"] = 0.0
        if "val_loss" in logs and np.isnan(logs["val_loss"]):
            print(f"Epoch [{epoch+1}] - Val Loss is NaN, replacing with 0.0")
            logs["val_loss"] = 0.0

# Train TensorFlow Model
print("\n🔵 Training TensorFlow Model...")
model_tf.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1, validation_data=(X_test, y_test), callbacks=[CustomCallback()])

# Evaluate TensorFlow Model
y_pred_tf = (model_tf.predict(X_test) > 0.5).astype(int)

# =====================================
# 📊 Model Evaluation (Fix NaN in y_test)
# =====================================

print("\n===============================")
print("📊 Final Evaluation of Both Models")
print("===============================")

# ✅ Fix NaN in y_test before evaluation
y_test_clean = y_test.dropna().reset_index(drop=True)
y_pred_pytorch_clean = y_pred_pytorch[:len(y_test_clean)]
y_pred_tf_clean = y_pred_tf[:len(y_test_clean)]

print("\n🟢 PyTorch Model Results:")
print(f"Accuracy: {accuracy_score(y_test_clean, y_pred_pytorch_clean):.4f}")
print(f"Precision: {precision_score(y_test_clean, y_pred_pytorch_clean):.4f}")
print(f"Recall: {recall_score(y_test_clean, y_pred_pytorch_clean):.4f}")
print(f"F1-Score: {f1_score(y_test_clean, y_pred_pytorch_clean):.4f}")

print("\n🔵 TensorFlow Model Results:")
print(f"Accuracy: {accuracy_score(y_test_clean, y_pred_tf_clean):.4f}")
print(f"Precision: {precision_score(y_test_clean, y_pred_tf_clean):.4f}")
print(f"Recall: {recall_score(y_test_clean, y_pred_tf_clean):.4f}")
print(f"F1-Score: {f1_score(y_test_clean, y_pred_tf_clean):.4f}")



🟢 Training PyTorch Model...
Epoch [10/50], PyTorch Loss: 0.693147
Epoch [20/50], PyTorch Loss: 0.693147
Epoch [30/50], PyTorch Loss: 0.693147
Epoch [40/50], PyTorch Loss: 0.693147
Epoch [50/50], PyTorch Loss: 0.693147

🔵 Training TensorFlow Model...
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1208/1222[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.5087 - loss: nanEpoch [1] - Loss is NaN, replacing with 0.0
Epoch [1] - Val Loss is NaN, replacing with 0.0
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.5087 - loss: nan - val_accuracy: 0.5053 - val_loss: 0.0000e+00
Epoch 2/50
[1m1213/1222[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.5072 - loss: nanEpoch [2] - Loss is NaN, replacing with 0.0
Epoch [2] - Val Loss is NaN, replacing with 0.0
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.5072 - loss: nan - val_accuracy: 0.5053 - val_loss: 0.0000e+00
Epoch 3/50
[1m1202/1222[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.5082 - loss: nanEpoch [3] - Loss is NaN, replacing with 0.0
Epoch [3] - Val Loss is NaN, replacing with 0.0
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/s

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
!pip install tensorflow



In [3]:
!pip install torch torchvision torchaudio tensorflow scikit-learn pandas numpy matplotlib seaborn


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5