In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [67]:
file_path = '/content/adult.csv'
data = pd.read_csv(file_path)
data.head()

Unnamed: 0,age,workclass,fnlwgt,education,education.num,marital.status,occupation,relationship,race,sex,capital.gain,capital.loss,hours.per.week,native.country,income
0,90,?,77053,HS-grad,9,Widowed,?,Not-in-family,White,Female,0,4356,40,United-States,<=50K
1,82,Private,132870,HS-grad,9,Widowed,Exec-managerial,Not-in-family,White,Female,0,4356,18,United-States,<=50K
2,66,?,186061,Some-college,10,Widowed,?,Unmarried,Black,Female,0,4356,40,United-States,<=50K
3,54,Private,140359,7th-8th,4,Divorced,Machine-op-inspct,Unmarried,White,Female,0,3900,40,United-States,<=50K
4,41,Private,264663,Some-college,10,Separated,Prof-specialty,Own-child,White,Female,0,3900,40,United-States,<=50K


### Memilih fitur yang relevan dan target (income)

In [68]:
features = ['age', 'education.num', 'hours.per.week', 'capital.gain', 'capital.loss']
X = data[features]
y = data['income']

### Mengubah nilai kategorikal target 'income' menjadi numerik menggunakan LabelEncoder

In [69]:

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)  # 'income' menjadi 0 (<=50K) dan 1 (>50K)

### Membagi dataset menjadi training (70%) dan validation (30%)

In [70]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

### Melatih model menggunakan Logistic Regression

In [71]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

### Mengecek akurasi pada training dan validation set

In [72]:
y_train_pred = model.predict(X_train)
y_val_pred = model.predict(X_val)

train_accuracy = accuracy_score(y_train, y_train_pred)
val_accuracy = accuracy_score(y_val, y_val_pred)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

Training Accuracy: 81.34%
Validation Accuracy: 81.74%


In [73]:
!pip install tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense



### Membuat model sequential

In [74]:
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Compile model

In [75]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

### Melatih model

In [76]:
history = model.fit(X_train, y_train, epochs=10, batch_size=16, validation_data=(X_val, y_val))

Epoch 1/10
[1m1425/1425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.7615 - loss: 3.0411 - val_accuracy: 0.7930 - val_loss: 0.7916
Epoch 2/10
[1m1425/1425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.7915 - loss: 0.9942 - val_accuracy: 0.8082 - val_loss: 0.4788
Epoch 3/10
[1m1425/1425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.7838 - loss: 0.9214 - val_accuracy: 0.7716 - val_loss: 0.4545
Epoch 4/10
[1m1425/1425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.7593 - loss: 0.4740 - val_accuracy: 0.7700 - val_loss: 0.4566
Epoch 5/10
[1m1425/1425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.7645 - loss: 0.4664 - val_accuracy: 0.7626 - val_loss: 0.4629
Epoch 6/10
[1m1425/1425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7624 - loss: 0.4525 - val_accuracy: 0.7650 - val_loss: 0.4324
Epoch 7/10
[

### Mengecek akurasi pada set validation

In [77]:
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

[1m306/306[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7937 - loss: 0.4278
Validation Accuracy: 79.26%
