In [51]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

In [52]:
# Load the dataset
df = pd.read_csv("dataset.csv")

In [53]:
# Display The first few rows to ensure it's loaded correctly
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased,Marital_Status,Occupation
0,100001,Male,34,15000000,0,Divorced,Professor
1,100002,Female,29,35000000,1,Single,Engineer
2,100003,Male,45,45000000,0,Married,Manager
3,100004,Female,41,25000000,1,Married,Doctor
4,100005,Male,23,20000000,0,Single,Student


In [54]:
# Split features and target
inputs = df.drop(['User ID', 'Purchased'], axis='columns')
target = df['Purchased']

In [55]:
# Initialize label encoders for categorical features
le_Gender = LabelEncoder()
le_Marital_Status = LabelEncoder()
le_Occupation = LabelEncoder()

In [56]:
# Apply label encoding
inputs['Gender'] = le_Gender.fit_transform(inputs['Gender'])
inputs['Marital_Status'] = le_Marital_Status.fit_transform(inputs['Marital_Status'])
inputs['Occupation'] = le_Occupation.fit_transform(inputs['Occupation'])

In [57]:
# Check the encoded dataframe
inputs.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Marital_Status,Occupation
0,1,34,15000000,0,22
1,0,29,35000000,2,12
2,1,45,45000000,1,17
3,0,41,25000000,1,10
4,1,23,20000000,2,25


In [58]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(inputs, target, test_size=0.2, random_state=42)

In [59]:
# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [60]:
# Train a Decision Tree classifier
model = DecisionTreeClassifier()
model.fit(X_train_scaled, y_train)

DecisionTreeClassifier()

In [62]:
# Make predictions
y_pred = model.predict(X_test_scaled)

In [63]:
# Evaluate the model
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

In [64]:
cm, accuracy

(array([[53,  3],
        [ 0, 44]], dtype=int64),
 0.97)

In [65]:
# Drop selected columns for another model
inputs_n = inputs.drop(['Gender', 'Age', 'EstimatedSalary'], axis='columns')

In [66]:
# Split the data into training and test sets for the new inputs
X_train_n, X_test_n, y_train_n, y_test_n = train_test_split(inputs_n, target, test_size=0.2, random_state=42)

In [67]:
# Feature scaling for the new inputs
scaler_n = StandardScaler()
X_train_scaled_n = scaler_n.fit_transform(X_train_n)
X_test_scaled_n = scaler_n.transform(X_test_n)

In [68]:
# Train the new model
model_n = DecisionTreeClassifier()
model_n.fit(X_train_scaled_n, y_train_n)

DecisionTreeClassifier()

In [69]:
# Make predictions with the new model
y_pred_n = model_n.predict(X_test_scaled_n)

In [70]:
# Evaluate the new model
cm_n = confusion_matrix(y_test_n, y_pred_n)
accuracy_n = accuracy_score(y_test_n, y_pred_n)

In [71]:
cm_n, accuracy_n

(array([[53,  3],
        [ 4, 40]], dtype=int64),
 0.93)

In [77]:
# Make predictions with the new model
prediction_1 = model_n.predict([[2, 3]])
prediction_2 = model_n.predict([[2, 1]])

In [74]:
prediction_1, prediction_2

(array([1], dtype=int64), array([1], dtype=int64))

In [79]:
#Print out the predictions
print("Hasil prediksi 1:", prediction_1)
print("Hasil prediksi 2:", prediction_2)

Hasil prediksi 1: [1]
Hasil prediksi 2: [1]
