In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
# Read the raw dataset into a DataFrame. The path is relative, so it is resolved
# against the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='dataset_buys_comp.csv')

In [3]:
# Preview the first five rows. Leaving the frame as the cell's last expression lets
# Jupyter render it as a formatted table instead of the plain-text dump that print()
# produces.
df.head()

          Age  Income Student Credit_Rating  Buys_Computer
0         Tua  Tinggi      Ya         Buruk              0
1  Paruh Baya  Tinggi      Ya          Baik              1
2  Paruh Baya  Rendah   Tidak         Buruk              1
3         Tua  Rendah   Tidak          Baik              1
4         Tua  Rendah      Ya          Baik              1


In [4]:
# Print a structural summary of the frame: index range, column names, non-null counts,
# dtypes and memory usage. Used here to confirm there are no missing values and to see
# which columns are still strings (object dtype).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Age            1000 non-null   object
 1   Income         1000 non-null   object
 2   Student        1000 non-null   object
 3   Credit_Rating  1000 non-null   object
 4   Buys_Computer  1000 non-null   int64 
dtypes: int64(1), object(4)
memory usage: 39.2+ KB


In [5]:
# Turn every categorical feature into integer codes so the classifier can consume it.
#
# One LabelEncoder is fitted per column and stored in `encoders`, keyed by column name.
# That keeps each column's code -> label mapping available afterwards (for example
# encoders['Age'].classes_), which is needed to decode values or to encode new rows.
# Refitting a single shared encoder on every column would keep only the mapping of the
# last column.
#
# NOTE(review): scikit-learn documents LabelEncoder as a target (y) encoder; OrdinalEncoder
# is the feature-oriented equivalent if this cell is ever reworked.
encoders = {}

df_encoded = df.copy()
# Features are selected by name rather than by position, so the target is never encoded
# regardless of where it sits in the CSV.
for column in df.columns.drop('Buys_Computer'):  # all columns except the target
    le = LabelEncoder()
    df_encoded[column] = le.fit_transform(df[column])
    encoders[column] = le
# After the loop `le` refers to the encoder of the last feature column.

In [6]:
# Separate the target from the predictors; both are taken from the encoded frame.
y = df_encoded['Buys_Computer']
X = df_encoded.drop(columns='Buys_Computer')  # every remaining column is a feature

In [7]:
# Display the feature matrix: every column except Buys_Computer, now as integer codes.
X

Unnamed: 0,Age,Income,Student,Credit_Rating
0,2,2,1,1
1,1,2,1,0
2,1,0,0,1
3,2,0,0,0
4,2,0,1,0
...,...,...,...,...
995,0,2,0,1
996,1,0,1,1
997,1,2,0,1
998,0,2,1,1


In [8]:
# Display the target Series (the Buys_Computer column, values 0 / 1).
y

Unnamed: 0,Buys_Computer
0,0
1,1
2,1
3,1
4,1
...,...
995,0
996,1
997,0
998,1


In [9]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split
# reproducible across runs.
# NOTE(review): the classes are imbalanced (the test split below has 71 vs 129 rows per
# class); passing stratify=y would keep that ratio identical in both splits — consider it.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Display the training features; the row labels keep their original index values from df.
X_train

Unnamed: 0,Age,Income,Student,Credit_Rating
29,0,1,1,0
535,2,2,0,1
695,1,0,1,1
557,2,0,1,1
836,2,2,0,0
...,...,...,...,...
106,0,0,0,1
270,1,1,1,0
860,0,1,0,0
435,0,0,1,0


In [11]:
# Display the held-out test features; the row labels keep their original index values from df.
X_test

Unnamed: 0,Age,Income,Student,Credit_Rating
521,0,2,0,1
737,1,2,0,1
740,1,1,0,0
660,0,2,1,0
411,1,0,0,1
...,...,...,...,...
408,0,0,1,0
332,1,1,0,1
208,1,0,1,0
613,0,1,1,0


In [12]:
# Display the training targets, indexed by the same row labels as X_train.
y_train

Unnamed: 0,Buys_Computer
29,1
535,0
695,1
557,0
836,1
...,...
106,1
270,1
860,1
435,1


In [13]:
# Display the held-out test targets, indexed by the same row labels as X_test.
y_test

Unnamed: 0,Buys_Computer
521,0
737,0
740,1
660,1
411,1
...,...
408,1
332,1
208,1
613,1


In [14]:
# Fit a Gaussian Naive Bayes classifier on the training split.
# NOTE(review): the features in X are integer category codes produced by LabelEncoder,
# whereas GaussianNB models each feature as a continuous, normally distributed value.
# CategoricalNB may be a better match for this data — worth comparing before relying on
# the scores reported below.
classifier = GaussianNB()
classifier.fit(X_train, y_train)

In [15]:
# Predict a class label for every row of the held-out test features.
y_pred = classifier.predict(X_test)

In [16]:
# Confusion matrix on the test split: each row is a true class, each column a predicted
# class, both ordered 0 then 1.
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_true=y_test, y_pred=y_pred),
)

Confusion Matrix:
 [[ 28  43]
 [  9 120]]


In [17]:
# Per-class precision, recall, F1 and support on the test split, plus the averaged rows.
print(
    "Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)

Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.39      0.52        71
           1       0.74      0.93      0.82       129

    accuracy                           0.74       200
   macro avg       0.75      0.66      0.67       200
weighted avg       0.74      0.74      0.71       200



In [18]:
# Overall share of test rows whose predicted label matches the true label.
print(
    "Accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

Accuracy: 0.74
