In [1]:
from google.colab import drive

# Mount Google Drive at /content/gdrive so files stored there can be read by path
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# Base folder on the mounted Drive; the dataset is read from its Data/ subfolder
path = "/content/gdrive/MyDrive/praktikum/Praktikum 04"

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix, classification_report, RocCurveDisplay, ConfusionMatrixDisplay
)

In [4]:
# Load the dataset CSV from the Data/ subfolder of `path`
csv_path = f"{path}/Data/calonpembelimobil.csv"
df = pd.read_csv(csv_path)

# Show the loaded frame as the cell output
df

Unnamed: 0,ID,Usia,Status,Kelamin,Memiliki_Mobil,Penghasilan,Beli_Mobil
0,1,32,1,0,0,240,1
1,2,49,2,1,1,100,0
2,3,52,1,0,2,250,1
3,4,26,2,1,1,130,0
4,5,45,3,0,2,237,1
...,...,...,...,...,...,...,...
995,996,51,0,1,2,293,1
996,997,61,1,0,2,275,1
997,998,45,1,0,0,406,1
998,999,48,0,1,2,432,1


In [5]:
# Summarise the frame: row count, column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   ID              1000 non-null   int64
 1   Usia            1000 non-null   int64
 2   Status          1000 non-null   int64
 3   Kelamin         1000 non-null   int64
 4   Memiliki_Mobil  1000 non-null   int64
 5   Penghasilan     1000 non-null   int64
 6   Beli_Mobil      1000 non-null   int64
dtypes: int64(7)
memory usage: 54.8 KB


In [6]:
# Check for missing values: number of null entries in each column
df.isna().sum()

Unnamed: 0,0
ID,0
Usia,0
Status,0
Kelamin,0
Memiliki_Mobil,0
Penghasilan,0
Beli_Mobil,0


In [7]:
# Preview the first rows of the frame (head() returns 5 rows by default)
display(df.head())

Unnamed: 0,ID,Usia,Status,Kelamin,Memiliki_Mobil,Penghasilan,Beli_Mobil
0,1,32,1,0,0,240,1
1,2,49,2,1,1,100,0
2,3,52,1,0,2,250,1
3,4,26,2,1,1,130,0
4,5,45,3,0,2,237,1


In [10]:
# Build a logistic regression model
# Separate the features from the target; the ID column is not used as a feature
X = df[['Usia', 'Status', 'Kelamin', 'Memiliki_Mobil', 'Penghasilan']]
y = df['Beli_Mobil']

# Split into training and test data (80:20); the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Predict the test data
y_pred = clf.predict(X_test)

# Class labels, listed in the same order as their display names. Passing them
# explicitly keeps each name attached to the right class and avoids an error
# if one class happens to be absent from the test split.
class_labels = [0, 1]
class_names = ['Tidak Beli (0)', 'Beli (1)']

print(" Evaluasi Model ")
print(f"Akurasi: {accuracy_score(y_test, y_pred):.4f}")
print("\nLaporan Klasifikasi:")
print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))

 Evaluasi Model 
Akurasi: 0.9300

Laporan Klasifikasi:
                precision    recall  f1-score   support

Tidak Beli (0)       0.92      0.86      0.89        64
      Beli (1)       0.94      0.96      0.95       136

      accuracy                           0.93       200
     macro avg       0.93      0.91      0.92       200
  weighted avg       0.93      0.93      0.93       200



In [11]:
# Try the trained model on new, unseen data.
# One record per prospective buyer; keys follow the training feature order.
# Kelamin encoding per the original note: 1 = male, 0 = female.
calon_baru = [
    {'Usia': 30, 'Status': 1, 'Kelamin': 1, 'Memiliki_Mobil': 0, 'Penghasilan': 200},
    {'Usia': 50, 'Status': 2, 'Kelamin': 0, 'Memiliki_Mobil': 1, 'Penghasilan': 300},
]
data_baru = pd.DataFrame(calon_baru)

# Predicted class, and the probability taken from column 1 of predict_proba
pred = clf.predict(data_baru)
prob = clf.predict_proba(data_baru)[:, 1]

# Attach both results to a copy of the input table (data_baru stays unchanged)
hasil = data_baru.assign(**{
    'Prob_Beli': prob,
    'Pred (0=Tidak,1=Ya)': pred,
})

print("Hasil Prediksi Data Baru")
display(hasil)

Hasil Prediksi Data Baru


Unnamed: 0,Usia,Status,Kelamin,Memiliki_Mobil,Penghasilan,Prob_Beli,"Pred (0=Tidak,1=Ya)"
0,30,1,1,0,200,0.156571,0
1,50,2,0,1,300,0.996045,1
