In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Load the dataset (point the path at wherever the CSV file actually lives).
data = pd.read_csv("/content/mobileprice_modified.csv")

# a. Split the frame into the label column and the predictor attributes.
# 'price_range' is assumed to be the target column, consistent with the file
# analysed earlier.
y = data['price_range']
X = data.drop(columns=['price_range'])

# Descriptive statistics of the raw predictors, before any preprocessing.
print("Statistik Deskriptif Sebelum Pengisian Missing Values:", X.describe(), sep="\n")

# b. Fill missing values with the per-column mean (SimpleImputer, strategy='mean').
imputer = SimpleImputer(strategy='mean')
X_imputed = pd.DataFrame(imputer.fit(X).transform(X), columns=X.columns)

# Descriptive statistics once the missing values have been filled in.
print("\nStatistik Deskriptif Setelah Pengisian Missing Values:", X_imputed.describe(), sep="\n")

# c. Standardise every predictor with StandardScaler.
scaler = StandardScaler()
X_scaled = pd.DataFrame(
    scaler.fit(X_imputed).transform(X_imputed),
    columns=X_imputed.columns,
)

# Descriptive statistics after standardisation.
print("\nStatistik Deskriptif Setelah Standarisasi:", X_scaled.describe(), sep="\n")


Statistik Deskriptif Sebelum Pengisian Missing Values:
       battery_power       blue  clock_speed     dual_sim           fc  \
count    1990.000000  2000.0000  2000.000000  2000.000000  2000.000000   
mean     1237.867839     0.4950     1.522250     0.509500     4.309500   
std       439.676025     0.5001     0.816004     0.500035     4.341444   
min       501.000000     0.0000     0.500000     0.000000     0.000000   
25%       850.250000     0.0000     0.700000     0.000000     1.000000   
50%      1225.000000     0.0000     1.500000     1.000000     3.000000   
75%      1615.000000     1.0000     2.200000     1.000000     7.000000   
max      1998.000000     1.0000     3.000000     1.000000    19.000000   

            four_g   int_memory        m_dep    mobile_wt      n_cores  \
count  2000.000000  1990.000000  2000.000000  1990.000000  2000.000000   
mean      0.521500    31.987940     0.501750   140.344221     4.520500   
std       0.499662    18.136427     0.288416    35.40711

In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the dataset (adjust the path to the file's location).
data = pd.read_csv("/content/mobileprice_modified.csv")

# Separate the predictor attributes from the label column.
X = data.drop(columns='price_range')
y = data['price_range']

# Split into 85% training / 15% test data BEFORE any preprocessing is fitted.
# Fitting the imputer/scaler on the full dataset would let statistics of the
# test rows (column means and variances) leak into the training features and
# make the reported accuracy optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42
)

# Handle missing values: learn the column means from the training rows only,
# then apply those same means to the test rows.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = pd.DataFrame(
    imputer.fit_transform(X_train_raw),
    columns=X_train_raw.columns,
    index=X_train_raw.index,  # keep the original row labels aligned with y_train
)
X_test_imputed = pd.DataFrame(
    imputer.transform(X_test_raw),
    columns=X_test_raw.columns,
    index=X_test_raw.index,
)

# Standardise: learn mean/std from the training rows only, reuse them on test.
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train_imputed),
    columns=X_train_imputed.columns,
    index=X_train_imputed.index,
)
X_test = pd.DataFrame(
    scaler.transform(X_test_imputed),
    columns=X_test_imputed.columns,
    index=X_test_imputed.index,
)

# Choose the model to use: Decision Tree or SVM.
# This example uses the Decision Tree; to switch to the SVM, uncomment the
# SVM line below.

# Decision Tree
model = DecisionTreeClassifier(random_state=42)

# SVM (uncomment to use an SVM instead)
# model = SVC(kernel='linear', random_state=42)

# Train the model on the training data.
model.fit(X_train, y_train)

# Predict the held-out test data.
y_pred = model.predict(X_test)

# Evaluate the model.
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Show the results.
print("Confusion Matrix:")
print(conf_matrix)
print("\nAkurasi:", accuracy)


Confusion Matrix:
[[69 11  0  0]
 [ 7 61  4  0]
 [ 0  7 49 12]
 [ 0  0  7 73]]

Akurasi: 0.84


In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Load the dataset (adjust the path to the file's location).
data = pd.read_csv("/content/mobileprice_modified.csv")

# Keep only the predictor attributes; the 'price_range' label is left out of
# the clustering input.
X = data.drop(columns=['price_range'])

# Fill missing values with the per-column mean.
imputer = SimpleImputer(strategy='mean')
X_imputed = pd.DataFrame(imputer.fit(X).transform(X), columns=X.columns)

# Standardise the imputed predictors.
scaler = StandardScaler()
X_scaled = pd.DataFrame(
    scaler.fit(X_imputed).transform(X_imputed),
    columns=X_imputed.columns,
)

# Number of clusters to look for (adjust as desired).
n_clusters = 4

# Fit K-Means and read back the cluster assigned to every row.
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
cluster_labels = kmeans.fit(X_scaled).labels_

# Evaluate the clustering with the silhouette score.
silhouette_avg = silhouette_score(X_scaled, cluster_labels)

# Show the results.
print("Centroid setiap cluster:", kmeans.cluster_centers_, sep="\n")
print("\nSilhouette Score:", silhouette_avg)


Centroid setiap cluster:
[[-0.0284788   0.0592535   0.06170752  0.02597257 -0.05256007 -1.04396559
   0.02221306  0.02363609 -0.01332469  0.03550663 -0.02578491  0.04198274
  -0.00954344 -0.03971978 -0.02853728 -0.06155033  0.08187402 -1.78686097
  -0.02098967 -0.00757676]
 [ 0.07279385 -0.07458084 -0.03418542  0.04236752 -0.52331104  0.26736338
  -0.02464443  0.00198279  0.03116085 -0.07892972 -0.40473279 -0.36963348
  -0.27316271 -0.08367489 -0.57513677 -0.54812013  0.00982622  0.55964063
  -0.09057874 -0.19809902]
 [ 0.09857957 -0.04069328  0.08909152 -0.09274946  1.3062891   0.28445204
  -0.16278393  0.03388576  0.03634422 -0.04724316  1.06042151 -0.03383942
  -0.07840831  0.11315344 -0.03283261 -0.09569233  0.01654572  0.51098046
   0.06773394  0.01365111]
 [-0.14794078  0.0704874  -0.0944946   0.00519449 -0.45731322  0.40899119
   0.1514825  -0.05431471 -0.05713862  0.10386402 -0.4115474   0.43945435
   0.40968369  0.04011391  0.75480591  0.80804619 -0.10351053  0.55490978
   0.0