In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report, confusion_matrix, mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [3]:
# Load the diamonds dataset from a CSV file located in the working directory.
DATASET_PATH = 'diamonds.csv'
data = pd.read_csv(DATASET_PATH)

# A. Supervised Learning

## Preprocessing Data

Lakukan preprocessing data, misalnya mengubah fitur kategorikal (`cut`, `color`, `clarity`) menjadi numerik dengan one-hot encoding.

### Klasifikasi (Support Vector Classifier - SVC)

In [4]:
# One-hot encode the categorical columns.
# `data` is overwritten by this cell, so on a second run the original columns
# no longer exist and get_dummies would raise a KeyError. Encoding only the
# columns that are still present keeps the cell safe to re-run.
categorical_cols = [col for col in ['cut', 'color', 'clarity'] if col in data.columns]
if categorical_cols:
    data = pd.get_dummies(data, columns=categorical_cols)

In [5]:
# Separate the features (X) from the classification target (y).
# SVC is a classifier, so the continuous `price` column cannot be used as the
# target as-is: every distinct price would be treated as its own class.
# The price is therefore binned into equal-frequency classes; edit the label
# list below to change the number of classes.
# NOTE(review): X keeps every remaining column of `data`; if the CSV carries a
# row-index column it is used as a feature too - confirm against the file.
PRICE_CLASS_LABELS = ['low', 'medium', 'high']
X = data.drop(columns=['price'])
y = pd.qcut(data['price'], q=len(PRICE_CLASS_LABELS), labels=PRICE_CLASS_LABELS).astype(str)

In [6]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# identical between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Build and train the SVC model.
# Support vector machines are not scale invariant, and the feature columns
# here mix very different ranges with 0/1 indicator columns. The features are
# standardised inside a pipeline so the scaler is fitted on the training data
# only and the same transformation is applied automatically at predict time.
svc = make_pipeline(StandardScaler(), SVC())
svc.fit(X_train, y_train)

In [None]:
# Predict the class of every row in the held-out test set
y_pred = svc.predict(X=X_test)

In [None]:
# Evaluate the classifier: per-class report and confusion matrix
classification = classification_report(y_true=y_test, y_pred=y_pred)
confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Print each evaluation result under its heading
for heading, value in (
    ("Confusion Matrix:\n", confusion),
    ("\nClassification Report:\n", classification),
):
    print(heading, value)

### Regresi (Linear Regression)

In [None]:
# Select the numeric predictors (X) and the regression target (y)
regression_features = ['carat', 'depth', 'table', 'x', 'y', 'z']
X = data.loc[:, regression_features]
y = data.loc[:, 'price']

In [None]:
# Split into training and test sets (80/20) with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Create the linear regression model and fit it on the training data.
# fit() returns the estimator itself, so construction and training are chained.
regressor = LinearRegression().fit(X_train, y_train)
regressor

In [None]:
# Predict the price of every row in the held-out test set
y_pred = regressor.predict(X=X_test)

In [None]:
# Score the regression predictions against the held-out prices
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above

In [None]:
# Print every regression metric next to its name
for metric_name, metric_value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(metric_name, metric_value)

## Plot Hasil Prediksi

In [None]:
# Scatter the predicted prices against the actual prices with matplotlib,
# drawing on the current axes.
ax = plt.gca()
ax.scatter(y_test, y_pred)
ax.set_xlabel("Harga Asli")
ax.set_ylabel("Harga Prediksi")
ax.set_title("Plot Prediksi Harga")
plt.show()

# B. Unsupervised Learning



In [None]:
#mengimpor library yang diperlukan dan mengambil data dari dataset
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

### Melakukan K-Means Clustering

In [None]:
# Initialise the K-Means model.
# random_state pins the centroid initialisation so the clusters (and every
# score derived from them) are reproducible across runs; n_init is set
# explicitly because its default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)  # Adjust the number of clusters as needed

# Fit the K-Means model on every column of `data`
kmeans.fit(data)

In [None]:
# Cluster assignment of each row, as stored on the fitted K-Means model
labels = kmeans.labels_

In [None]:
# Evaluate K-Means without ground-truth labels (internal validity indices),
# all computed on the same data the model was fitted on.
davies_bouldin = davies_bouldin_score(X=data, labels=labels)
calinski_harabasz = calinski_harabasz_score(X=data, labels=labels)
silhouette_avg = silhouette_score(X=data, labels=labels)

In [None]:
# Print every internal clustering score next to its name
for score_name, score_value in (
    ("Silhouette Score:", silhouette_avg),
    ("Calinski-Harabasz Score:", calinski_harabasz),
    ("Davies-Bouldin Score:", davies_bouldin),
):
    print(score_name, score_value)

In [None]:
# Cluster assignment of each row from the fitted K-Means model.
# This repeats the assignment made in the earlier cell, so `labels` is
# defined before the label-based evaluation that follows.
labels = kmeans.labels_

In [None]:
# Evaluasi K-Means dengan label asli
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

In [None]:
# Ground-truth labels for the external clustering metrics.
# No cell in this notebook creates a 'true_labels' column, so indexing it
# directly raises a KeyError. The column is used when it exists; otherwise the
# diamond cut grade serves as the reference labelling. `cut` was one-hot
# encoded during preprocessing, so it is rebuilt from its `cut_*` indicator
# columns (the label of a row is the name of its active indicator column).
# NOTE(review): using the cut grade as ground truth is an assumption - confirm
# that this is the intended reference labelling.
if 'true_labels' in data.columns:
    true_labels = data['true_labels']
else:
    cut_indicators = data.filter(regex=r'^cut_')
    if cut_indicators.shape[1] == 0:
        raise KeyError("data has neither a 'true_labels' column nor 'cut_*' indicator columns")
    true_labels = cut_indicators.astype(int).idxmax(axis=1)

In [None]:
# Compare the cluster assignment with the reference labels
nmi = normalized_mutual_info_score(labels_true=true_labels, labels_pred=labels)
ari = adjusted_rand_score(labels_true=true_labels, labels_pred=labels)

In [None]:
# Print both external clustering scores next to their names
for score_name, score_value in (
    ("Adjusted Rand Index (ARI):", ari),
    ("Normalized Mutual Information (NMI):", nmi),
):
    print(score_name, score_value)

## Plot Hasil Clustering

In [None]:
# Plot the clustering result in the plane of the `x` and `y` columns.
# The column names are lowercase (they are selected as 'x' and 'y' for the
# regression features), so data['X'] / data['Y'] would raise a KeyError.
x_pos = data.columns.get_loc('x')
y_pos = data.columns.get_loc('y')
centers = kmeans.cluster_centers_
plt.scatter(data['x'], data['y'], c=labels)
# cluster_centers_ holds one column per feature the model was fitted on, in
# the column order of `data`, so the centroid coordinates are read at the
# positions of `x` and `y` rather than from the first two columns.
plt.scatter(centers[:, x_pos], centers[:, y_pos], c='red', marker='x')
plt.xlabel("Fitur X")
plt.ylabel("Fitur Y")
plt.title("Hasil Clustering dengan K-Means")
plt.show()