### Import library

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.cluster import KMeans

from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score # SDB-CH

from sklearn.metrics import adjusted_rand_score # ARI
from sklearn.metrics import normalized_mutual_info_score # NMI
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score #


### Import dataset

In [None]:
# Read the raw house-rent CSV (path is relative to the notebook's working directory).
DATASET_PATH = 'House_Rent_Dataset.csv'
dfHouse = pd.read_csv(DATASET_PATH)

In [None]:
# Preview the first five rows of the loaded frame.
dfHouse.head(n=5)

### Melihat bentukan dataset

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes.
dfHouse.info()

### Mengecek nilai Null dan Duplikat pada dataset

In [None]:
# Count missing values per column and fully duplicated rows.
null_counts = dfHouse.isna().sum()
duplicate_count = dfHouse.duplicated().sum()

print(null_counts)
print("Duplicated data : ", duplicate_count)

### Menghapus outlier

In [None]:
# Drop every row in which any int64/float64 column lies 3 or more standard
# deviations from that column's mean (|z-score| >= 3).
numeric_part = dfHouse.select_dtypes(['int64', 'float64'])
within_three_sigma = (np.abs(stats.zscore(numeric_part)) < 3).all(axis=1)

# .copy() makes the result an independent frame, so the column assignments in
# the preprocessing cell do not raise SettingWithCopyWarning.
# NOTE: z-scores are recomputed from the current dfHouse, so re-running this
# cell filters the already-filtered data again.
dfHouse = dfHouse[within_three_sigma].copy()

### Preprocessing

In [None]:
# Work on an independent copy so the assignments below never write into a
# slice of a previous frame (avoids SettingWithCopyWarning).
dfHouse = dfHouse.copy()

# Scale the numeric columns to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split done later in the notebook, so test-set statistics leak into the
# features. Consider fitting it on the training split only.
numeric_cols = ["BHK", "Rent", "Size", "Bathroom"]
mm_scaler = MinMaxScaler()
dfHouse[numeric_cols] = mm_scaler.fit_transform(dfHouse[numeric_cols])

# Drop the date column. errors='ignore' keeps the cell re-runnable: the
# original in-place drop raised KeyError on a second execution.
dfHouse = dfHouse.drop(columns=['Posted On'], errors='ignore')

# Encode the categorical columns as integer codes. One encoder per column is
# kept in `label_encoders` so codes can be mapped back to the original values
# with label_encoders[column].inverse_transform(...).
col = ['Floor', 'Area Type', 'Area Locality', 'City', 'Furnishing Status', 'Point of Contact', 'Tenant Preferred']
label_encoders = {}
for i in col:
    le = LabelEncoder()
    dfHouse[i] = le.fit_transform(dfHouse[i])
    label_encoders[i] = le

dfHouse.head(10)


## A. Supervised Learning

### A1. Support Vector classifier

In [None]:
x = dfHouse[["Rent", "Size"]]  # features
y = dfHouse["Tenant Preferred"]  # target

# Split into train and test sets. random_state makes the split (and therefore
# every metric below) reproducible; stratify=y keeps the class proportions the
# same in both parts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)

# Fit a linear SVC on the training data; class_weight='balanced' reweights
# classes inversely to their frequency.
svm = SVC(kernel='linear', class_weight='balanced')
svm.fit(x_train, y_train)

# Predict the held-out test set.
y_pred = svm.predict(x_test)

# Keep the support vectors and their positions within x_train.
# svm.support_ holds the positional indices of the support vectors. The former
# np.isin(x_train, support_vectors).all(axis=1) tested each value against the
# flattened set of all support-vector values rather than matching whole rows,
# so it could flag rows that are not support vectors.
support_vectors = svm.support_vectors_
support_vector_indices = svm.support_

### A2. Evaluasi hasil

### Confusion matrix SVM

In [None]:
# Confusion matrix of the SVC predictions on the test set
# (rows = true class, columns = predicted class).
svm_cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(10,8))
# The cells are integer counts, so format them as integers ('d') instead of
# two-decimal floats.
sns.heatmap(svm_cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.title('Confusion Matrix SVM')
plt.show()

### Report Classification SVM

In [None]:
# Per-class precision, recall and F1 of the SVC on the test set.
svm_report = classification_report(y_test, y_pred)
print(svm_report)


### A3. Plotting hasil prediksi SVM

In [None]:

# Visualise the SVC that was trained and evaluated above. The fitted `svm` is
# reused on purpose: refitting a different (default-kernel) SVC here would
# plot a model other than the one the metrics describe and would overwrite
# `svm` for any cell that is re-run afterwards.
X = dfHouse[["Rent", "Size"]]
y = dfHouse["Tenant Preferred"]

fig, ax = plt.subplots(figsize=(20, 12))

# Filled regions coloured by predicted class. response_method="predict" works
# for both binary and multiclass targets; the margin-style levels=[-1, 0, 1]
# and linestyles only make sense for decision-function contour lines, not for
# a filled contour.
DecisionBoundaryDisplay.from_estimator(
    estimator=svm,
    X=X,
    plot_method="contourf",
    response_method="predict",
    alpha=0.5,
    ax=ax,
)

# Draw the observations on top of the regions so they are not covered.
sns.scatterplot(data=dfHouse, x="Rent", y="Size", hue="Tenant Preferred", ax=ax)

# Plot support vectors as hollow circles (a visible edge width is needed
# because the markers have no face colour).
ax.scatter(
    svm.support_vectors_[:, 0],
    svm.support_vectors_[:, 1],
    s=100,
    linewidth=1,
    facecolors="none",
    edgecolors="k",
)
ax.set_xlabel("Rent")
ax.set_ylabel("Size")
ax.set_title("SVM dengan Support Vectors")
plt.show()

## B. Unsupervised Learning

### B1. K-Means Clustering

In [None]:
plt.figure(figsize=(15,10))

X = dfHouse[["Rent" , "Size" ]]
K =  2 # dari target

kmeans = KMeans(n_clusters=K, n_init=10) 
kmeans.fit(X) 
cluster_labels = kmeans.labels_ 
dfHouse["Tenant Preferred"] = cluster_labels 
centroids = kmeans.cluster_centers_ 
c = ["blue", "green"]
plt.scatter(centroids[:, 0], centroids[:, 1], alpha=0.5, c='red', s=200,label='Centroids')

sns.scatterplot(data=dfHouse, x=dfHouse['Rent'], y=dfHouse['Size'], hue="Tenant Preferred", palette=c)

plt.title(f"K-Means Clustering (K={K}) dengan Centroids")
plt.legend()
plt.show()

### B2. Evaluasi Hasil

### Tanpa label asli

In [None]:
# Silhouette Score
ss_score = silhouette_score(X, dfHouse['Tenant Preferred'])
print(f"Silhouette Score: {ss_score:.2f}")

# Davies-Bouldin Index
db_index = davies_bouldin_score(X, dfHouse['Tenant Preferred'])
print(f"Davies-Bouldin Index: {db_index:.2f}")

# Calinski-Harabasz Index
ch_index = calinski_harabasz_score(X, dfHouse['Tenant Preferred'])
print(f"Calinski-Harabasz Index: {ch_index:.2f}")

### Dengan label asli

In [None]:
# Buat crosstab untuk membandingkan cluster labels dengan true labels
cross_tab = pd.crosstab(dfHouse["Tenant Preferred"], dfHouse["City"])
print("Cross-Tabulation:")
print(cross_tab, "\n")

# Adjusted Rand Index (ARI)
ari = adjusted_rand_score(dfHouse["City"], dfHouse['Tenant Preferred'])
print(f"Adjusted Rand Index (ARI): {ari:.2f}")

# Normalized Mutual Information (NMI)
nmi = normalized_mutual_info_score(dfHouse['City'], dfHouse['Tenant Preferred'])
print(f"Normalized Mutual Information (NMI): {nmi:.2f}")

# Fungsi HCV-M
homogeneity = homogeneity_score(dfHouse["City"], dfHouse['Tenant Preferred'])
completeness = completeness_score(dfHouse["City"], dfHouse['Tenant Preferred'])
v_measure = v_measure_score(dfHouse["City"], dfHouse['Tenant Preferred'])
print(f"Homogeneity Score: {homogeneity:.2f}")
print(f"Completeness Score: {completeness:.2f}")
print(f"V-Measure Score: {v_measure:.2f}")


### B3. Plotting 

In [None]:

plt.figure(figsize=(15, 10))

# True Labels
sns.scatterplot(data=dfHouse, x="Rent", y="Size", hue="City", palette='Set2', marker="D", s=130)
# Clusters
sns.scatterplot(data=dfHouse, x="Rent", y="Size", hue="Tenant Preferred", palette='Set2', marker="o", s=40)
# Centroids
plt.scatter(centroids[:, 0], centroids[:, 1], alpha=0.5, c='red', s=100,)

plt.title(f"K-Means Clustering (K={K}) dengan Centroids")
plt.xlabel("Lead Time")
plt.ylabel("Average Price per Room")

# Explicitly define legends
true_labels_legend = plt.legend(loc="upper right")

# Add legend for clusters and centroids
plt.legend(title="Clusters", loc="lower right")

plt.show()
