# Prediksi Kategori Harga Rumah Jabodetabek

Notebook ini membangun model untuk memprediksi kategori harga rumah (Low, Medium, High) di Jabodetabek menggunakan tiga pendekatan: **Naive Bayes**, **Fuzzy Logic** (Fuzzy C-Means), dan **Artificial Neural Network** (ANN). K-Nearest Neighbors tidak digunakan.

## Library

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import skfuzzy as fuzz
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

## Import Data

In [None]:
# Location of the raw house-price CSV (read over HTTP by pandas).
data_url = 'https://raw.githubusercontent.com/MuhGhifari/Prediksi-Harga-Rumah-Jabodetabek/refs/heads/main/jabodetabek_house_price.csv'

# Columns kept from the raw file, in the order they should appear in `data`.
selected_columns = [
    'price_in_rp', 'latitude', 'longitude', 'bedrooms', 'bathrooms',
    'land_size_m2', 'building_size_m2', 'carports', 'maid_bedrooms',
    'maid_bathrooms', 'floors', 'building_age', 'year_built',
    'property_condition', 'furnishing', 'certificate', 'garages',
]

# Shorter names for the columns whose raw names carry a unit suffix.
column_renames = {
    'price_in_rp': 'price',
    'land_size_m2': 'land_size',
    'building_size_m2': 'building_size',
}

# Load, subset and rename in one chain so `data` is bound exactly once.
data = pd.read_csv(data_url)[selected_columns].rename(columns=column_renames)
data.head()

## Preprocessing & Feature Engineering

In [None]:
# Drop rows that lack the target price or any essential attribute.
# `price` is included because a missing price would turn into a NaN
# price_category, and NaN labels cannot be used for stratified splitting
# or for training the classifiers. `.copy()` gives an independent frame so
# the column assignments below never write into a view of the raw data.
data = data.dropna(
    subset=['price', 'bedrooms', 'bathrooms', 'floors', 'property_condition']
).copy()

# Build the target: split price into three equal-frequency bins (terciles)
# labelled Low / Medium / High.
data['price_category'] = pd.qcut(data['price'], q=3, labels=['Low', 'Medium', 'High'])

# Integer-encode the categorical features. Each column keeps its own encoder
# so the original labels can be recovered later via `inverse_transform`.
# Values are cast to str first so every encoder sees a uniform string column.
le_condition = LabelEncoder()
le_furnish = LabelEncoder()
le_cert = LabelEncoder()
for column, encoder in (
    ('property_condition', le_condition),
    ('furnishing', le_furnish),
    ('certificate', le_cert),
):
    data[column] = encoder.fit_transform(data[column].astype(str))

# Feature matrix and target used by all three models.
features = [
    'bedrooms', 'bathrooms', 'land_size', 'building_size', 'carports',
    'maid_bedrooms', 'maid_bathrooms', 'floors', 'building_age',
    'property_condition', 'furnishing', 'certificate', 'garages',
]
X = data[features]
y = data['price_category']

## Split Data

In [None]:
# Hold out 20% of the rows for testing; stratify so the three price
# categories keep the same proportions in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fill any remaining gaps in the features. StandardScaler passes NaN through
# unchanged, but the downstream models cannot train on NaN. The medians come
# from the training split only, so no test-set statistics leak into training.
# A column with no observed training value falls back to 0.
train_medians = X_train.median(numeric_only=True).fillna(0)
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Standardize features: fit the scaler on the training split only, then
# apply the same transformation to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## 1. Naive Bayes

In [None]:
# Fit a Gaussian Naive Bayes classifier on the standardized training features
# (`fit` returns the estimator itself, so construction and fitting chain).
nb = GaussianNB().fit(X_train_scaled, y_train)

# Predict the held-out split, then report overall accuracy followed by the
# per-class precision / recall / F1 table.
y_pred_nb = nb.predict(X_test_scaled)
nb_accuracy = accuracy_score(y_test, y_pred_nb)
print('Akurasi Naive Bayes:', nb_accuracy)
print(classification_report(y_true=y_test, y_pred=y_pred_nb))

## 2. Fuzzy Logic (Fuzzy C-Means Clustering untuk Kategori Harga)

In [None]:
# Fuzzy C-Means is fitted on all standardized training features. skfuzzy
# expects the data laid out as (n_features, n_samples), hence the transpose.
n_clusters = 3  # one cluster per price category
X_fuzzy = X_train_scaled.T
# A fixed seed makes the random initial partition (and therefore the reported
# scores) reproducible, in line with random_state=42 used for the split.
cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
    X_fuzzy, c=n_clusters, m=2, error=0.005, maxiter=1000, init=None, seed=42
)

# Compute memberships of the test rows against the trained centers, then take
# the cluster with the highest membership as each row's hard assignment.
u_test, _, _, _, _, _ = fuzz.cluster.cmeans_predict(
    X_test_scaled.T, cntr, m=2, error=0.005, maxiter=1000, seed=42
)
fuzzy_labels = np.argmax(u_test, axis=0)

# Map each cluster to a price category by majority vote: the most frequent
# training label among the rows hard-assigned to that cluster.
# Both lookups are loop-invariant, so they are computed once up front.
train_clusters = np.argmax(u, axis=0)
train_labels = y_train.reset_index(drop=True)  # align positions with columns of `u`
cluster_map = {}
for cluster_id in range(n_clusters):
    members = train_labels[train_clusters == cluster_id]
    cluster_map[cluster_id] = members.value_counts().idxmax()

fuzzy_pred = [cluster_map[c] for c in fuzzy_labels]
print('Akurasi Fuzzy Logic:', accuracy_score(y_test, fuzzy_pred))
print(classification_report(y_test, fuzzy_pred))

## 3. Artificial Neural Network (ANN)

In [None]:
# Encode the string price categories as integers, then as one-hot vectors.
# The class count is taken from the encoder and passed explicitly so the
# one-hot width, the output layer and the report always agree, even if a
# class happened to be absent from one of the splits.
le_y = LabelEncoder()
y_train_enc = le_y.fit_transform(y_train)
y_test_enc = le_y.transform(y_test)
n_classes = len(le_y.classes_)
y_train_cat = to_categorical(y_train_enc, num_classes=n_classes)
y_test_cat = to_categorical(y_test_enc, num_classes=n_classes)

# Build the ANN: two ReLU hidden layers (32 and 16 units) followed by a
# softmax output with one unit per price category.
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(16, activation='relu'),
    Dense(n_classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_scaled, y_train_cat, epochs=30, batch_size=32, verbose=0)

# Evaluate on the held-out split. `predict` is silenced like `fit` and
# `evaluate` so the progress bar does not interleave with the printed report.
loss, acc = model.evaluate(X_test_scaled, y_test_cat, verbose=0)
print('Akurasi ANN:', acc)
y_pred_ann = np.argmax(model.predict(X_test_scaled, verbose=0), axis=1)
# `labels` pins the row order to the encoder's class order so that
# `target_names` lines up with it regardless of which classes appear.
print(classification_report(
    y_test_enc,
    y_pred_ann,
    labels=list(range(n_classes)),
    target_names=le_y.classes_,
))