# In [None]:  (notebook cell marker, kept as a comment so the file parses as Python)
# -*- coding: utf-8 -*-
"""MODEL_DASAR"""

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib  # Tambahkan pustaka untuk menyimpan/memuat model

# 1. Memuat dan Memproses Dataset
def load_and_preprocess_data(file_path, test_size=0.2, random_state=42):
    """Load the housing CSV, log-transform it and split it into train/test sets.

    Column names are stripped of surrounding whitespace. Every selected
    feature column (including the one-hot city columns) and the target column
    are passed through ``log1p`` before the split.

    Args:
        file_path: Location of the CSV file, passed to ``pandas.read_csv``.
        test_size: Share of the rows held out for testing, forwarded to
            ``train_test_split``. Defaults to ``0.2``.
        random_state: Seed forwarded to ``train_test_split``. Defaults to
            ``42``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test, selected_features)``.
        The arrays hold log1p-transformed values and ``selected_features`` is
        the list of feature names in the column order of ``X_train``/``X_test``.

    Raises:
        KeyError: If a required feature or target column is not in the CSV.
    """
    data = pd.read_csv(file_path)
    data.columns = data.columns.str.strip()

    selected_features = [
        "building_size_m2",
        "land_size_m2",
        "electricity",
        "year_built",
        "bathrooms",
        "bedrooms",
        "carports",
        "garages",
        "floors",
        "Bogor", "Depok", "Jakarta Barat", "Jakarta Pusat", "Jakarta Selatan",
        "Jakarta Timur", "Jakarta Utara", "Tangerang", "Bekasi",
    ]
    target = "price_in_rp"

    # Report every missing column at once instead of failing deep inside pandas.
    missing = [name for name in selected_features + [target] if name not in data.columns]
    if missing:
        raise KeyError(f"Dataset is missing required column(s): {missing}")

    # Log-transform both features and target; predictions made with a model
    # trained on this data must be mapped back with expm1.
    log_features = np.log1p(data[selected_features])
    log_target = np.log1p(data[target])

    X = log_features.to_numpy()
    y = log_target.to_numpy().reshape(-1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    return X_train, X_test, y_train, y_test, selected_features

# 2. Implementasi Decision Tree Regressor
class DecisionTreeRegressorNode:
    """One node of a regression tree.

    A leaf stores its prediction in ``value``. An internal node leaves
    ``value`` as ``None`` and stores the split (``feature_index``,
    ``threshold``) together with its ``left`` and ``right`` children.
    """

    def __init__(self, feature_index=None, threshold=None, left=None, right=None, value=None):
        self.feature_index = feature_index  # column compared at this node
        self.threshold = threshold  # samples with feature <= threshold go left
        self.left = left
        self.right = right
        self.value = value  # prediction; only set on leaves


class DecisionTreeRegressor:
    """Regression tree grown greedily on the size-weighted MSE of the children.

    Args:
        max_depth: Nodes at this depth are always leaves (the root is depth 0).
        min_samples_split: A node with fewer samples than this becomes a leaf.
    """

    def __init__(self, max_depth=5, min_samples_split=3):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.root = None

    def fit(self, X, y):
        """Grow the tree from training data.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
            y: Array-like of targets with ``n_samples`` entries.

        Raises:
            ValueError: If ``X`` is not 2-D, the sample counts of ``X`` and
                ``y`` differ, or there are no samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if y.shape[:1] != X.shape[:1]:
            raise ValueError("X and y must contain the same number of samples")
        if X.shape[0] == 0:
            # An empty node would otherwise become a silent NaN leaf.
            raise ValueError("Cannot fit a tree on an empty dataset")
        self.root = self._grow_tree(X, y)

    def predict(self, X):
        """Return one prediction per row of ``X`` as a NumPy array.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.root is None:
            raise RuntimeError("DecisionTreeRegressor must be fitted before calling predict()")
        return np.array([self._predict(inputs, self.root) for inputs in np.asarray(X)])

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``."""
        num_samples, num_features = X.shape
        if depth < self.max_depth and num_samples >= self.min_samples_split:
            best_split = self._find_best_split(X, y, num_features)
            if best_split:
                left_rows = best_split['left_indices']
                right_rows = best_split['right_indices']
                return DecisionTreeRegressorNode(
                    feature_index=best_split['feature_index'],
                    threshold=best_split['threshold'],
                    left=self._grow_tree(X[left_rows], y[left_rows], depth + 1),
                    right=self._grow_tree(X[right_rows], y[right_rows], depth + 1),
                )
        # Depth/size limit reached or no usable split: predict the node mean.
        return DecisionTreeRegressorNode(value=np.mean(y))

    def _find_best_split(self, X, y, num_features):
        """Search every feature/threshold pair for the lowest weighted MSE.

        Returns:
            A dict with keys ``feature_index``, ``threshold``,
            ``left_indices`` and ``right_indices``, or ``None`` when no
            threshold separates the samples into two non-empty groups.
        """
        best_split = None
        min_error = float("inf")
        num_samples = len(y)

        for feature_index in range(num_features):
            column = X[:, feature_index]
            # The largest unique value is skipped: nothing lies to its right,
            # so it can never produce a valid split.
            for threshold in np.unique(column)[:-1]:
                left_indices = np.flatnonzero(column <= threshold)
                # Computed separately (not as the complement) so NaN feature
                # values keep falling on neither side.
                right_indices = np.flatnonzero(column > threshold)
                if left_indices.size == 0 or right_indices.size == 0:
                    continue

                y_left = y[left_indices]
                y_right = y[right_indices]
                # Sum of squared errors around each child's mean equals
                # n_child * MSE_child, so this is the size-weighted MSE without
                # going through a general-purpose metric function per candidate.
                left_sse = np.sum((y_left - np.mean(y_left)) ** 2)
                right_sse = np.sum((y_right - np.mean(y_right)) ** 2)
                total_error = (left_sse + right_sse) / num_samples

                if total_error < min_error:
                    min_error = total_error
                    best_split = {
                        'feature_index': feature_index,
                        'threshold': threshold,
                        'left_indices': left_indices,
                        'right_indices': right_indices,
                    }
        return best_split

    def _predict(self, inputs, node):
        """Walk from ``node`` down to a leaf for one sample and return its value."""
        while node.value is None:
            if inputs[node.feature_index] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

# 3. Implementasi Random Forest Regressor
class RandomForestRegressorFromScratch:
    """Ensemble of ``DecisionTreeRegressor`` trees fitted on bootstrap samples.

    Each tree is trained on rows drawn with replacement from the training
    set; the ensemble prediction is the mean of the per-tree predictions.

    Args:
        n_estimators: Number of trees to train.
        max_depth: ``max_depth`` passed to every tree.
        min_samples_split: ``min_samples_split`` passed to every tree.
        random_state: Optional seed for the bootstrap sampling. ``None``
            (default) draws from NumPy's global random state, as before.
    """

    def __init__(self, n_estimators=60, max_depth=7, min_samples_split=4, random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.random_state = random_state
        self.trees = []

    def fit(self, X, y):
        """Train ``n_estimators`` trees, each on its own bootstrap sample.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
            y: Array-like of targets with ``n_samples`` entries.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        num_samples = X.shape[0]

        # getattr: instances unpickled from files written before random_state
        # existed do not carry the attribute.
        seed = getattr(self, "random_state", None)
        # Without a seed keep drawing from the global state so callers that
        # rely on np.random.seed(...) see unchanged behaviour.
        rng = np.random if seed is None else np.random.RandomState(seed)

        trees = []
        for _ in range(self.n_estimators):
            indices = rng.choice(num_samples, num_samples, replace=True)
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
            )
            tree.fit(X[indices], y[indices])
            trees.append(tree)
        # Replace the ensemble only once every tree has been trained.
        self.trees = trees

    def predict(self, X):
        """Return the mean of the trees' predictions for each row of ``X``.

        Raises:
            RuntimeError: If the forest holds no trained trees.
        """
        if not self.trees:
            # Averaging over zero trees would silently produce NaN.
            raise RuntimeError(
                "RandomForestRegressorFromScratch must be fitted before calling predict()"
            )
        predictions = np.array([tree.predict(X) for tree in self.trees])
        return np.mean(predictions, axis=0)

# 4. Fungsi Evaluasi Model
def evaluate_model(y_true, y_pred, original_scale=False):
    """Compute MSE, RMSE, MAE and R² for a set of predictions.

    By default the metrics are computed on the values exactly as passed in,
    so log-space inputs yield log-space errors.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.
        original_scale: When ``True``, both inputs are treated as
            log1p-transformed and mapped back with ``expm1`` before the
            metrics are computed. Defaults to ``False``.

    Returns:
        A tuple ``(mse, rmse, mae, r2)``.
    """
    if original_scale:
        # Undo the log1p transform so the errors are expressed in the
        # target's own units.
        y_true = np.expm1(y_true)
        y_pred = np.expm1(y_pred)

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mse, rmse, mae, r2


# Fungsi Prediksi Berdasarkan Input Pengguna dengan Evaluasi
def user_input_prediction_with_evaluation(random_forest, selected_features, X_test, y_test):
    """Prompt for one house, print its predicted price, then print test metrics.

    The answers are transformed with ``log1p`` (numeric fields and the
    one-hot city indicators alike, matching the training preprocessing in
    this file) and arranged in the order given by ``selected_features``.

    Args:
        random_forest: Fitted model exposing ``predict``; its output is
            treated as a log1p-transformed price.
        selected_features: Feature names in the column order the model was
            trained on. ``None`` or an empty sequence falls back to the
            built-in order (numeric fields followed by the cities).
        X_test: Test features used for the evaluation printout.
        y_test: Test targets used for the evaluation printout.

    Returns:
        None. Results are printed. Invalid user input prints
        ``"Input tidak valid."`` and returns without predicting.

    Raises:
        ValueError: If ``selected_features`` names a feature this prompt
            cannot supply. Errors raised by the model or by the evaluation
            propagate instead of being reported as invalid input.
    """
    numeric_prompts = [
        ("building_size_m2", "1. Luas bangunan (m²): "),
        ("land_size_m2", "2. Luas tanah (m²): "),
        ("electricity", "3. Daya listrik (Watt): "),
        ("year_built", "4. Tahun dibangun: "),
        ("bathrooms", "5. Jumlah kamar mandi: "),
        ("bedrooms", "6. Jumlah kamar tidur: "),
        ("carports", "7. Jumlah carport: "),
        ("garages", "8. Jumlah garasi: "),
        ("floors", "9. Jumlah lantai: "),
    ]
    city_features = ["Bogor", "Depok", "Jakarta Barat", "Jakarta Pusat", "Jakarta Selatan",
                     "Jakarta Timur", "Jakarta Utara", "Tangerang", "Bekasi"]

    default_order = [name for name, _ in numeric_prompts] + city_features
    if selected_features is not None and len(selected_features) > 0:
        feature_order = list(selected_features)
    else:
        feature_order = default_order

    # A mismatch here is a programming error, so fail before prompting the user.
    unsupported = [name for name in feature_order if name not in default_order]
    if unsupported:
        raise ValueError(f"No input prompt is defined for feature(s): {unsupported}")

    # Only input parsing lives in the try block, so model or metric errors
    # are not misreported as bad user input.
    try:
        feature_values = {}
        for name, prompt in numeric_prompts:
            raw_value = float(input(prompt))
            # float() accepts "nan"/"inf", and log1p of a negative number
            # yields NaN or -inf without raising; reject all of them here.
            if not np.isfinite(raw_value) or raw_value < 0:
                raise ValueError(f"{name} must be a finite, non-negative number")
            feature_values[name] = np.log1p(raw_value)

        city = input("10. Lokasi kota: ").strip()
        matched_city = next(
            (known for known in city_features if known.casefold() == city.casefold()),
            None,
        )
        if matched_city is None:
            # An unknown city used to become an all-zero indicator vector.
            raise ValueError(f"unknown city: {city!r}")
    except ValueError:
        print("Input tidak valid.")
        return

    for known in city_features:
        indicator = 1.0 if known == matched_city else 0.0
        # The training preprocessing applies log1p to the city columns too.
        feature_values[known] = np.log1p(indicator)

    input_features = np.array([[feature_values[name] for name in feature_order]])

    predicted_log_price = random_forest.predict(input_features)
    predicted_price = np.expm1(predicted_log_price)
    print(f"\nPrediksi harga rumah: Rp {predicted_price[0]:,.2f}")

    y_pred = random_forest.predict(X_test)
    mse, rmse, mae, r2 = evaluate_model(y_test, y_pred)
    print(f"Evaluasi - MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}")

# Fungsi Utama
def main(file_path='/content/balanced_dataset.csv', model_file="random_forest_model.joblib"):
    """Load the data, obtain a model (cached or freshly trained) and run the prompt.

    Args:
        file_path: CSV dataset to load. Defaults to the original hard-coded
            path.
        model_file: Path of the joblib file used to cache the trained model.
            Defaults to the original hard-coded file name.

    If ``model_file`` exists it is loaded and used as is; it is not checked
    against the current dataset or hyperparameters, so delete the file to
    force retraining. Otherwise a new forest is trained on the training
    split and saved to ``model_file``.
    """
    print("Memuat dan memproses dataset...")
    X_train, X_test, y_train, y_test, selected_features = load_and_preprocess_data(file_path)

    print("Mencoba memuat model dari file...")
    try:
        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code. Only point model_file at files you trust.
        random_forest = joblib.load(model_file)
    except FileNotFoundError:
        print("Model tidak ditemukan. Melatih model baru...")
        random_forest = RandomForestRegressorFromScratch(n_estimators=10, max_depth=10, min_samples_split=5)
        random_forest.fit(X_train, y_train)
        joblib.dump(random_forest, model_file)
        print("Model disimpan.")
    else:
        print("Model berhasil dimuat.")

    user_input_prediction_with_evaluation(random_forest, selected_features, X_test, y_test)

# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
