In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, HTML

### 2. import dataset

In [None]:
# Load the raw listings; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='sample_data/House Price Prediction.csv')
# Preview the first five rows to check the structure of the data.
print(df.head(n=5))

   NO                                         NAMA RUMAH       HARGA   LB  \
0   1  Rumah Murah Hook Tebet Timur, Tebet, Jakarta S...  3800000000  220   
1   2  Rumah Modern di Tebet dekat Stasiun, Tebet, Ja...  4600000000  180   
2   3  Rumah Mewah 2 Lantai Hanya 3 Menit Ke Tebet, T...  3000000000  267   
3   4           Rumah Baru Tebet, Tebet, Jakarta Selatan   430000000   40   
4   5  Rumah Bagus Tebet komp Gudang Peluru lt 350m, ...  9000000000  400   

    LT  KT  KM  GRS      SKOR          TARGET  
0  220   3   3    0  0.143421        Worth it  
1  137   4   3    2  0.100435  Tidak Worth it  
2  250   4   4    4  0.232333        Worth it  
3   25   2   2    0  0.313953        Worth it  
4  355   6   5    3  0.108889  Tidak Worth it  


### 3. preprocessing data


In [None]:
print("\nSTEP 2: Preparing Data")
# Columns this cell reads from `df`: five features plus the two targets.
expected_columns = ['LB', 'LT', 'KT', 'KM', 'GRS', 'HARGA', 'TARGET']
missing_columns = [col for col in expected_columns if col not in df.columns]

if missing_columns:
    # Fail fast: every expected column is indexed below, so carrying on after a
    # warning would only resurface as a less helpful KeyError a few lines later.
    raise ValueError(
        f"Missing columns in dataset: {missing_columns}. "
        "Please ensure your dataset has columns: LB, LT, KT, KM, GRS, HARGA, TARGET"
    )
print("All required columns found in the dataset.")

# Feature matrix and the regression target (price).
X = df[['LB', 'LT', 'KT', 'KM', 'GRS']]
y_price = df['HARGA']

# Encode the TARGET column (Worth it / Tidak Worth it) as integer class codes.
le = LabelEncoder()
y_worth = le.fit_transform(df['TARGET'])
# label -> code lookup; kept so predictions can be turned back into labels later.
worth_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("\nTarget Encoding:")
for label, value in worth_mapping.items():
    print(f"{label} -> {value}")


STEP 2: Preparing Data
All required columns found in the dataset.

Target Encoding:
Tidak Worth it -> 0
Worth it -> 1


### 4. pembagian data train dan test

In [None]:
print("\nSTEP 3: Splitting Data (80% Training, 20% Testing)")
# Split the features and BOTH targets in a single call so all three are cut on
# exactly the same rows by construction, instead of relying on two separate
# calls happening to share the same seed. The partition depends only on the
# number of samples and random_state, so the resulting rows are unchanged.
(
    X_train, X_test,
    y_price_train, y_price_test,
    y_worth_train, y_worth_test,
) = train_test_split(X, y_price, y_worth, test_size=0.2, random_state=42)

print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")


STEP 3: Splitting Data (80% Training, 20% Testing)
Training samples: 808
Testing samples: 202


### 5. normalisasi data

In [46]:

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test-set information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)
print("Features standardized successfully.")

Features standardized successfully.


### 6. pelatihan model

In [47]:


# ---- Price regression (Random Forest Regressor) ----
print("\nTraining Price Prediction Model (Random Forest Regressor)...")
price_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_price_train
)

# Score the regressor on the held-out split.
y_price_pred = price_model.predict(X_test_scaled)
price_mse = mean_squared_error(y_price_test, y_price_pred)
price_rmse = np.sqrt(price_mse)
price_r2 = r2_score(y_price_test, y_price_pred)

print(
    "Price Model Performance:",
    f"MSE: {price_mse:.2f}",
    f"RMSE: {price_rmse:.2f}",
    f"R² Score: {price_r2:.4f}",
    sep="\n",
)

# ---- "Worth it" classification (Random Forest Classifier) ----
print("\nTraining Worth it Classification Model (Random Forest Classifier)...")
worth_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_worth_train
)

# Score the classifier on the held-out split.
y_worth_pred = worth_model.predict(X_test_scaled)
worth_accuracy = accuracy_score(y_worth_test, y_worth_pred)

print(
    "Worth it Classification Model Performance:",
    f"Accuracy: {worth_accuracy:.4f}",
    sep="\n",
)



Training Price Prediction Model (Random Forest Regressor)...
Price Model Performance:
MSE: 10694867775024037888.00
RMSE: 3270300869.19
R² Score: 0.7709

Training Worth it Classification Model (Random Forest Classifier)...
Worth it Classification Model Performance:
Accuracy: 0.7277


### Fungsi Prediksi

In [None]:
def predict_house(lb, lt, kt, km, grs):
    """Predict the price and the "worth it" label for a single house.

    Relies on the notebook-level objects ``scaler``, ``price_model``,
    ``worth_model`` and ``worth_mapping`` created in earlier cells.

    Args:
        lb, lt, kt, km, grs: Values for the LB, LT, KT, KM and GRS feature
            columns, in that order.

    Returns:
        dict: ``{'predicted_price': <regressor output>,
        'worth_it': <label from worth_mapping>}``.

    Raises:
        KeyError: If the classifier returns a code that is not a value of
            ``worth_mapping``.
    """
    # The scaler was fitted on a DataFrame, so hand it a one-row DataFrame with
    # the same column names. A bare ndarray makes scikit-learn warn that the
    # input has no valid feature names and skips its column-order check.
    features = pd.DataFrame(
        [[lb, lt, kt, km, grs]],
        columns=['LB', 'LT', 'KT', 'KM', 'GRS'],
    )

    # Apply the same scaling that was used for the training data.
    features_scaled = scaler.transform(features)

    # Predict price
    predicted_price = price_model.predict(features_scaled)[0]

    # Predict the class code, then translate it back to its label by inverting
    # the label -> code mapping.
    worth_it_class = worth_model.predict(features_scaled)[0]
    label_by_code = {code: label for label, code in worth_mapping.items()}
    worth_it_label = label_by_code[worth_it_class]

    return {
        'predicted_price': predicted_price,
        'worth_it': worth_it_label
    }

### test

In [None]:
# Sample listing used to smoke-test predict_house.
test_lb, test_lt = 300, 200            # LB and LT, both reported in m² below
test_kt, test_km, test_grs = 3, 1, 1   # bedrooms, bathrooms, garage spaces

# Run both models on the sample listing.
prediction = predict_house(test_lb, test_lt, test_kt, test_km, test_grs)

# Assemble the whole report first, then emit it in one go.
divider = "=" * 50
report_lines = [
    "\n" + divider,
    "HOUSE PRICE PREDICTION RESULT",
    divider,
    "Input Features:",
    f"- Land Building (LB): {test_lb} m²",
    f"- Land Area (LT): {test_lt} m²",
    f"- Bedrooms (KT): {test_kt}",
    f"- Bathrooms (KM): {test_km}",
    f"- Garage Spaces (GRS): {test_grs}",
    "\nPrediction:",
    f"- Predicted Price: Rp {prediction['predicted_price']:,.2f}",
    f"- Worth it Status: {prediction['worth_it']}",
    divider,
]
print("\n".join(report_lines))


HOUSE PRICE PREDICTION RESULT
Input Features:
- Land Building (LB): 300 m²
- Land Area (LT): 200 m²
- Bedrooms (KT): 3
- Bathrooms (KM): 1
- Garage Spaces (GRS): 1

Prediction:
- Predicted Price: Rp 5,884,055,555.55
- Worth it Status: Worth it




### 8. simpan model

In [None]:
# joblib is not imported anywhere else in this notebook, so without this line
# the cell raises NameError on a fresh kernel (Restart & Run All).
import joblib

# Persist everything needed to serve predictions later: both fitted models,
# the fitted feature scaler, and the label -> code mapping for the classifier.
# Files are written to the current working directory.
joblib.dump(price_model, 'price_model.pkl')
joblib.dump(worth_model, 'worth_model.pkl')
joblib.dump(scaler, 'feature_scaler.pkl')
joblib.dump(worth_mapping, 'worth_mapping.pkl')

['worth_mapping.pkl']