# Polynomial Regression
## Regresi dengan polinomial untuk menangani hubungan non-linear.

### Study Case
#### Prediksi Harga Rumah di Tebet dengan Regresi Polinomial

##### Import Library


In [28]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
import plotly.graph_objects as go
from scipy.stats import gaussian_kde

#### Load Data

In [29]:
# Load the Tebet house-listing spreadsheet into a DataFrame.
# (The original chained `tebet_df = tebet_df = ...` bound the same name twice.)
tebet_df = pd.read_excel('dataset/DATA RUMAH TEBET.xlsx')
tebet_df

Unnamed: 0,NO,NAMA RUMAH,HARGA,LB,LT,KT,KM,GRS
0,1,"Rumah Murah Hook Tebet Timur, Tebet, Jakarta S...",3800000000,220,220,3,3,0
1,2,"Rumah Modern di Tebet dekat Stasiun, Tebet, Ja...",4600000000,180,137,4,3,2
2,3,"Rumah Mewah 2 Lantai Hanya 3 Menit Ke Tebet, T...",3000000000,267,250,4,4,4
3,4,"Rumah Baru Tebet, Tebet, Jakarta Selatan",430000000,40,25,2,2,0
4,5,"Rumah Bagus Tebet komp Gudang Peluru lt 350m, ...",9000000000,400,355,6,5,3
...,...,...,...,...,...,...,...,...
1005,1006,Rumah Strategis Akses Jalan 2mobil Di Menteng ...,9000000000,450,550,10,10,3
1006,1007,Tebet Rumah Siap Huni Jln 2 Mbl Nyaman,4000000000,160,140,4,3,2
1007,1008,"Di Kebun Baru Rumah Terawat, Area Strategis",4000000000,139,230,4,4,1
1008,1009,Dijual Cepat Rumah Komp Depkeu Dr Soepomo Tebe...,19000000000,360,606,7,4,0


#### EDA

In [30]:
# Price column whose distribution is being inspected
x = tebet_df['HARGA']

# Gaussian kernel density estimate (bandwidth factor 0.5), evaluated on
# 100 evenly spaced points spanning the observed price range
kde = gaussian_kde(x, bw_method=0.5)
x_range = np.linspace(x.min(), x.max(), 100)
kde_values = kde(x_range)

# Build both traces up front: the smooth KDE curve and a density-normalised
# histogram drawn semi-transparently for reference
density_line = go.Scatter(x=x_range, y=kde_values, mode='lines', name='KDE')
price_histogram = go.Histogram(
    x=x,
    histnorm='probability density',
    nbinsx=50,
    name='Histogram',
    opacity=0.5,
)

# Assemble the figure (KDE first, histogram second) and label it
fig = go.Figure(data=[density_line, price_histogram])
fig.update_layout(
    title='Density Plot of HARGA',
    xaxis_title='HARGA',
    yaxis_title='Density',
    showlegend=True,
    bargap=0.1,
)

# Render the plot
fig.show()



##### Analysis

In [31]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
tebet_df.describe()

Unnamed: 0,NO,HARGA,LB,LT,KT,KM,GRS
count,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0
mean,505.5,7628987000.0,276.539604,237.432673,4.668317,3.607921,1.920792
std,291.706188,7340946000.0,177.864557,179.957604,1.572776,1.420066,1.510998
min,1.0,430000000.0,40.0,25.0,2.0,1.0,0.0
25%,253.25,3262500000.0,150.0,130.0,4.0,3.0,1.0
50%,505.5,5000000000.0,216.5,165.0,4.0,3.0,2.0
75%,757.75,9000000000.0,350.0,290.0,5.0,4.0,2.0
max,1010.0,65000000000.0,1126.0,1400.0,10.0,10.0,10.0


In [32]:
# Count the null entries in each column, then report them
missing_per_column = tebet_df.isna().sum()
print('Missing values per column:\n', missing_per_column)

Missing values per column:
 NO            0
NAMA RUMAH    0
HARGA         0
LB            0
LT            0
KT            0
KM            0
GRS           0
dtype: int64


In [33]:
# 'NO' is a unique running row number, so comparing complete rows can never
# flag a duplicate. Compare the listing content only (every column except 'NO').
print("Total duplicate: ", tebet_df.drop(columns=['NO']).duplicated().sum())

Total duplicate:  0


### Feature Selection
#### Pemilihan fitur yang digunakan untuk prediksi.


In [34]:
# Use all available features as predictors; the price column is the target
feature_columns = ['LB', 'LT', 'KT', 'KM', 'GRS']
X = tebet_df[feature_columns]
y = tebet_df['HARGA']


### Data Splitting
#### Membagi data menjadi data latih dan data uji

In [35]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


### Scaling
#### Melakukan standardisasi pada fitur.

In [36]:
# Learn the scaling statistics from the training split only
scaler = StandardScaler().fit(X_train)

# Apply those training-derived statistics to both splits
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


#### Modeling dengan Polynomial Regression


In [37]:
# Two-stage estimator: degree-2 polynomial feature expansion (without the
# constant bias column) followed by a linear regression fit
model = Pipeline(steps=[
    ('poly', PolynomialFeatures(include_bias=False, degree=2)),
    ('linear', LinearRegression()),
])

In [38]:
# Train on the scaled training split, then predict the scaled test split
# (fit() returns the fitted pipeline, so the two calls can be chained)
y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

In [39]:
# Evaluation metrics
# Predict on the test data
y_pred = model.predict(X_test_scaled)

# Compute MSE, RMSE (square root of MSE) and R-squared against the true prices
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report the three metrics, one per line
print(f"MSE: {mse}\nRMSE: {rmse}\nR-squared: {r2}")

MSE: 1.095543583525105e+19
RMSE: 3309899671.4781327
R-squared: 0.7653218458616602


#### Visualisasi dan hasil prediksi

In [40]:
# Predicted-vs-actual scatter with a dashed reference line marking perfect predictions
lowest_actual, highest_actual = min(y_test), max(y_test)

predicted_points = go.Scatter(
    x=y_test,
    y=y_pred,
    mode='markers',
    name='Predicted vs Actual',
)
# Diagonal y = x across the range of actual prices: points on it are exact predictions
ideal_line = go.Scatter(
    x=[lowest_actual, highest_actual],
    y=[lowest_actual, highest_actual],
    mode='lines',
    line=dict(color='red', dash='dash'),
    name='Ideal Prediction',
)

fig = go.Figure(data=[predicted_points, ideal_line])
fig.update_layout(
    title='Polynomial Regression: Predicted vs Actual Prices',
    xaxis_title='Actual Prices',
    yaxis_title='Predicted Prices',
    showlegend=True,
)
fig.show()