<a href="https://colab.research.google.com/github/Putu-Bagus-Arjawa/LinearRegressionGradientDescent/blob/main/Gradient_Descent_Linear_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
df = pd.read_csv("harga_rumah.csv")
X = df.drop("harga", axis=1)
y = df["harga"].values
X = pd.get_dummies(X, columns=["kabupaten_Jakarta", "kabupaten_Surabaya"], drop_first=True)
df.head()


Unnamed: 0,luas_tanah,harga,kabupaten_Jakarta,kabupaten_Surabaya
0,152,1226,1,0
1,485,1559,0,0
2,398,960,0,1
3,320,1394,1,0
4,156,1230,0,0


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X.values, y, test_size=0.2, random_state=42)

print("Ukuran Training Data X:", X_train.shape)
print("Ukuran Testing Data X:", X_test.shape)
print("Ukuran Training Data y:", y_train.shape)
print("Ukuran Testing Data y:", y_test.shape)

Ukuran Training Data X: (80, 3)
Ukuran Testing Data X: (20, 3)
Ukuran Training Data y: (80,)
Ukuran Testing Data y: (20,)


In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
class LinearRegressionGD:
    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.w = None
        self.b = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.epochs):
            y_pred = np.dot(X, self.w) + self.b
            dw = (1/n_samples) * np.dot(X.T, (y_pred - y))
            db = (1/n_samples) * np.sum(y_pred - y)

            self.w -= self.lr * dw
            self.b -= self.lr * db

    def predict(self, X):
        return np.dot(X, self.w) + self.b

    def wb(self):
        return self.w, self.b


In [7]:
model = LinearRegressionGD(lr=0.1, epochs=25)
model.fit(X_train, y_train)

w, b = model.wb()
print("Weight(w): ", w)
print("Bias(b): ", b)



Weight(w):  [  29.21919128  -54.63362725 -122.69447781]
Bias(b):  1010.2523803921034


In [16]:
def prepare_input(luas_tanah, kota):
    kabupaten_Jakarta = 0
    kabupaten_Surabaya = 0

    if kota.lower() == "jakarta":
        kabupaten_Jakarta = 1
    elif kota.lower() == "surabaya":
        kabupaten_Surabaya = 1

    df = pd.DataFrame([{
        "luas_tanah": luas_tanah,
        "kabupaten_Jakarta": kabupaten_Jakarta,
        "kabupaten_Surabaya": kabupaten_Surabaya
    }])

    df_scaled = scaler.transform(df)
    return df_scaled
x_input = prepare_input(320, "Jakarta")
y_pred = model.predict(x_input)
print("Harga Tanahnya: ", y_pred[0])


Harga Tanahnya:  1045.403448283158




In [17]:
mse = np.mean((y_test - y_pred)**2)
rmse = np.sqrt(mse)

print("mse model: ", mse)
print("rmse model: ", rmse)

mse model:  190049.0786331063
rmse model:  435.9461877721909
