<a href="https://colab.research.google.com/github/Kinnaruo/DeepLearning/blob/main/Week%202/Regression_Second_Hand.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [25]:
df = pd.read_csv('secondhanddataset.csv')
df.dropna(inplace=True)
df

Unnamed: 0,v.id,on road old,on road now,years,km,rating,condition,economy,top speed,hp,torque,current price
0,1,535651,798186,3,78945,1,2,14,177,73,123,351318.0
1,2,591911,861056,6,117220,5,9,9,148,74,95,285001.5
2,3,686990,770762,2,132538,2,8,15,181,53,97,215386.0
3,4,573999,722381,4,101065,4,3,11,197,54,116,244295.5
4,5,691388,811335,6,61559,3,9,12,160,53,105,531114.5
...,...,...,...,...,...,...,...,...,...,...,...,...
995,996,633238,743850,5,125092,1,6,11,171,95,97,190744.0
996,997,599626,848195,4,83370,2,9,14,161,101,120,419748.0
997,998,646344,842733,7,86722,1,8,9,196,113,89,405871.0
998,999,535559,732439,2,140478,4,5,9,184,112,128,74398.0


In [26]:
X = df.drop(columns=["current price"]).values
y = df["current price"].values

In [27]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Train target distribution:\n", pd.Series(y_train).describe())
print("\nTest target distribution:\n", pd.Series(y_test).describe())

y_train = scaler.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler.transform(y_test.reshape(-1, 1)).flatten()

Train target distribution:
 count       800.000000
mean     307772.246875
std      125148.920732
min       28226.500000
25%      204578.000000
50%      306995.250000
75%      413643.625000
max      584267.500000
dtype: float64

Test target distribution:
 count       200.000000
mean     311512.225000
std      129982.499405
min       58384.000000
25%      216705.375000
50%      301689.000000
75%      419805.500000
max      559074.000000
dtype: float64


# PyTorch

In [28]:
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

class MLPModel(nn.Module):
    def __init__(self):
        super(MLPModel, self).__init__()
        self.fc1 = nn.Linear(X.shape[1], 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model_torch = MLPModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model_torch.parameters(), lr=0.01)

epochs = 500
for epoch in range(epochs):
    model_torch.train()
    optimizer.zero_grad()
    output = model_torch(X_train_torch)
    loss = criterion(output, y_train_torch)
    loss.backward()
    optimizer.step()

# TensorFlow

In [29]:
input_layer = keras.Input(shape=(X.shape[1],))
model_tf = Sequential([
    input_layer,
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1)
])
model_tf.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

model_tf.fit(X_train, y_train, epochs=500, verbose=0, batch_size=4)

<keras.src.callbacks.history.History at 0x78b3baa6c510>

# Matriks Evaluasi

In [30]:
# Evaluasi Model PyTorch
model_torch.eval()
y_pred_torch = model_torch(X_test_torch).detach().numpy()

mse_torch = mean_squared_error(y_test, y_pred_torch)
rmse_torch = np.sqrt(mse_torch)
r2_torch = r2_score(y_test, y_pred_torch)

print(f"PyTorch:\nRMSE: {rmse_torch:.4f}\nMSE: {mse_torch:.4f}\nR2: {r2_torch:.4f}")

# Evaluasi Model TensorFlow
y_pred_tf = model_tf.predict(X_test)

mse_tf = mean_squared_error(y_test, y_pred_tf)
rmse_tf = np.sqrt(mse_tf)
r2_tf = r2_score(y_test, y_pred_tf)

print(f"TensorFlow:\nRMSE: {rmse_tf:.4f}\nMSE: {mse_tf:.4f}\nR2: {r2_tf:.4f}")

PyTorch:
RMSE: 0.0498
MSE: 0.0025
R2: 0.9977
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
TensorFlow:
RMSE: 0.0186
MSE: 0.0003
R2: 0.9997


##**Mean Squared Error (MSE):**
MSE mengukur rata-rata dari kuadrat selisih antara nilai aktual $y$ dan nilai prediksi $\hat{y}$. Semakin mendekati 0, berarti semakin kecil error yang dibuat model.

  $$
  MSE = \frac{1}{n} \sum (y_i - \hat{y}_i)^2
  $$

Di mana:  
- $y_i$ = Nilai aktual (data sebenarnya)  
- $\hat{y}_i$ = Nilai prediksi dari model  
- $n$ = Jumlah sampel  

##**Root Mean Squared Error (RMSE):**
RMSE adalah akar kuadrat dari MSE, sehingga unitnya sama dengan unit target. Sama seperti MSE, semakin mendekati 0, semakin bagus.
  $$
  RMSE = \sqrt{MSE}
  $$

##**R-squared (\( R^2 \)):**
$R^2$ mengukur seberapa baik model menjelaskan variabilitas data. Nilainya antara **0 dan 1**, semakin mendekati **1** semakin bagus. Jika nilainya lebih kecil dari 0, maka model lebih buruk daripada rata-rata.

  $$
  R^2 = 1 - \frac{\sum (y_i - \hat{y}_i)^2}{\sum (y_i - \bar{y})^2}
  $$