In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [27]:
# Read the raw dataset from the CSV file into a DataFrame.
# NOTE(review): the absolute path looks environment-specific — confirm it
# exists wherever this notebook is executed.
data = pd.read_csv(filepath_or_buffer='/content/data.csv')

In [28]:
# Select the numeric feature columns and the target.
feature_columns = [
    'Year',
    'Engine HP',
    'Engine Cylinders',
    'highway MPG',
    'city mpg',
    'Popularity',
]

# Missing feature values are replaced with 0 so the estimators receive no NaNs.
# NOTE(review): 0 may not be a meaningful stand-in for every column
# (e.g. 'Engine HP') — confirm this imputation choice is intended.
X = data.loc[:, feature_columns].fillna(value=0)

# Regression target.
y = data['MSRP']

In [29]:
# Split the data into train and test sets: 20% of the rows are held out for
# testing, and the fixed random_state keeps the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
# Model 1: simple linear regression on the selected features.
# fit() returns the estimator itself, so construction and training are chained.
linear_model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out test features.
y_pred_linear = linear_model.predict(X_test)

In [31]:
# Model 2: linear regression on a polynomial basis expansion (degree 2).
# NOTE(review): PolynomialFeatures is left at its defaults apart from the
# degree; if the default bias column is emitted it is presumably redundant with
# the intercept LinearRegression fits — confirm against the scikit-learn docs.
poly = PolynomialFeatures(degree=2)

# Fit the expansion on the training features only, then apply that same
# fitted expansion to the test features.
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Train on the expanded features and predict for the expanded test set.
poly_model = LinearRegression().fit(X_train_poly, y_train)
y_pred_poly = poly_model.predict(X_test_poly)

In [32]:
# Compute the evaluation metrics
def calculate_metrics(y_true, y_pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, passed alongside ``y_true`` to
            ``mean_squared_error`` and ``r2_score``.

    Returns:
        A 3-tuple ``(mse, rmse, r2)``: the mean squared error, its square
        root, and the R^2 score.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    # RMSE is derived from the MSE rather than recomputed from the inputs.
    return squared_error, np.sqrt(squared_error), r2_score(y_true, y_pred)

# Score both models against the same held-out test targets.
mse_linear, rmse_linear, r2_linear = calculate_metrics(
    y_true=y_test,
    y_pred=y_pred_linear,
)
mse_poly, rmse_poly, r2_poly = calculate_metrics(
    y_true=y_test,
    y_pred=y_pred_poly,
)

In [33]:
# Print the results: a heading line followed by the metrics line for each model.
print(
    "Linear Regression:",
    f"MSE: {mse_linear}, RMSE: {rmse_linear}, R^2: {r2_linear}",
    sep="\n",
)

# The leading "\n" in the heading leaves a blank line between the two reports.
print(
    "\nPolynomial Basis Regression:",
    f"MSE: {mse_poly}, RMSE: {rmse_poly}, R^2: {r2_poly}",
    sep="\n",
)

Linear Regression:
MSE: 1061058627.7650267, RMSE: 32573.89488171512, R^2: 0.5548447760048814

Polynomial Basis Regression:
MSE: 639842631.2028921, RMSE: 25295.1108161813, R^2: 0.7315612140917198
