In [2]:
# Import library
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Configuration: every value a reader might want to tune lives here.
DATA_PATH = '/content/audi.csv'  # Replace with the path to your copy of the dataset
REFERENCE_YEAR = 2024  # Fixed "current" year used to turn 'year' into an age
TARGET_COLUMN = 'price'
CATEGORICAL_COLUMNS = ['model', 'transmission', 'fuelType']
TEST_SIZE = 0.2  # 80:20 train/test split
SEED = 42  # Shared by the split and the forest so the run is reproducible

# Load dataset
raw_data = pd.read_csv(DATA_PATH)

# Show the first five rows to confirm the dataset was read correctly
print(raw_data.head())

# Data preprocessing
# Replace 'year' with 'age'. A new frame is built instead of mutating in
# place, so raw_data stays available for inspection after this cell runs.
data = raw_data.assign(age=REFERENCE_YEAR - raw_data['year']).drop(columns=['year'])

# pd.get_dummies ignores NaN by default; combined with drop_first=True a
# missing category would become an all-zero row, indistinguishable from the
# dropped baseline category. Give missing categories an explicit level first.
data = data.fillna({column: 'Unknown' for column in CATEGORICAL_COLUMNS})

# Convert categorical features to numeric columns using one-hot encoding
data = pd.get_dummies(data, columns=CATEGORICAL_COLUMNS, drop_first=True)

# A row without a price can be neither learned from nor scored against, and
# imputing the target would fabricate labels, so such rows are dropped.
data = data.dropna(subset=[TARGET_COLUMN])

# Split data into features (X) and target (y)
X = data.drop(columns=[TARGET_COLUMN])  # Every column except the price
y = data[TARGET_COLUMN]  # Prediction target

# Split the dataset into a train set and a test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Handle missing feature values (if any) AFTER splitting: the medians are
# learned from the training rows only and then applied to both sets, so no
# information from the test set leaks into preprocessing.
# NOTE: `data` and `X` keep any missing feature values; only the train/test
# frames are imputed.
numeric_columns = X_train.select_dtypes(include='number').columns
train_medians = X_train[numeric_columns].median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Initialise and train the Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=SEED)
model.fit(X_train, y_train)

# Predict on the test data
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

# Example predictions
sample_data = X_test.iloc[0:5]  # Take the first 5 rows of the test set
sample_prediction = model.predict(sample_data)
print("Sample Prediksi Harga Mobil:")
print(sample_prediction)

  model  year  price transmission  mileage fuelType  tax   mpg  engineSize
0    A1  2017  12500       Manual    15735   Petrol  150  55.4         1.4
1    A6  2016  16500    Automatic    36203   Diesel   20  64.2         2.0
2    A1  2016  11000       Manual    29946   Petrol   30  55.4         1.4
3    A4  2017  16800    Automatic    25952   Diesel  145  67.3         2.0
4    A3  2019  17300       Manual     1998   Petrol  145  49.6         1.0
Mean Absolute Error (MAE): 1526.6150547910124
Mean Squared Error (MSE): 5267830.28019731
R² Score: 0.9651435999245749
Sample Prediksi Harga Mobil:
[ 9139.78   20698.09   30069.0975 26196.64   15915.05  ]


In [None]:
import kagglehub

# Kaggle dataset handle in "<owner>/<dataset-slug>" form.
DATASET_HANDLE = "adityadesai13/used-car-dataset-ford-and-mercedes"

# Fetch the latest version of the dataset and keep the location that
# kagglehub reports for the downloaded files.
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/adityadesai13/used-car-dataset-ford-and-mercedes?dataset_version_number=3...


100%|██████████| 1.10M/1.10M [00:00<00:00, 53.1MB/s]

Extracting files...
Path to dataset files: /root/.cache/kagglehub/datasets/adityadesai13/used-car-dataset-ford-and-mercedes/versions/3



