# 1Ô∏è‚É£ Blok 1: Import Library & Setup

In [10]:
# Import semua library yang dibutuhkan
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Library Machine Learning (Scikit-Learn)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Library Deep Learning (TensorFlow/Keras)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Setup visualisasi agar grafik terlihat rapi
sns.set(style="whitegrid")

# Fungsi untuk mengevaluasi model (akan dipanggil nanti)
def evaluate_model(name, y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f"--- Evaluasi {name} ---")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"R2 Score: {r2:.4f}\n")
    return {'MAE': mae, 'MSE': mse, 'R2': r2}

print("‚úÖ Library berhasil di-load!")

‚úÖ Library berhasil di-load!


# 2Ô∏è‚É£ Blok 2: Load Dataset

In [11]:
# Mendefinisikan nama kolom (karena file asli tidak punya header)
cols = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
        'acceleration', 'model_year', 'origin', 'car_name']

# Membaca dataset
# delim_whitespace=True digunakan karena pemisah antar data adalah spasi
try:
    df = pd.read_csv('auto-mpg.data', names=cols, delim_whitespace=True)
    print("‚úÖ Dataset berhasil dibaca!")
    print("\n--- 5 Baris Pertama Data ---")
    display(df.head())
except FileNotFoundError:
    print("‚ùå Error: File 'auto-mpg.data' tidak ditemukan. Harap upload file dulu di menu sebelah kiri.")

‚úÖ Dataset berhasil dibaca!

--- 5 Baris Pertama Data ---


  df = pd.read_csv('auto-mpg.data', names=cols, delim_whitespace=True)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,car_name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


# 3Ô∏è‚É£ Blok 3: Data Cleaning & Preprocessing

In [12]:
# 1. Menangani nilai '?' pada kolom horsepower
print("Membersihkan data...")
df['horsepower'] = df['horsepower'].replace('?', np.nan)
df['horsepower'] = df['horsepower'].astype(float)

# 2. Mengisi nilai kosong (NaN) dengan Median
median_hp = df['horsepower'].median()
df['horsepower'] = df['horsepower'].fillna(median_hp) # Menggunakan assignment langsung untuk menghindari warning

# 3. Menghapus kolom 'car_name' (tidak dipakai prediksi)
df_clean = df.drop(columns=['car_name'])

# 4. Memisahkan Fitur (X) dan Target (y)
X = df_clean.drop(columns=['mpg'])
y = df_clean['mpg']

# 5. Split Data (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 6. Scaling (Penting untuk Deep Learning)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("‚úÖ Data Preparation selesai!")
print(f"Dimensi Training: {X_train.shape}")
print(f"Dimensi Testing: {X_test.shape}")

Membersihkan data...
‚úÖ Data Preparation selesai!
Dimensi Training: (318, 7)
Dimensi Testing: (80, 7)


# 4Ô∏è‚É£ Blok 4: Baseline Model

In [13]:
# Dictionary untuk menyimpan hasil semua model
results = {}

# Membuat Baseline Model (Dummy Regressor - Mean)
baseline = DummyRegressor(strategy="mean")
baseline.fit(X_train, y_train)

# Prediksi & Evaluasi
y_pred_base = baseline.predict(X_test)
results['Baseline'] = evaluate_model('Baseline Model', y_test, y_pred_base)

--- Evaluasi Baseline Model ---
MAE: 5.9554
MSE: 53.9833
R2 Score: -0.0040



# 5Ô∏è‚É£ Blok 5: Machine Learning (Random Forest)

In [14]:
# Membuat model Random Forest
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Prediksi & Evaluasi
y_pred_rf = rf_model.predict(X_test)
results['Random Forest'] = evaluate_model('Random Forest', y_test, y_pred_rf)

--- Evaluasi Random Forest ---
MAE: 1.5773
MSE: 4.5843
R2 Score: 0.9147



# 6Ô∏è‚É£ Blok 6: Deep Learning (Neural Network)

In [None]:
# Set seed agar hasil konsisten
tf.random.set_seed(42)

# Membuat arsitektur Neural Network
dl_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)), # Layer 1
    Dropout(0.2),                                                         # Mencegah overfitting
    Dense(32, activation='relu'),                                         # Layer 2
    Dense(1)                                                              # Output Layer
])

# Compile Model
dl_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Training Model (Epochs=100)
print("Sedang melatih Neural Network...")
history = dl_model.fit(
    X_train_scaled, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    verbose=0 # Ubah ke 1 jika ingin melihat log berjalan per baris
)

# Prediksi & Evaluasi
y_pred_dl = dl_model.predict(X_test_scaled).flatten()
print("\n")
results['Deep Learning'] = evaluate_model('Deep Learning', y_test, y_pred_dl)

Sedang melatih Neural Network...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 27ms/step


# 7Ô∏è‚É£ Blok 7: Visualisasi & Perbandingan Akhir

In [None]:
# Membuat DataFrame perbandingan
res_df = pd.DataFrame(results).T

print("=== TABEL PERBANDINGAN PERFORMA ===")
display(res_df)

# 1. Plot Bar Chart R2 Score
plt.figure(figsize=(10, 5))
sns.barplot(x=res_df.index, y=res_df['R2'], palette="viridis", hue=res_df.index, legend=False)
plt.title("Perbandingan R2 Score (Mendekati 1.0 = Lebih Baik)")
plt.ylabel("R2 Score")
plt.show()

# 2. Plot Loss History (Deep Learning)
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title("Grafik Pembelajaran Model Deep Learning (Loss)")
plt.xlabel("Epoch")
plt.ylabel("Loss (MSE)")
plt.legend()
plt.show()

# Blok 8: Finsishing

In [None]:
import os
import joblib
import matplotlib.pyplot as plt

# 1. BUAT STRUKTUR FOLDER DI COLAB
folders = ['data', 'models', 'images', 'src']
for folder in folders:
    os.makedirs(folder, exist_ok=True)
    print(f"‚úÖ Folder '{folder}' berhasil dibuat.")

# =========================================
# A. MENGISI FOLDER 'models' (Simpan Model)
# =========================================
# Simpan Baseline & Random Forest (format .pkl)
joblib.dump(baseline, 'models/model_baseline.pkl')
joblib.dump(rf_model, 'models/model_rf.pkl')

# Simpan Deep Learning (format .h5 atau .keras)
dl_model.save('models/model_dl.h5')
print("‚úÖ File model berhasil disimpan di folder 'models/'")

# =========================================
# B. MENGISI FOLDER 'images' (Simpan Grafik)
# =========================================
# Simpan Plot R2 Score
plt.figure(figsize=(10, 5))
sns.barplot(x=res_df.index, y=res_df['R2'], palette="viridis", hue=res_df.index, legend=False)
plt.title("Perbandingan Model R2 Score")
plt.ylabel("R2 Score")
plt.savefig('images/comparison_r2.png') # <--- Ini perintah simpannya
plt.close()

# Simpan Plot Loss History
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title("Training History")
plt.ylabel("Loss")
plt.legend()
plt.savefig('images/training_history.png') # <--- Ini perintah simpannya
plt.close()
print("‚úÖ File gambar berhasil disimpan di folder 'images/'")

# =========================================
# C. MENGISI FOLDER 'src' (Buat Script Python)
# =========================================
# Kita buat file python sederhana sebagai pelengkap
script_content = """
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def load_and_preprocess(filepath):
    # Load Data
    cols = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
            'acceleration', 'model_year', 'origin', 'car_name']
    df = pd.read_csv(filepath, names=cols, delim_whitespace=True)

    # Cleaning
    df['horsepower'] = df['horsepower'].replace('?', float('nan'))
    df['horsepower'] = df['horsepower'].astype(float)
    df['horsepower'] = df['horsepower'].fillna(df['horsepower'].median())
    df = df.drop(columns=['car_name'])

    return df

if __name__ == "__main__":
    df = load_and_preprocess('../data/auto-mpg.data')
    print("Data loaded successfully with shape:", df.shape)
"""

with open('src/data_loader.py', 'w') as f:
    f.write(script_content)
print("‚úÖ File script python berhasil disimpan di folder 'src/'")

# =========================================
# D. MENGISI FOLDER 'data'
# =========================================
# Copy file auto-mpg.data ke dalam folder data
import shutil
try:
    shutil.copy('auto-mpg.data', 'data/auto-mpg.data')
    print("‚úÖ Dataset berhasil disalin ke folder 'data/'")
except:
    print("‚ö†Ô∏è Pastikan file 'auto-mpg.data' sudah ada di Colab.")

print("\nüéâ SEMUA FILE SIAP! Silakan download dari menu Files di sebelah kiri.")