In [1]:
# 1. Download dataset
!gdown --id 1qiUDDoYyRLBiKOoYWdFl_5WByHE8Cugu


Downloading...
From: https://drive.google.com/uc?id=1qiUDDoYyRLBiKOoYWdFl_5WByHE8Cugu
To: /content/Auto_MPG_data.csv
100% 15.4k/15.4k [00:00<00:00, 29.7MB/s]


In [4]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [5]:
# 3. Cài đặt thiết bị tính toán
random_state = 59
np.random.seed(random_state)
torch.manual_seed(random_state)
if torch.cuda.is_available():
    torch.cuda.manual_seed(random_state)

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# 5. Read dataset
dataset_path = '/content/Auto_MPG_data.csv'
dataset = pd.read_csv(dataset_path)

In [10]:
# (a) Tách đặc trưng X và nhãn y
X = dataset.drop(columns='MPG').values
y = dataset['MPG'].values

# (b) Chia dữ liệu train/val/test
val_size = 0.2
test_size = 0.125
is_shuffle = True

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=val_size, random_state=random_state, shuffle=is_shuffle)

X_train, X_test, y_train, y_test = train_test_split(
    X_train, y_train, test_size=test_size, random_state=random_state, shuffle=is_shuffle)

# (c) Chuẩn hóa dữ liệu
normalizer = StandardScaler()
X_train = normalizer.fit_transform(X_train)
X_val = normalizer.transform(X_val)
X_test = normalizer.transform(X_test)

# Chuyển đổi sang tensor
X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)


In [11]:
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

batch_size = 32
train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [12]:
# 8. Build MLP model
class MLP(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.linear1 = nn.Linear(input_dims, hidden_dims)
        self.linear2 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)

    def forward(self, x):
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = F.relu(x)
        out = self.output(x)
        return out.squeeze(1)

input_dims = X_train.shape[1]
output_dims = 1
hidden_dims = 64

model = MLP(input_dims=input_dims,
            hidden_dims=hidden_dims,
            output_dims=output_dims).to(device)

In [13]:
lr = 1e-2
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)


In [15]:
def r_squared(y_true, y_pred):
    y_true = torch.Tensor(y_true).to(device)
    y_pred = torch.Tensor(y_pred).to(device)
    mean_true = torch.mean(y_true)
    ss_tot = torch.sum((y_true - mean_true) ** 2)
    ss_res = torch.sum((y_true - y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2


In [16]:
epochs = 100
train_losses, val_losses = [], []
train_r2, val_r2 = [], []

for epoch in range(epochs):
    train_loss, val_loss = 0.0, 0.0
    train_target, train_predict = [], []
    val_target, val_predict = [], []

    # Training phase
    model.train()
    for X_samples, y_samples in train_loader:
        X_samples, y_samples = X_samples.to(device), y_samples.to(device)
        optimizer.zero_grad()
        outputs = model(X_samples)
        train_predict += outputs.tolist()
        train_target += y_samples.tolist()
        loss = criterion(outputs, y_samples)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    train_r2.append(r_squared(train_target, train_predict))

    # Validation phase
    model.eval()
    with torch.no_grad():
        for X_samples, y_samples in val_loader:
            X_samples, y_samples = X_samples.to(device), y_samples.to(device)
            outputs = model(X_samples)
            val_predict += outputs.tolist()
            val_target += y_samples.tolist()
            loss = criterion(outputs, y_samples)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    val_losses.append(val_loss)
    val_r2.append(r_squared(val_target, val_predict))

    print(f'EPOCH {epoch + 1}: Training loss: {train_loss:.3f}, Validation loss: {val_loss:.3f}')


EPOCH 1: Training loss: 282.769, Validation loss: 88.672
EPOCH 2: Training loss: 137.669, Validation loss: 72.346
EPOCH 3: Training loss: 71.007, Validation loss: 19.143
EPOCH 4: Training loss: 25.083, Validation loss: 196.176
EPOCH 5: Training loss: 96.139, Validation loss: 20.444
EPOCH 6: Training loss: 17.765, Validation loss: 9.444
EPOCH 7: Training loss: 18.486, Validation loss: 14.535
EPOCH 8: Training loss: 37.859, Validation loss: 37.427
EPOCH 9: Training loss: 17.133, Validation loss: 38.134
EPOCH 10: Training loss: 22.991, Validation loss: 41.183
EPOCH 11: Training loss: 26.723, Validation loss: 20.063
EPOCH 12: Training loss: 9.852, Validation loss: 5.594
EPOCH 13: Training loss: 15.143, Validation loss: 16.025
EPOCH 14: Training loss: 12.213, Validation loss: 12.023
EPOCH 15: Training loss: 14.222, Validation loss: 7.731
EPOCH 16: Training loss: 10.845, Validation loss: 18.904
EPOCH 17: Training loss: 12.312, Validation loss: 14.885
EPOCH 18: Training loss: 15.474, Validati

In [17]:
model.eval()
with torch.no_grad():
    y_hat = model(X_test)
    test_set_r2 = r_squared(y_hat, y_test)
    print('Evaluation on test set:')
    print(f'R2: {test_set_r2}')

Evaluation on test set:
R2: 0.8417800068855286
