In [107]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import numpy as np
import torch

#### Dataset

In [108]:
# Load dataset
X, y = fetch_california_housing(download_if_missing=True, return_X_y=True)

In [109]:
# train, test, validation

# train & dataset(validation, test)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)

# validation & test
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f'Sizes\n\ntrain: {len(X_train)}\nvalidation:{len(X_test)}\ntest:{len(X_test)}')

Sizes

train: 14448
validation:3096
test:3096


#### Standard

In [110]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

#### Convert to Tensor

In [111]:
X_train_torch = torch.from_numpy(X_train).float()
X_val_torch = torch.from_numpy(X_val).float()
X_test_torch = torch.from_numpy(X_test).float()
y_train_torch = torch.from_numpy(y_train).float()
y_val_torch = torch.from_numpy(y_val).float()
y_test_torch = torch.from_numpy(y_test).float()

In [112]:
# transform data
train_dataset = list(zip(X_train_torch, y_train_torch))
val_dataset = list(zip(X_val_torch, y_val_torch))
test_dataset = list(zip(X_test_torch, y_test_torch))

features, label = train_dataset[10]

print(f'Features: {features}\n\nLabel: {label}')

Features: tensor([-0.4533,  0.9850, -0.6216, -0.3787,  0.0939,  0.1332, -0.8150,  0.7010])

Label: 1.281000018119812


#### DataLoader

In [113]:
# seed
np.random.seed(42)
torch.manual_seed(42)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, num_workers=1)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, num_workers=1)

train_batch, _label = next(iter(train_loader))
val_batch, _label = next(iter(val_loader))
test_batch, _label = next(iter(test_loader))

print('train\t', train_batch.shape)
print('val\t\t', val_batch.shape)
print('test\t', test_batch.shape)

train	 torch.Size([128, 8])
val		 torch.Size([32, 8])
test	 torch.Size([32, 8])


#### Perceptron

In [114]:
import torch.nn.functional as F
from torch import nn

class Perceptron(nn.Module):
    def __init__(self, input_dim, output_dim, activation='relu'):
        super().__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.activation = activation

    def reset_parameters(self):
        self.fc.reset_parameters()

    def forward(self, x):
        x = self.fc(x)
        if self.activation == 'relu':
            x = F.relu(x)
        if self.activation == 'sigmoid':
            x = F.sigmoid(x)

        return x

class FeedForward(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.fc1 = Perceptron(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.dp1 = nn.Dropout(0.25)
        self.lay2 = 16
        self.fc2 = Perceptron(hidden_dim, self.lay2)
        self.bn2 = nn.BatchNorm1d(self.lay2)
        self.dp2 = nn.Dropout(0.25)
        self.fc3 = Perceptron(self.lay2, 1)

    def reset_parameters(self):
        self.fc1.reset_parameters()
        self.fc2.reset_parameters()
        self.fc3.reset_parameters()

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.dp1(x)

        x = self.fc2(x)
        x = self.bn2(x)
        x = self.dp2(x)

        x = self.fc3(x)
        return x

In [115]:
def train_model(epochs, optimizer):
    # clear weights
    price.reset_parameters()

    num_epochs = 20

    for epoch in range(num_epochs):
        running_loss, running_items = 0.0, 0.0
        val_loss, val_items, val_r2 = 0.0, 0.0, 0.0

        for i, data in enumerate(train_loader):
            inputs, labels = data
            labels = labels.view(-1, 1)

            inputs = inputs.to('cuda')
            labels = labels.to('cuda')

            optimizer.zero_grad()

            # predictions
            outputs = price(inputs)
            loss = F.mse_loss(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_items += len(labels)

        # R2
        with torch.no_grad():
            for val_data in val_loader:
                val_inputs, val_labels = val_data
                val_labels = val_labels.view(-1, 1)

                val_inputs = val_inputs.to('cuda')
                val_labels = val_labels.to('cuda')

                val_outputs = price(val_inputs)
                val_loss += F.mse_loss(val_outputs, val_labels).item()
                val_items += len(val_labels)

                val_labels_cpu = val_labels.cpu()
                val_outputs_cpu = val_outputs.cpu()
                val_r2 += r2_score(val_labels_cpu, val_outputs_cpu)

        # mean R^2
        val_r2 /= len(val_loader)


        print(f'Epoch [{epoch + 1}/{num_epochs}]. '
              f'Step [{len(train_loader)}/{len(train_loader)}]. '
              f'Loss: {running_loss / running_items:.3f}. '
              f'Val Loss: {val_loss / val_items:.3f}. '
              f'R^2: {val_r2:.3f}')

        # busy memory
        # allocated_memory = torch.cuda.memory_allocated()
        # cached_memory = torch.cuda.memory_reserved()

        # print(f"Allocated GPU memory: {allocated_memory} bytes")
        # print(f"Cached GPU memory: {cached_memory} bytes")

    print('Training is finished!')

    # clear GPU
    torch.cuda.empty_cache()

def new_model():
    price = FeedForward(8, 32)  # inputs: 8 features, hidden_layer 20
    price = price.to('cuda')

    return price

In [116]:
# SGD
price = new_model()
train_model(20, torch.optim.SGD(price.parameters(), lr=0.01, weight_decay=0.01))

Epoch [1/20]. Step [113/113]. Loss: 0.017. Val Loss: 0.035. R^2: 0.064
Epoch [2/20]. Step [113/113]. Loss: 0.007. Val Loss: 0.028. R^2: 0.258
Epoch [3/20]. Step [113/113]. Loss: 0.006. Val Loss: 0.024. R^2: 0.356
Epoch [4/20]. Step [113/113]. Loss: 0.005. Val Loss: 0.022. R^2: 0.421
Epoch [5/20]. Step [113/113]. Loss: 0.005. Val Loss: 0.020. R^2: 0.461
Epoch [6/20]. Step [113/113]. Loss: 0.005. Val Loss: 0.020. R^2: 0.473
Epoch [7/20]. Step [113/113]. Loss: 0.005. Val Loss: 0.019. R^2: 0.504
Epoch [8/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.018. R^2: 0.529
Epoch [9/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.018. R^2: 0.528
Epoch [10/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.017. R^2: 0.544
Epoch [11/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.018. R^2: 0.535
Epoch [12/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.561
Epoch [13/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.017. R^2: 0.557
Epoch [14/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.561
E

In [117]:
# Adam
price = FeedForward(8, 32)  # inputs: 8 features, hidden_layer 20
price = price.to('cuda')
train_model(20, torch.optim.Adam(price.parameters(), lr=0.01, weight_decay=0.01))

Epoch [1/20]. Step [113/113]. Loss: 0.010. Val Loss: 0.022. R^2: 0.413
Epoch [2/20]. Step [113/113]. Loss: 0.005. Val Loss: 0.018. R^2: 0.507
Epoch [3/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.018. R^2: 0.535
Epoch [4/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.017. R^2: 0.553
Epoch [5/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.017. R^2: 0.537
Epoch [6/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.570
Epoch [7/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.566
Epoch [8/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.579
Epoch [9/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.015. R^2: 0.584
Epoch [10/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.571
Epoch [11/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.589
Epoch [12/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.575
Epoch [13/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.570
Epoch [14/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.015. R^2: 0.586
E

In [118]:
# RMSprop
price = FeedForward(8, 32)  # inputs: 8 features, hidden_layer 20
price = price.to('cuda')
train_model(20, torch.optim.RMSprop(price.parameters(), lr=0.01, weight_decay=0.01))

Epoch [1/20]. Step [113/113]. Loss: 0.007. Val Loss: 0.021. R^2: 0.445
Epoch [2/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.018. R^2: 0.499
Epoch [3/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.018. R^2: 0.534
Epoch [4/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.017. R^2: 0.532
Epoch [5/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.557
Epoch [6/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.562
Epoch [7/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.015. R^2: 0.591
Epoch [8/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.566
Epoch [9/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.548
Epoch [10/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.578
Epoch [11/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.015. R^2: 0.585
Epoch [12/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.578
Epoch [13/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.015. R^2: 0.601
Epoch [14/20]. Step [113/113]. Loss: 0.004. Val Loss: 0.016. R^2: 0.574
E

In [119]:
def evaluate_model(model, data_loader):
    model.eval()

    total_loss = 0.0
    total_items = 0.0
    with torch.no_grad():
        for data in data_loader:
            inputs, labels = data
            labels = labels.view(-1, 1)

            inputs = inputs.to('cuda')
            labels = labels.to('cuda')

            outputs = model(inputs)
            loss = F.mse_loss(outputs, labels, reduction='sum')
            total_loss += loss.item()
            total_items += len(labels)

    mean_loss = total_loss / total_items
    return mean_loss

# Проверка модели на тестовых данных
test_loss = evaluate_model(price, test_loader)
print(f'Test Loss: {test_loss:.3f}')

Test Loss: 0.397


Conclusion: Adam best