In [1]:
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data.dataset import random_split

## Загружаем данные [boston house prices](https://www.kaggle.com/datasets/vikrishnan/boston-house-prices)

In [2]:
boston_house_prices = r'archive/housing.csv'
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
labels_name = 'MEDV'

In [3]:
df = pd.read_csv(boston_house_prices,header=None, delimiter=r"\s+",names=column_names)

## Подготовка данных

In [4]:
features = torch.tensor(df.drop(labels_name, axis=1, inplace=False).values, dtype=torch.float32)
labels = torch.tensor(df[labels_name].to_frame().values, dtype=torch.float32)
labels = labels.squeeze()

In [5]:
features = torch.nn.functional.normalize(features, dim=0)
labels = torch.nn.functional.normalize(labels, dim=0)

In [6]:
dataset = TensorDataset(features, labels)

## Разделение данных на test и train

In [7]:
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size

In [8]:
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

In [9]:
data_iter = DataLoader(train_dataset, batch_size=train_size, shuffle=True)
test_iter = DataLoader(test_dataset, batch_size=test_size, shuffle=False)

## Создание класса модели

In [10]:
num_inputs = features.shape[1]
num_outputs = 1

In [11]:
model = torch.nn.Sequential(torch.nn.Linear(num_inputs, num_outputs))

In [12]:
loss = torch.nn.MSELoss(reduction='mean')

In [13]:
trainer = torch.optim.SGD(model.parameters(), lr=0.001)

## Обучение модели на train данных

In [14]:
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        trainer.zero_grad()
        l = loss(model(X).reshape(-1), y)
        l.backward()
        trainer.step()
    l = loss(model(features).reshape(-1), labels)
    print(f'epoch {epoch}, loss: {l.item():.6f}')

epoch 1, loss: 0.006465
epoch 2, loss: 0.006443
epoch 3, loss: 0.006421
epoch 4, loss: 0.006399
epoch 5, loss: 0.006377
epoch 6, loss: 0.006355
epoch 7, loss: 0.006333
epoch 8, loss: 0.006312
epoch 9, loss: 0.006290
epoch 10, loss: 0.006269
epoch 11, loss: 0.006248
epoch 12, loss: 0.006226
epoch 13, loss: 0.006205
epoch 14, loss: 0.006184
epoch 15, loss: 0.006163
epoch 16, loss: 0.006142
epoch 17, loss: 0.006121
epoch 18, loss: 0.006100
epoch 19, loss: 0.006080
epoch 20, loss: 0.006059
epoch 21, loss: 0.006039
epoch 22, loss: 0.006018
epoch 23, loss: 0.005998
epoch 24, loss: 0.005978
epoch 25, loss: 0.005957
epoch 26, loss: 0.005937
epoch 27, loss: 0.005917
epoch 28, loss: 0.005897
epoch 29, loss: 0.005878
epoch 30, loss: 0.005858
epoch 31, loss: 0.005838
epoch 32, loss: 0.005819
epoch 33, loss: 0.005799
epoch 34, loss: 0.005780
epoch 35, loss: 0.005760
epoch 36, loss: 0.005741
epoch 37, loss: 0.005722
epoch 38, loss: 0.005703
epoch 39, loss: 0.005684
epoch 40, loss: 0.005665
epoch 41,

## Проверка качества модели на тестовых данных

In [15]:
model.eval()
with torch.no_grad():
    for X_test, y_test in test_iter:
        test_loss = loss(model(X_test).reshape(-1), y_test)
    print(f"test loss: {test_loss:.6f}")

test loss: 0.005423
