## Импорт библиотек

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader


In [None]:
# Mini-batch size handed to the DataLoader that feeds the training loop.
batch_size = 10

# Чтение данных

Предложенные данные (https://www.kaggle.com/datasets/vikrishnan/boston-house-prices) не содержат строки заголовков, поэтому имена столбцов нужно задать самостоятельно. Так как изучать природу данных в задаче не требуется, достаточно дать признакам условные имена (`col0`…`col12`) и отдельно обозначить целевой столбец (`tg`).

In [None]:
# housing.csv has no header row and is whitespace-separated, so supply generic
# feature names (col0..col12) and name the last column 'tg' (the target).
df = pd.read_csv(
    'housing.csv',
    sep=r"\s+",
    header=None,
    names=[*(f'col{idx}' for idx in range(13)), 'tg'],
)

df.head(5)

Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,tg
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


Проверим размер.

In [None]:
# Show the (rows, columns) size of the loaded frame.
df.shape

(506, 14)

Создадим вспомогательный фрейм без целевого столбца, а затем разобьём данные на тренировочную и тестовую выборки.

In [None]:
# Feature-only frame: every column except the target 'tg'.
df_wotg = df.drop(columns=['tg'])

In [None]:
# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# split (and everything trained on it) is reproducible across
# "Restart Kernel -> Run All".
x_train, x_test, y_train, y_test = train_test_split(
    df_wotg, df['tg'], test_size=0.2, random_state=42
)

In [None]:
# Print the shape of each split: train/test features first, then train/test targets.
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)

(404, 13)
(102, 13)
(404,)
(102,)


Преобразуем данные в тензоры. Для тренировочного набора признаков включим вычисление градиента (`requires_grad=True`).

In [None]:
# Convert the training features to a float32 tensor and flag it for autograd.
# NOTE(review): the optimizer in this notebook is built from model.parameters()
# only, so tracking gradients w.r.t. the input features looks unnecessary —
# confirm whether requires_grad is really needed here.
x_train_tensor = torch.tensor(x_train.values, dtype=torch.float32, requires_grad = True)
x_train_tensor.shape

torch.Size([404, 13])

In [None]:
# Training targets as a float32 tensor (no gradient tracking).
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
y_train_tensor.shape

torch.Size([404])

In [None]:
# Held-out features as a float32 tensor (no gradient tracking).
x_test_tensor = torch.tensor(x_test.values, dtype=torch.float32)
x_test_tensor.shape

torch.Size([102, 13])

In [None]:
# Held-out targets as a float32 tensor (no gradient tracking).
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)
y_test_tensor.shape

torch.Size([102])

# Создание модели
Создадим `TensorDataset` из тренировочных тензоров и `DataLoader`, который будет выдавать перемешанные мини-батчи.

In [None]:
# Pair each feature row with its target and serve them in shuffled mini-batches.
dataset = TensorDataset(x_train_tensor, y_train_tensor)
data_iter = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Peek at a single batch to sanity-check what the loader yields.
X, y = next(iter(data_iter))
print(X, y)

tensor([[4.3571e-01, 0.0000e+00, 1.0590e+01, 1.0000e+00, 4.8900e-01, 5.3440e+00,
         1.0000e+02, 3.8750e+00, 4.0000e+00, 2.7700e+02, 1.8600e+01, 3.9690e+02,
         2.3090e+01],
        [4.3790e-02, 8.0000e+01, 3.3700e+00, 0.0000e+00, 3.9800e-01, 5.7870e+00,
         3.1100e+01, 6.6115e+00, 4.0000e+00, 3.3700e+02, 1.6100e+01, 3.9690e+02,
         1.0240e+01],
        [4.8357e+00, 0.0000e+00, 1.8100e+01, 0.0000e+00, 5.8300e-01, 5.9050e+00,
         5.3200e+01, 3.1523e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.8822e+02,
         1.1450e+01],
        [3.7662e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 6.7900e-01, 6.2020e+00,
         7.8700e+01, 1.8629e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 1.8820e+01,
         1.4520e+01],
        [2.8656e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 5.9700e-01, 5.1550e+00,
         1.0000e+02, 1.5894e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 2.1097e+02,
         2.0080e+01],
        [1.8085e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 6.7900e-01, 6.4340e+00,

Создадим простую линейную модель: 13 входов — по числу признаков, 1 выход — предсказываемое значение.

In [None]:
# A single linear layer: 13 input features -> 1 predicted value.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=13, out_features=1),
)

In [None]:
# Display the layer structure of the model.
model

Sequential(
  (0): Linear(in_features=13, out_features=1, bias=True)
)

In [None]:
# Inspect the linear layer's weights as initialised, before any training step.
model[0].weight.data

tensor([[-0.2071,  0.1315,  0.1448,  0.1009,  0.2049, -0.1843, -0.1632, -0.0078,
         -0.0763,  0.1656, -0.2515,  0.1225, -0.1468]])

In [None]:
# Inspect the linear layer's bias as initialised, before any training step.
model[0].bias.data

tensor([0.1795])

In [None]:
# Mean squared error; the default reduction averages over all elements ('mean').
loss = torch.nn.MSELoss()

In [None]:
# Plain SGD over the model's weight and bias, with a very small step size.
trainer = torch.optim.SGD(model.parameters(), lr=1e-6)

# Обучение

In [None]:
num_epochs = 1000
log_every = 5  # report progress once per this many epochs

for epoch in range(1, num_epochs + 1):
    # One pass over the shuffled mini-batches.
    for X, y in data_iter:
        trainer.zero_grad()
        # The model emits one column per sample; flatten it so it lines up
        # element-wise with the 1-D target batch.
        batch_loss = loss(model(X).reshape(-1), y)
        batch_loss.backward()
        trainer.step()

    if epoch % log_every == 0:
        # Full-train loss is only reported, never back-propagated: skip it on
        # epochs that are not logged and do not build an autograd graph for it.
        with torch.no_grad():
            train_loss = loss(model(x_train_tensor).reshape(-1), y_train_tensor)
        print('epoch %d, loss: %f' % (epoch, train_loss.item()),'|\tw', model[0].weight.data, '|\tb', model[0].bias.data)

epoch 5, loss: 112.890488 |	w tensor([[-0.2078,  0.1139,  0.1430,  0.1011,  0.2050, -0.1817, -0.1225, -0.0094,
         -0.0800,  0.0363, -0.2508,  0.0717, -0.1510]]) |	b tensor([-0.1937])
epoch 10, loss: 89.412712 |	w tensor([[-0.2069,  0.1050,  0.1431,  0.1013,  0.2052, -0.1783, -0.0823, -0.0101,
         -0.0800,  0.0226, -0.2475,  0.0645, -0.1543]]) |	b tensor([-0.1934])
epoch 15, loss: 82.144997 |	w tensor([[-0.2062,  0.1002,  0.1427,  0.1015,  0.2053, -0.1747, -0.0506, -0.0103,
         -0.0799,  0.0218, -0.2438,  0.0655, -0.1580]]) |	b tensor([-0.1931])
epoch 20, loss: 144.483353 |	w tensor([[-0.2059,  0.0994,  0.1411,  0.1016,  0.2055, -0.1713, -0.0290, -0.0103,
         -0.0799,  0.0046, -0.2409,  0.0575, -0.1633]]) |	b tensor([-0.1928])
epoch 25, loss: 76.834068 |	w tensor([[-0.2050,  0.1003,  0.1401,  0.1018,  0.2056, -0.1678, -0.0091, -0.0100,
         -0.0790,  0.0164, -0.2373,  0.0616, -0.1684]]) |	b tensor([-0.1924])
epoch 30, loss: 82.706429 |	w tensor([[-0.2045,  0.103

In [None]:
# Per-sample residuals on the test set (prediction minus target).
# The model returns one column per sample, shape (N, 1), while y_test_tensor is
# 1-D, shape (N,). Subtracting them directly broadcasts to an (N, N) matrix, so
# flatten the predictions first (same .reshape(-1) as in the training loop).
with torch.no_grad():  # inference only: no autograd graph needed
    test_residuals = model(x_test_tensor).reshape(-1) - y_test_tensor
test_residuals

tensor([[  6.0600,   6.5600,  -1.5400,  ...,  -7.9400,  10.8600, -10.9400],
        [ -6.1418,  -5.6418, -13.7418,  ..., -20.1418,  -1.3418, -23.1418],
        [  2.9347,   3.4347,  -4.6653,  ..., -11.0653,   7.7347, -14.0653],
        ...,
        [  9.4813,   9.9813,   1.8813,  ...,  -4.5187,  14.2813,  -7.5187],
        [ -2.9451,  -2.4451, -10.5451,  ..., -16.9451,   1.8549, -19.9451],
        [ 14.0735,  14.5735,   6.4735,  ...,   0.0735,  18.8735,  -2.9265]],
       grad_fn=<SubBackward0>)