## Задание

Реализуйте обучение нейронной сети из одного нейрона для задачи предсказания стоимости квартир[ boston house price](https://www.kaggle.com/vikrishnan/boston-house-prices)_s или[ california housing price](https://www.kaggle.com/datasets/camnugent/california-housing-prices)s с использованием pytorchых данных

Инструкция к выполнению задания:

1. Загрузите и подготовьте данные

2. Разделите данные на test и train

3. Создать модель (объект) класса. Для создания объекта можно использовать класс Sequential

4. Обучите модель на train данных

5. Проверьте качество модели на тестовых данных

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

import torch
from torch.utils.data import TensorDataset, DataLoader

In [2]:
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = pd.read_csv('Data/housing.csv', header=None, delimiter=r"\s+", names=column_names)
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CRIM     506 non-null    float64
 1   ZN       506 non-null    float64
 2   INDUS    506 non-null    float64
 3   CHAS     506 non-null    int64  
 4   NOX      506 non-null    float64
 5   RM       506 non-null    float64
 6   AGE      506 non-null    float64
 7   DIS      506 non-null    float64
 8   RAD      506 non-null    int64  
 9   TAX      506 non-null    float64
 10  PTRATIO  506 non-null    float64
 11  B        506 non-null    float64
 12  LSTAT    506 non-null    float64
 13  MEDV     506 non-null    float64
dtypes: float64(12), int64(2)
memory usage: 55.5 KB


In [4]:
data.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063,22.532806
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062,9.197104
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95,17.025
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36,21.2
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


In [5]:
data.isna().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
MEDV       0
dtype: int64

In [6]:
data.shape

(506, 14)

In [7]:
features = data.drop('MEDV', axis=1)
labels = data['MEDV']
features = torch.Tensor(np.array(features))
labels = torch.Tensor(np.array(labels))

features.shape, labels.shape

(torch.Size([506, 13]), torch.Size([506]))

In [8]:
dataset = TensorDataset(features, labels)

In [9]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

In [10]:
len(test_dataset)

102

In [11]:
model = torch.nn.Sequential(torch.nn.Linear(features.shape[1], 1))
torch.nn.init.xavier_uniform_(model[0].weight)
model

Sequential(
  (0): Linear(in_features=13, out_features=1, bias=True)
)

In [12]:
batch_size = 8
data_iter = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Read a batch to see how it works
for X, y in data_iter:
    print(X, '\n', y)
    break

tensor([[1.4051e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 5.9700e-01, 6.6570e+00,
         1.0000e+02, 1.5275e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.5050e+01,
         2.1220e+01],
        [1.2802e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 7.4000e-01, 5.8540e+00,
         9.6600e+01, 1.8956e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 2.4052e+02,
         2.3790e+01],
        [1.5936e-01, 0.0000e+00, 6.9100e+00, 0.0000e+00, 4.4800e-01, 6.2110e+00,
         6.5000e+00, 5.7209e+00, 3.0000e+00, 2.3300e+02, 1.7900e+01, 3.9446e+02,
         7.4400e+00],
        [6.1470e-01, 0.0000e+00, 6.2000e+00, 0.0000e+00, 5.0700e-01, 6.6180e+00,
         8.0800e+01, 3.2721e+00, 8.0000e+00, 3.0700e+02, 1.7400e+01, 3.9690e+02,
         7.6000e+00],
        [7.3671e+00, 0.0000e+00, 1.8100e+01, 0.0000e+00, 6.7900e-01, 6.1930e+00,
         7.8100e+01, 1.9356e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 9.6730e+01,
         2.1520e+01],
        [1.0328e-01, 2.5000e+01, 5.1300e+00, 0.0000e+00, 4.5300e-01, 5.9270e+00,

In [13]:
loss = torch.nn.MSELoss(reduction='mean')
trainer = torch.optim.SGD(model.parameters(), lr=1e-8, maximize=False)

In [14]:
len(test_dataset)

102

In [15]:
num_epochs = 100
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        trainer.zero_grad()
        l = loss(model(X).reshape(-1), y)
        #print(model(X).reshape(-1), y)
        l.backward()
        trainer.step()
    test_loss = 0.0
    with torch.no_grad():
        for X_test, y_test in test_dataset:
            output = model(X_test).reshape(-1)
            test_loss += loss(output, y_test).item()
    l /= len(test_dataset)
    if epoch % 5 == 0:
        print('epoch %d, loss: %f' % (epoch, l.item()),'|\tw', model[0].weight.data, '|\tb', model[0].bias.data)

epoch 5, loss: 60.600796 |	w tensor([[-0.2196,  0.1346, -0.0591, -0.6012,  0.5083, -0.2807,  0.1961,  0.4521,
         -0.3407, -0.2474, -0.0031,  0.1833,  0.0753]]) |	b tensor([-0.1730])
epoch 10, loss: 14.100478 |	w tensor([[-0.2180,  0.1343, -0.0570, -0.6012,  0.5084, -0.2800,  0.2059,  0.4523,
         -0.3379, -0.1686, -0.0009,  0.2076,  0.0773]]) |	b tensor([-0.1729])
epoch 15, loss: 6.541384 |	w tensor([[-0.2171,  0.1332, -0.0562, -0.6012,  0.5084, -0.2800,  0.2079,  0.4521,
         -0.3365, -0.1402, -0.0007,  0.1959,  0.0779]]) |	b tensor([-0.1729])
epoch 20, loss: 2.694134 |	w tensor([[-0.2163,  0.1320, -0.0557, -0.6012,  0.5084, -0.2801,  0.2083,  0.4519,
         -0.3355, -0.1229, -0.0009,  0.1794,  0.0782]]) |	b tensor([-0.1729])
epoch 25, loss: 13.109694 |	w tensor([[-0.2157,  0.1310, -0.0554, -0.6012,  0.5084, -0.2802,  0.2084,  0.4517,
         -0.3346, -0.1094, -0.0011,  0.1631,  0.0783]]) |	b tensor([-0.1729])
epoch 30, loss: 2.909895 |	w tensor([[-0.2151,  0.1300, -0

In [16]:
model.eval()  # Переводим модель в режим оценки
test_loss = 0.0

# Итерируемся по тестовым данным
with torch.no_grad():  # Градиенты не нужны для оценки
    for inputs, targets in test_loader:
        # Прогнозируем выходные значения
        outputs = model(inputs)
        
        # Вычисляем потери для текущего пакета
        test_loss += loss(outputs, targets).item()

# Вычисляем среднее значение потерь на тестовом наборе данных
average_test_loss = test_loss / len(test_loader)

print('Average Test Loss:', average_test_loss)

Average Test Loss: 194.63382163414587
