# Домшнее задание 3

## Задача:  

- Создать Dataset для загрузки данных (sklearn.datasets.fetch_california_housing)  
- Обернуть его в Dataloader  
- Написать архитектуру сети, которая предсказывает стоимость недвижимости. Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)  
- Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели train-test разделение нужно сделать с помощью sklearn random_state=13, test_size = 0.25  

In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.nn import MSELoss, Linear, ReLU, Dropout, BatchNorm1d, Sequential, Sigmoid
from torch.optim import SGD, Adam, RMSprop
from torchsummary import summary
from tqdm import trange


 ### 1. Создать Dataset для загрузки данных

In [2]:
houses = fetch_california_housing()
x, y = houses.data, houses.target
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=13, test_size = 0.25)

In [3]:
def torch_StandardScaler(x):
    m = x.mean(0, keepdim=True)
    s = x.std(0, unbiased=False, keepdim=True)
    x -= m
    x /= s
    return x

In [4]:
x_train = torch_StandardScaler(torch.FloatTensor(x_train))
y_train = torch.FloatTensor(y_train).view(-1, 1)

x_test = torch_StandardScaler(torch.FloatTensor(x_test))
y_test = torch.FloatTensor(y_test).view(-1, 1)

In [5]:
train_data = TensorDataset(x_train, y_train)
test_data = TensorDataset(x_test,  y_test)

### 2. Обернуть его в Dataloader

In [6]:
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)
print(len(train_loader), len(test_loader))


242 81


### 3. Написать архитектуру сети, которая предсказывает стоимость недвижимости.

In [7]:
def great_california_model(input_count, output_count, layers_count=3, multiplier=4, batch=True, drop=True):
        
    hidden_layers_coef = list(range(layers_count * multiplier, 0, -multiplier) )
    hidden_layers_coef.append(output_count)
    
    in_f = 0
    out_f = 0
    layer_list = []
    for layer_i in range(layers_count):
        match layer_i:
            case 0:
                in_f = input_count                
            case _ if layer_i > 0:
                in_f = hidden_layers_coef[layer_i] * input_count
        match layer_i:
            case _ if layer_i != layers_count - 1:
                out_f = hidden_layers_coef[layer_i + 1] * input_count
            case _:
                out_f = output_count

        layer_list.append(Linear(in_f, out_f))
        
        if layer_i != layers_count - 1:            
            if batch: layer_list.append(BatchNorm1d(out_f))
            layer_list.append(ReLU())
            if drop: layer_list.append(Dropout(0.25))  
    return Sequential(*layer_list)

In [8]:

CaliforniaNet = great_california_model(8,1).cuda()



In [9]:
summary(CaliforniaNet, (8,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 64]             576
       BatchNorm1d-2                   [-1, 64]             128
              ReLU-3                   [-1, 64]               0
           Dropout-4                   [-1, 64]               0
            Linear-5                   [-1, 32]           2,080
       BatchNorm1d-6                   [-1, 32]              64
              ReLU-7                   [-1, 32]               0
           Dropout-8                   [-1, 32]               0
            Linear-9                    [-1, 1]              33
Total params: 2,881
Trainable params: 2,881
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.01
Estimated Total Size (MB): 0.01
-----------------------------------------------

In [10]:
CaliforniaNet

Sequential(
  (0): Linear(in_features=8, out_features=64, bias=True)
  (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Dropout(p=0.25, inplace=False)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Dropout(p=0.25, inplace=False)
  (8): Linear(in_features=32, out_features=1, bias=True)
)

### 4. Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

In [11]:
optimizer_list = [SGD, Adam, RMSprop]

In [12]:
epochs = 100
log_interval = 20
result_research = dict()
form = len(str(epochs))
criterion = MSELoss()
for optimizer_func in optimizer_list:
    model = great_california_model(8,1).cuda()
    if optimizer_func == torch.optim.SGD:
        optimizer = optimizer_func(model.parameters(), lr=0.01, momentum=0.9)
    else:
        optimizer = optimizer_func(model.parameters())    
    optimizer_name = optimizer.__class__.__name__
    print(optimizer_name)
    numder_of_batchs = len(train_loader)
    for epoch in range(epochs):
        running_loss = 0.0
        model.train()
        with trange(numder_of_batchs, desc=f'Epoch {(epoch + 1):0{form}}/{epochs}') as t:
            # t.set_postfix_str(f'optimizer=>{optimizer_name}')
            for batch_idx, (sample, target) in zip(t, train_loader):
                
                sample = sample.cuda()
                target = target.cuda()

                optimizer.zero_grad()
                output = model(sample)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

                if batch_idx % log_interval == 0 and not batch_idx:
                    t.set_postfix_str(f'optimizer=>{optimizer_name}, loss={loss.item():.3f}')
                if batch_idx == numder_of_batchs - 1:
                    t.set_postfix_str(f'optimizer=>{optimizer_name}, loss={(running_loss / numder_of_batchs):.3f}')
        if epoch in [9, 49, 99]:           
            model.eval()
            epo = epoch + 1
            if epo not in result_research:
                result_research[epo] = dict()
            with torch.no_grad():
                output = model(x_test.cuda())
                loss_out = torch.sqrt(criterion(output, y_test.cuda()))
                # print(f'Лосс на модели {loss_out.item()}')
                result_research[epo][optimizer_name] = loss_out.item()

SGD


Epoch 001/100: 100%|██████████| 242/242 [00:00<00:00, 382.52it/s, optimizer=>SGD, loss=0.775]
Epoch 002/100: 100%|██████████| 242/242 [00:00<00:00, 367.43it/s, optimizer=>SGD, loss=0.548]
Epoch 003/100: 100%|██████████| 242/242 [00:00<00:00, 383.13it/s, optimizer=>SGD, loss=0.496]
Epoch 004/100: 100%|██████████| 242/242 [00:00<00:00, 401.54it/s, optimizer=>SGD, loss=0.502]
Epoch 005/100: 100%|██████████| 242/242 [00:00<00:00, 407.64it/s, optimizer=>SGD, loss=0.473]
Epoch 006/100: 100%|██████████| 242/242 [00:00<00:00, 381.32it/s, optimizer=>SGD, loss=0.475]
Epoch 007/100: 100%|██████████| 242/242 [00:00<00:00, 364.67it/s, optimizer=>SGD, loss=0.468]
Epoch 008/100: 100%|██████████| 242/242 [00:00<00:00, 395.01it/s, optimizer=>SGD, loss=0.455]
Epoch 009/100: 100%|██████████| 242/242 [00:00<00:00, 359.26it/s, optimizer=>SGD, loss=0.455]
Epoch 010/100: 100%|██████████| 242/242 [00:00<00:00, 321.57it/s, optimizer=>SGD, loss=0.449]
Epoch 011/100: 100%|██████████| 242/242 [00:00<00:00, 390.55

Adam


Epoch 001/100: 100%|██████████| 242/242 [00:00<00:00, 290.68it/s, optimizer=>Adam, loss=1.296]
Epoch 002/100: 100%|██████████| 242/242 [00:00<00:00, 311.23it/s, optimizer=>Adam, loss=0.655]
Epoch 003/100: 100%|██████████| 242/242 [00:00<00:00, 303.43it/s, optimizer=>Adam, loss=0.574]
Epoch 004/100: 100%|██████████| 242/242 [00:00<00:00, 274.22it/s, optimizer=>Adam, loss=0.543]
Epoch 005/100: 100%|██████████| 242/242 [00:00<00:00, 311.63it/s, optimizer=>Adam, loss=0.521]
Epoch 006/100: 100%|██████████| 242/242 [00:00<00:00, 307.28it/s, optimizer=>Adam, loss=0.506]
Epoch 007/100: 100%|██████████| 242/242 [00:00<00:00, 273.29it/s, optimizer=>Adam, loss=0.495]
Epoch 008/100: 100%|██████████| 242/242 [00:00<00:00, 247.59it/s, optimizer=>Adam, loss=0.474]
Epoch 009/100: 100%|██████████| 242/242 [00:00<00:00, 326.33it/s, optimizer=>Adam, loss=0.452]
Epoch 010/100: 100%|██████████| 242/242 [00:00<00:00, 335.84it/s, optimizer=>Adam, loss=0.459]
Epoch 011/100: 100%|██████████| 242/242 [00:00<00:

RMSprop


Epoch 001/100: 100%|██████████| 242/242 [00:00<00:00, 342.97it/s, optimizer=>RMSprop, loss=0.645]
Epoch 002/100: 100%|██████████| 242/242 [00:00<00:00, 339.86it/s, optimizer=>RMSprop, loss=0.476]
Epoch 003/100: 100%|██████████| 242/242 [00:00<00:00, 310.14it/s, optimizer=>RMSprop, loss=0.466]
Epoch 004/100: 100%|██████████| 242/242 [00:00<00:00, 342.47it/s, optimizer=>RMSprop, loss=0.443]
Epoch 005/100: 100%|██████████| 242/242 [00:00<00:00, 340.14it/s, optimizer=>RMSprop, loss=0.430]
Epoch 006/100: 100%|██████████| 242/242 [00:00<00:00, 330.91it/s, optimizer=>RMSprop, loss=0.424]
Epoch 007/100: 100%|██████████| 242/242 [00:00<00:00, 344.93it/s, optimizer=>RMSprop, loss=0.424]
Epoch 008/100: 100%|██████████| 242/242 [00:00<00:00, 326.21it/s, optimizer=>RMSprop, loss=0.421]
Epoch 009/100: 100%|██████████| 242/242 [00:00<00:00, 345.20it/s, optimizer=>RMSprop, loss=0.418]
Epoch 010/100: 100%|██████████| 242/242 [00:00<00:00, 324.11it/s, optimizer=>RMSprop, loss=0.427]
Epoch 011/100: 100%|

In [13]:
print('Результаты теста:')
print(f'На кофигурации сети:\n{model}')
for j in result_research.keys():
    print(f'На {j} эпохах: ')
    for i in result_research[j].keys():
        print(f'{i} => Loss: {result_research[j][i]}')
    winner = [key for key, value in result_research[j].items() if value == min(result_research[j].values())]   
    print(f'Лучший результат показала функция оптимизации {winner}\n')

        

Результаты теста:
На кофигурации сети:
Sequential(
  (0): Linear(in_features=8, out_features=64, bias=True)
  (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Dropout(p=0.25, inplace=False)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Dropout(p=0.25, inplace=False)
  (8): Linear(in_features=32, out_features=1, bias=True)
)
На 10 эпохах: 
SGD => Loss: 0.679122269153595
Adam => Loss: 0.9135489463806152
RMSprop => Loss: 1.226797103881836
Лучший результат показала функция оптимизации ['SGD']

На 50 эпохах: 
SGD => Loss: 1.2081718444824219
Adam => Loss: 0.6837319731712341
RMSprop => Loss: 2.532555103302002
Лучший результат показала функция оптимизации ['Adam']

На 100 эпохах: 
SGD => Loss: 0.6326267719268799
Adam => Loss: 1.1722090244293213
RMSprop => Loss: 2.381446599960327
Лучший результат показала функция оптимиз