In [1]:
import math
import torch
import numpy as np
from sklearn.datasets import fetch_california_housing
import pandas as pd
import torchmetrics

california_housing = fetch_california_housing(as_frame=True)

In [2]:
california_housing.frame.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [3]:
california_housing.frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   MedInc       20640 non-null  float64
 1   HouseAge     20640 non-null  float64
 2   AveRooms     20640 non-null  float64
 3   AveBedrms    20640 non-null  float64
 4   Population   20640 non-null  float64
 5   AveOccup     20640 non-null  float64
 6   Latitude     20640 non-null  float64
 7   Longitude    20640 non-null  float64
 8   MedHouseVal  20640 non-null  float64
dtypes: float64(9)
memory usage: 1.4 MB


In [4]:
california_housing.frame.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,3.870671,28.639486,5.429,1.096675,1425.476744,3.070655,35.631861,-119.569704,2.068558
std,1.899822,12.585558,2.474173,0.473911,1132.462122,10.38605,2.135952,2.003532,1.153956
min,0.4999,1.0,0.846154,0.333333,3.0,0.692308,32.54,-124.35,0.14999
25%,2.5634,18.0,4.440716,1.006079,787.0,2.429741,33.93,-121.8,1.196
50%,3.5348,29.0,5.229129,1.04878,1166.0,2.818116,34.26,-118.49,1.797
75%,4.74325,37.0,6.052381,1.099526,1725.0,3.282261,37.71,-118.01,2.64725
max,15.0001,52.0,141.909091,34.066667,35682.0,1243.333333,41.95,-114.31,5.00001


In [5]:
df = pd.DataFrame(data=california_housing.frame)


In [6]:
df.loc[(df.AveRooms > 6.052381), 'AveRooms'] = df['AveRooms'].mean()

In [7]:
df.loc[(df.AveBedrms > 1.099526), 'AveBedrms'] = df['AveBedrms'].mean()

In [8]:
df.loc[(df.Population > 1725.000000), 'Population'] = df['Population'].mean()

In [9]:
df.loc[(df.AveOccup > 3.282261), 'AveOccup'] = df['AveOccup'].mean()

In [10]:
df.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,3.870671,28.639486,4.923431,1.039643,1084.574472,2.706404,35.631861,-119.569704,2.068558
std,1.899822,12.585558,0.773255,0.057514,397.322747,0.423443,2.135952,2.003532,1.153956
min,0.4999,1.0,0.846154,0.333333,3.0,0.692308,32.54,-124.35,0.14999
25%,2.5634,18.0,4.440716,1.006079,787.0,2.429741,33.93,-121.8,1.196
50%,3.5348,29.0,5.229129,1.04878,1166.0,2.818116,34.26,-118.49,1.797
75%,4.74325,37.0,5.429,1.096675,1425.476744,3.070655,37.71,-118.01,2.64725
max,15.0001,52.0,6.052381,1.099502,1725.0,3.282258,41.95,-114.31,5.00001


In [11]:
columns = ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
target = 'MedHouseVal'

X = df[columns]
y = df[target]


In [12]:
import torch.nn.functional as F
import torch.nn as nn

In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    train_size=0.25, 
                                                    random_state=13)

In [14]:
class MyDataset(torch.utils.data.Dataset):
  
    def __init__(self, X, y):
        self.X = torch.Tensor(X)
        self.y = torch.from_numpy(y).float()

    def __len__(self):
        return self.X.shape[0]
  
    def __getitem__(self, index):
        return (self.X[index], self.y[index])

In [15]:

train_dataset = MyDataset(X_train.values, y_train.values)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True,
                                          num_workers=0, drop_last=False)

test_dataset = MyDataset(X_test.values, y_test.values)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=8, shuffle=True,
                                          num_workers=0, drop_last=False)

In [16]:
class Perceptron(nn.Module):
    def __init__(self, input_dim, output_dim, activation="relu"):
        super().__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.activation = activation
        
    def forward(self, x):
        x = self.fc(x)
        if self.activation == "relu":
            return F.relu(x)
        if self.activation == "sigmoid":
            return F.sigmoid(x)
        raise RuntimeError
        

class FeedForward(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(FeedForward, self).__init__()
        self.bn1 = nn.BatchNorm1d(input_dim)
        self.fc1 = Perceptron(input_dim, 2 * hidden_dim)
        self.dp1 = nn.Dropout(0.25)
        self.bn2 = nn.BatchNorm1d(2 * hidden_dim)
        self.fc2 = Perceptron(2 * hidden_dim, hidden_dim)
        self.dp2 = nn.Dropout(0.25)
        self.bn3 = nn.BatchNorm1d(hidden_dim)
        self.fc3 = Perceptron(hidden_dim, 1)
        
    def forward(self, x):
        x = self.bn1(x)
        x = self.fc1(x)
        x = self.dp1(x)
        x = self.bn2(x)
        x = self.fc2(x)
        x = self.dp2(x)
        x = self.bn3(x)
        x = self.fc3(x)
        return x

In [17]:
def get_optimizer(model, optimizer_name = 'Adam'):
    optimizers = {'Adam': torch.optim.Adam(model.parameters(), lr=0.0005),
                  'RMSprop': torch.optim.RMSprop(model.parameters(), lr=0.005),
                  'SGD': torch.optim.SGD(model.parameters(), lr=0.0005, momentum=0.0)}
    return optimizers[optimizer_name]

In [18]:
def create_res_for_all_optimizers(num_epochs, criterion):
    res_r2_train = []
    res_r2_test = []
    optimizers = ['Adam', 'RMSprop', 'SGD']
    for optimizer_name in optimizers:
        
        model = FeedForward(8, 400)
        optimizer = get_optimizer(model, optimizer_name)
        train_r2 = torchmetrics.R2Score()
        test_r2 = torchmetrics.R2Score()

        r2_train_list = []
        r2_test_list = []
        
        for epoch in range(num_epochs):

            model.train()

            for i, data in enumerate(train_loader):
                inputs, target = data[0], data[1]
                optimizer.zero_grad()
                outputs = model(inputs)
                batch_r2 = train_r2(outputs.squeeze(), target)
                # Рассчитываем лосс-функцию (функцию потерь)
                loss = criterion(outputs.squeeze(), target)
                loss.backward()
                optimizer.step()

            model.eval()

            for i, data in enumerate(test_loader):
                inputs, target = data[0], data[1]
                outputs = model(inputs)
                test_r2(outputs.squeeze(), target)

            # total r2 over all training batches
            total_train_r2 = train_r2.compute()
            # total r2 over all training batches
            total_test_r2 = test_r2.compute()

            r2_train_list.append(total_train_r2)
            r2_test_list.append(total_test_r2)
            print(f'Epoch {epoch+1}: r2_train = {total_train_r2}, r2_test = {total_test_r2}')
            
            #reset for next epoch
            train_r2.reset()
            test_r2.reset()
        res_r2_train.append(r2_train_list[-1].item())
        res_r2_test.append(r2_test_list[-1].item())
    metrics_result = pd.DataFrame({'r2_train': res_r2_train, 'r2_test': res_r2_test}, index = ['Adam', 'RMSprop', 'SGD'])
    return metrics_result

In [19]:
res = create_res_for_all_optimizers(30, nn.MSELoss())
res.sort_values(by='r2_test', ascending=False)

Epoch 1: r2_train = -0.17853009700775146, r2_test = 0.1983097791671753
Epoch 2: r2_train = 0.19532936811447144, r2_test = 0.3249870538711548
Epoch 3: r2_train = 0.2934742569923401, r2_test = 0.40335822105407715
Epoch 4: r2_train = 0.34463369846343994, r2_test = 0.45341843366622925
Epoch 5: r2_train = 0.38222330808639526, r2_test = 0.4932246208190918
Epoch 6: r2_train = 0.4400065541267395, r2_test = 0.5450773239135742
Epoch 7: r2_train = 0.49208223819732666, r2_test = 0.5519078373908997
Epoch 8: r2_train = 0.4914061427116394, r2_test = 0.5987149477005005
Epoch 9: r2_train = 0.532020092010498, r2_test = 0.6128345727920532
Epoch 10: r2_train = 0.5510189533233643, r2_test = 0.6181284189224243
Epoch 11: r2_train = 0.5746726989746094, r2_test = 0.6621918678283691
Epoch 12: r2_train = 0.5955236554145813, r2_test = 0.6842654943466187
Epoch 13: r2_train = 0.6121718883514404, r2_test = 0.6934701204299927
Epoch 14: r2_train = 0.6154148578643799, r2_test = 0.6678123474121094
Epoch 15: r2_train = 0

Unnamed: 0,r2_train,r2_test
Adam,0.682212,0.731265
SGD,0.17597,0.273951
RMSprop,0.722266,-35.897038


### По итогу кажется что SGD сходится очень медленно и нужно больше эпох чтобы сошелся до уровня других. Далее по скорости RMSprop, но судя по метрикам переобучился, как и говорилось на уроке, Adam и в моем случае предпочтительней.