# HW 5 Question 3

# Setup

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.optim as optim
import torch.nn as nn
torch.set_printoptions(edgeitems=2, linewidth=75)

## define model and functions

In [2]:
def model(feature, w5, w4, w3, w2, w1, b):
    return feature[4] * w5 + feature[3] * w4 + feature[2] * w3 + feature[1] * w2 + feature[0] * w1 + b

In [3]:
def loss_fn(predicted, actual):
    squared_diffs = (predicted - actual)**2
    return squared_diffs.mean()

In [4]:
rates_to_learn_at = [1/x for x in [10, 100, 1000, 10000, 100000]]
rates_to_learn_at

[0.1, 0.01, 0.001, 0.0001, 1e-05]

In [5]:
percent_for_validation = 0.2

## Data import

In [6]:
housing_df = pd.DataFrame(pd.read_csv('Housing.csv'))
housing_df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [7]:
print(f"shape = {np.shape(housing_df)}")

# creates a list of all variables from the column names
feature_list = list( housing_df.columns )

print(f"features are: {feature_list}")

shape = (545, 13)
features are: ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea', 'furnishingstatus']


In [8]:
# Maps to turn categorys into numbers 
def boolean_map(x):
    return x.map({'yes': 1 , 'no': 0})
def furnish_map(x):
    return x.map({'furnished': 1 , 'semi-furnished': 0.5 , 'unfurnished': 0})

# Extracts the yes and no column names
binary_vars = [*feature_list[5:10], feature_list[11]]
print(f"binary vars = {binary_vars}")

# Extracts the furnishing column names
furnish_vars = [feature_list[12]]
print(f"furnish vars = {furnish_vars}")

# Extracts the column names that are actual values
valued_vars = feature_list.copy()
[valued_vars.remove( item ) for item in binary_vars]
[valued_vars.remove( item ) for item in furnish_vars]
print(f"value vars = {valued_vars}")

binary vars = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
furnish vars = ['furnishingstatus']
value vars = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']


In [9]:
x_df = housing_df.copy()

## scale data
scaler = StandardScaler()
# scaler = MinMaxScaler()

x_df[valued_vars] = scaler.fit_transform(x_df[valued_vars])

## map text values
x_df[binary_vars] = x_df[binary_vars].apply(boolean_map)
x_df[furnish_vars] = x_df[furnish_vars].apply(furnish_map)

## make y_df
y_df = x_df.pop('price')


In [10]:
# input_values = valued_vars.copy()
# input_values.remove('price')


# x_df = x_df[input_values]

In [11]:
x_df.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,1.046726,1.403419,1.421812,1.378217,1,0,0,0,1,1.517692,1,1.0
1,1.75701,1.403419,5.405809,2.532024,1,0,0,0,1,2.679409,0,1.0
2,2.218232,0.047278,1.421812,0.22441,1,0,1,0,0,1.517692,1,0.5
3,1.083624,1.403419,1.421812,0.22441,1,0,1,0,1,2.679409,1,1.0
4,1.046726,1.403419,-0.570187,0.22441,1,1,1,0,1,1.517692,0,1.0


In [12]:
y_df.head()

0    4.566365
1    4.004484
2    4.004484
3    3.985755
4    3.554979
Name: price, dtype: float64

In [13]:
# Remove unwanted data
for item in [*binary_vars, *furnish_vars] :
    x_df.pop(item)
x_df.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,parking
0,1.046726,1.403419,1.421812,1.378217,1.517692
1,1.75701,1.403419,5.405809,2.532024,2.679409
2,2.218232,0.047278,1.421812,0.22441,1.517692
3,1.083624,1.403419,1.421812,0.22441,2.679409
4,1.046726,1.403419,-0.570187,0.22441,1.517692


In [14]:
# Convert data frame to tensor

x = torch.tensor(x_df.values, dtype=torch.float32)
y = torch.tensor(y_df.values, dtype=torch.float32)

In [15]:
print(x.shape)

torch.Size([545, 5])


## Split into train and validation sets

### Set randomization keys
Set so that it should reproduce the same results every time. 
Remove to test that network is learning regardless of the randomization. 

### Shuffle indices

In [16]:
n_samples = x.shape[0]
n_val = int(percent_for_validation * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices  # <1>

(tensor([273, 453,  72, 266, 237, 362, 241, 138, 195, 442,  71, 173, 311,
         440, 317, 391, 379, 270,  40, 280, 376, 188, 336, 480, 168, 434,
         512, 278, 457, 400, 420, 498, 229,  62, 253, 238, 359, 291, 219,
         402, 109, 326, 398, 205,   3, 302, 357, 320, 318, 285, 171, 209,
         319, 538, 103, 134, 145, 489, 158, 232, 192, 386, 283, 349, 128,
         524, 177,  98,  53,  68,  92, 500, 406,  22, 255, 110, 256, 250,
          49, 486, 120, 312, 175,  67, 217, 404, 544, 484, 119, 410, 466,
         532, 245, 313, 401, 468,  70, 334, 353,  52, 527, 210, 321,  12,
         375,  63, 354, 220,  61, 162,  46,  86,  58, 183,  78,  90, 330,
         214,   1, 495, 343, 201, 378, 332, 461, 107,  99, 465, 298,  66,
          38, 236, 531,  81,  24, 133, 426, 243, 505,  28, 445, 539,  54,
         403, 230, 305, 485, 335, 493, 118, 492, 488, 387,   2, 413, 325,
         172, 221, 160,  57, 121, 279, 275, 196, 427, 244, 542, 437, 314,
         191, 352, 366, 418, 224, 190,

In [17]:
training_x = x[train_indices]
training_y  = y [train_indices].unsqueeze(1)

validation_x = x[val_indices]
validation_y  = y[val_indices].unsqueeze(1)

### Validation

In [18]:
type(training_x)

torch.Tensor

In [19]:
training_x.shape

torch.Size([436, 5])

## Learning how to use neural network component
nothing to grade

In [20]:
num_features = training_x.shape[1]
print(num_features)

linear_model = nn.Linear(num_features,1) # <1>
linear_model(training_x)

5


tensor([[-0.7285],
        [-0.4643],
        [ 0.6932],
        [-0.6636],
        [-0.1336],
        [-0.3109],
        [-0.7795],
        [ 0.2782],
        [ 0.5942],
        [-0.5041],
        [ 1.0739],
        [-0.2372],
        [-0.1547],
        [-0.6104],
        [ 0.3751],
        [-0.3380],
        [-0.8044],
        [ 0.7889],
        [ 0.0258],
        [ 0.3411],
        [ 0.2859],
        [ 0.2748],
        [-0.3323],
        [-0.3438],
        [-0.5462],
        [-0.5946],
        [-0.0067],
        [-0.4039],
        [-0.7546],
        [-0.4184],
        [ 0.1092],
        [-0.1102],
        [ 0.5759],
        [ 0.2212],
        [ 0.0471],
        [ 0.0939],
        [-0.7443],
        [-0.3465],
        [ 0.0724],
        [-0.1215],
        [ 0.2600],
        [-0.9756],
        [-0.3811],
        [-0.5166],
        [ 0.2480],
        [-0.0691],
        [-0.3216],
        [-0.3087],
        [-0.6158],
        [ 0.0362],
        [-0.0539],
        [-0.3695],
        [-0.

In [21]:
linear_model.weight

Parameter containing:
tensor([[ 0.2244, -0.2470,  0.3084,  0.3578, -0.0446]],
       requires_grad=True)

In [22]:
linear_model.bias

Parameter containing:
tensor([-0.0478], requires_grad=True)

In [23]:
linear_model = nn.Linear(5, 1) # <1>
optimizer = optim.SGD(
    linear_model.parameters(), # <2>
    lr=1e-2)

linear_model(training_x)

tensor([[ 0.6709],
        [ 0.9484],
        [-0.1572],
        [ 1.0041],
        [ 0.0584],
        [ 0.4906],
        [ 1.0515],
        [-0.0488],
        [-0.8446],
        [ 0.9646],
        [-1.2211],
        [-0.8687],
        [ 0.8218],
        [-0.1676],
        [ 0.1533],
        [ 0.1419],
        [ 1.0616],
        [-0.3498],
        [-0.0067],
        [ 0.2852],
        [ 0.1897],
        [ 0.3123],
        [ 0.8687],
        [ 0.5394],
        [-0.1938],
        [-0.1740],
        [ 0.4274],
        [ 0.9591],
        [ 0.2510],
        [ 0.9296],
        [ 0.3800],
        [ 0.4697],
        [-0.7447],
        [-0.5998],
        [ 0.4054],
        [-0.9429],
        [ 0.6419],
        [ 0.1454],
        [-0.0258],
        [ 0.4131],
        [-0.6156],
        [-0.0795],
        [ 0.5546],
        [ 0.9440],
        [ 0.1795],
        [ 0.4883],
        [ 0.1095],
        [ 0.9202],
        [ 0.1943],
        [-0.0110],
        [ 0.3598],
        [ 0.0937],
        [ 0.

In [24]:
linear_model.parameters()

<generator object Module.parameters at 0x0000018EBFC45DD0>

In [25]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[-0.0917, -0.2094, -0.3707, -0.0933,  0.3584]],
        requires_grad=True),
 Parameter containing:
 tensor([0.1606], requires_grad=True)]

## Training function for neural network

In [26]:
def training_loop(n_epochs, optimizer, model, loss_fn, 
                  train_x, val_x,
                  train_y, val_y, 
                  epoch_report = 1000):
    for epoch in range(1, n_epochs + 1):
        train_p = model(train_x) # <1>
        loss_train = loss_fn(train_p, train_y)

        val_p = model(val_x) # <1>
        loss_val = loss_fn(val_p, val_y)
        
        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % epoch_report == 0 or epoch == n_epochs:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")


## Testing Neural Network training with a single node
nothign to grade

In [27]:
linear_model = nn.Linear(5, 1) # <1>
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    model = linear_model,
    loss_fn = loss_fn,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 500
    )

print()
print("Weight", linear_model.weight)
print("Bias", linear_model.bias)

print("-"*50)

Epoch 1, Training loss 0.9578, Validation loss 0.9070
Epoch 500, Training loss 0.4547, Validation loss 0.3775
Epoch 1000, Training loss 0.4547, Validation loss 0.3776
Epoch 1500, Training loss 0.4547, Validation loss 0.3776
Epoch 2000, Training loss 0.4547, Validation loss 0.3776
Epoch 2500, Training loss 0.4547, Validation loss 0.3776
Epoch 3000, Training loss 0.4547, Validation loss 0.3776

Weight Parameter containing:
tensor([[0.3760, 0.0722, 0.3187, 0.2267, 0.1625]], requires_grad=True)
Bias Parameter containing:
tensor([-8.7716e-05], requires_grad=True)
--------------------------------------------------


## Learning Sequential layer models
nothing here to be graded

In [28]:
seq_model_example = nn.Sequential(
            nn.Linear(9, 22), # <1>
            nn.Tanh(),
            nn.Linear(22, 3)) # <2>
seq_model_example

Sequential(
  (0): Linear(in_features=9, out_features=22, bias=True)
  (1): Tanh()
  (2): Linear(in_features=22, out_features=3, bias=True)
)

In [29]:
[param.shape for param in seq_model_example.parameters()]

[torch.Size([22, 9]), torch.Size([22]), torch.Size([3, 22]), torch.Size([3])]

In [30]:
for name, param in seq_model_example.named_parameters():
    print(name, param.shape)

0.weight torch.Size([22, 9])
0.bias torch.Size([22])
2.weight torch.Size([3, 22])
2.bias torch.Size([3])


In [31]:
from collections import OrderedDict

seq_model_example = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(1, 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))

seq_model_example

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

In [32]:
for name, param in seq_model_example.named_parameters():
    print(name, param.shape)

hidden_linear.weight torch.Size([8, 1])
hidden_linear.bias torch.Size([8])
output_linear.weight torch.Size([1, 8])
output_linear.bias torch.Size([1])


In [33]:
seq_model_example.output_linear.bias

Parameter containing:
tensor([0.0245], requires_grad=True)

## Proper training with a sequential model

### Paramater Checking

In [34]:
print(f"train_x:  {training_x.shape}")
print(f"train_y:  {training_y.shape}")
print(f"valid_x:  {validation_x.shape}")
print(f"valid_y:  {validation_y.shape}")

train_x:  torch.Size([436, 5])
train_y:  torch.Size([436, 1])
valid_x:  torch.Size([109, 5])
valid_y:  torch.Size([109, 1])


In [35]:
models = list()

### example of single layer
for comparison

In [36]:
linear_model_0 = nn.Linear(5, 1)

# -------------------- #

model = linear_model_0
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y,  
    epoch_report = 25)

print()
print("Weight", linear_model.weight)
print("Bias", linear_model.bias)

print("-"*50)

Epoch 1, Training loss 0.7408, Validation loss 0.6298
Epoch 25, Training loss 0.7165, Validation loss 0.6104
Epoch 50, Training loss 0.6937, Validation loss 0.5921
Epoch 75, Training loss 0.6732, Validation loss 0.5755
Epoch 100, Training loss 0.6546, Validation loss 0.5604
Epoch 125, Training loss 0.6378, Validation loss 0.5468
Epoch 150, Training loss 0.6226, Validation loss 0.5343
Epoch 175, Training loss 0.6088, Validation loss 0.5229
Epoch 200, Training loss 0.5962, Validation loss 0.5125

Weight Parameter containing:
tensor([[0.3760, 0.0722, 0.3187, 0.2267, 0.1625]], requires_grad=True)
Bias Parameter containing:
tensor([-8.7716e-05], requires_grad=True)
--------------------------------------------------


### 3a
1 hidden layer with 8 nodes

In [37]:
seq_model_1 = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(5, 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))

# -------------------- #

model = seq_model_1
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.4462, Validation loss 1.5959
Epoch 25, Training loss 1.3866, Validation loss 1.5307
Epoch 50, Training loss 1.3314, Validation loss 1.4702
Epoch 75, Training loss 1.2823, Validation loss 1.4159
Epoch 100, Training loss 1.2382, Validation loss 1.3670
Epoch 125, Training loss 1.1984, Validation loss 1.3226
Epoch 150, Training loss 1.1621, Validation loss 1.2820
Epoch 175, Training loss 1.1289, Validation loss 1.2446
Epoch 200, Training loss 1.0983, Validation loss 1.2101


### 3b
add 2 more hidden layers

#### set 1

In [38]:
a = 10
b = 25
c = 8

seq_model_2 = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', nn.Tanh()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', nn.Tanh()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', nn.Tanh()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = seq_model_2
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0654, Validation loss 1.1568
Epoch 25, Training loss 1.0403, Validation loss 1.1277
Epoch 50, Training loss 1.0161, Validation loss 1.0995
Epoch 75, Training loss 0.9933, Validation loss 1.0730
Epoch 100, Training loss 0.9719, Validation loss 1.0479
Epoch 125, Training loss 0.9515, Validation loss 1.0241
Epoch 150, Training loss 0.9321, Validation loss 1.0013
Epoch 175, Training loss 0.9135, Validation loss 0.9794
Epoch 200, Training loss 0.8955, Validation loss 0.9583


#### set 2

In [39]:
a = 20
b = 15
c = 10

seq_model_3 = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', nn.Tanh()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', nn.Tanh()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', nn.Tanh()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = seq_model_3
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0803, Validation loss 1.1498
Epoch 25, Training loss 1.0479, Validation loss 1.1172
Epoch 50, Training loss 1.0180, Validation loss 1.0868
Epoch 75, Training loss 0.9912, Validation loss 1.0594
Epoch 100, Training loss 0.9670, Validation loss 1.0345
Epoch 125, Training loss 0.9449, Validation loss 1.0116
Epoch 150, Training loss 0.9245, Validation loss 0.9903
Epoch 175, Training loss 0.9056, Validation loss 0.9703
Epoch 200, Training loss 0.8878, Validation loss 0.9514


#### set 3

In [40]:
a = 20
b = 15
c = 10

seq_model_4 = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', nn.Tanh()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', nn.Tanh()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', nn.Tanh()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = seq_model_4
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.9756, Validation loss 1.0590
Epoch 25, Training loss 0.9478, Validation loss 1.0297
Epoch 50, Training loss 0.9204, Validation loss 1.0008
Epoch 75, Training loss 0.8945, Validation loss 0.9732
Epoch 100, Training loss 0.8698, Validation loss 0.9467
Epoch 125, Training loss 0.8462, Validation loss 0.9211
Epoch 150, Training loss 0.8234, Validation loss 0.8964
Epoch 175, Training loss 0.8014, Validation loss 0.8723
Epoch 200, Training loss 0.7802, Validation loss 0.8489


### Change in activation
for personal testing

### Change in activation
for personal testing

#### Sigmoid

In [41]:
a = 10
b = 25
c = 8
activation = nn.Sigmoid

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.1705, Validation loss 1.2562
Epoch 25, Training loss 1.1205, Validation loss 1.2063
Epoch 50, Training loss 1.0825, Validation loss 1.1684
Epoch 75, Training loss 1.0550, Validation loss 1.1409
Epoch 100, Training loss 1.0350, Validation loss 1.1210
Epoch 125, Training loss 1.0205, Validation loss 1.1066
Epoch 150, Training loss 1.0100, Validation loss 1.0961
Epoch 175, Training loss 1.0023, Validation loss 1.0885
Epoch 200, Training loss 0.9968, Validation loss 1.0830


#### RELU

In [42]:
a = 10
b = 25
c = 8
activation = nn.ReLU

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0820, Validation loss 1.1854
Epoch 25, Training loss 1.0688, Validation loss 1.1711
Epoch 50, Training loss 1.0570, Validation loss 1.1580
Epoch 75, Training loss 1.0468, Validation loss 1.1467
Epoch 100, Training loss 1.0378, Validation loss 1.1366
Epoch 125, Training loss 1.0298, Validation loss 1.1276
Epoch 150, Training loss 1.0228, Validation loss 1.1195
Epoch 175, Training loss 1.0164, Validation loss 1.1121
Epoch 200, Training loss 1.0104, Validation loss 1.1053


#### Leaky RELU

In [43]:
a = 10
b = 25
c = 8
activation = nn.LeakyReLU

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.1113, Validation loss 1.2064
Epoch 25, Training loss 1.0891, Validation loss 1.1833
Epoch 50, Training loss 1.0688, Validation loss 1.1620
Epoch 75, Training loss 1.0510, Validation loss 1.1433
Epoch 100, Training loss 1.0354, Validation loss 1.1268
Epoch 125, Training loss 1.0216, Validation loss 1.1122
Epoch 150, Training loss 1.0096, Validation loss 1.0994
Epoch 175, Training loss 0.9990, Validation loss 1.0881
Epoch 200, Training loss 0.9897, Validation loss 1.0780


#### RELU 6

In [44]:
a = 10
b = 25
c = 8
activation = nn.ReLU6

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.9809, Validation loss 1.0655
Epoch 25, Training loss 0.9802, Validation loss 1.0648
Epoch 50, Training loss 0.9796, Validation loss 1.0641
Epoch 75, Training loss 0.9790, Validation loss 1.0634
Epoch 100, Training loss 0.9784, Validation loss 1.0628
Epoch 125, Training loss 0.9778, Validation loss 1.0621
Epoch 150, Training loss 0.9772, Validation loss 1.0614
Epoch 175, Training loss 0.9766, Validation loss 1.0607
Epoch 200, Training loss 0.9759, Validation loss 1.0600


#### ELU

In [45]:
a = 10
b = 25
c = 8
activation = nn.ELU

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0497, Validation loss 1.1426
Epoch 25, Training loss 1.0300, Validation loss 1.1211
Epoch 50, Training loss 1.0111, Validation loss 1.1001
Epoch 75, Training loss 0.9935, Validation loss 1.0804
Epoch 100, Training loss 0.9769, Validation loss 1.0616
Epoch 125, Training loss 0.9611, Validation loss 1.0436
Epoch 150, Training loss 0.9459, Validation loss 1.0261
Epoch 175, Training loss 0.9312, Validation loss 1.0091
Epoch 200, Training loss 0.9169, Validation loss 0.9924


#### Hardsigmoid

In [46]:
a = 10
b = 25
c = 8
activation = nn.Hardsigmoid

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0054, Validation loss 1.0917
Epoch 25, Training loss 0.9996, Validation loss 1.0859
Epoch 50, Training loss 0.9951, Validation loss 1.0814
Epoch 75, Training loss 0.9918, Validation loss 1.0781
Epoch 100, Training loss 0.9894, Validation loss 1.0758
Epoch 125, Training loss 0.9876, Validation loss 1.0740
Epoch 150, Training loss 0.9863, Validation loss 1.0727
Epoch 175, Training loss 0.9853, Validation loss 1.0718
Epoch 200, Training loss 0.9846, Validation loss 1.0711


#### Hardtanh

In [47]:
a = 10
b = 25
c = 8
activation = nn.Hardtanh

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0800, Validation loss 1.1891
Epoch 25, Training loss 1.0285, Validation loss 1.1367
Epoch 50, Training loss 0.9810, Validation loss 1.0882
Epoch 75, Training loss 0.9384, Validation loss 1.0447
Epoch 100, Training loss 0.8999, Validation loss 1.0051
Epoch 125, Training loss 0.8647, Validation loss 0.9687
Epoch 150, Training loss 0.8323, Validation loss 0.9351
Epoch 175, Training loss 0.8023, Validation loss 0.9038
Epoch 200, Training loss 0.7745, Validation loss 0.8747


#### softsign

In [48]:
a = 10
b = 25
c = 8
activation = nn.Softsign

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.9750, Validation loss 1.0614
Epoch 25, Training loss 0.9726, Validation loss 1.0588
Epoch 50, Training loss 0.9701, Validation loss 1.0561
Epoch 75, Training loss 0.9677, Validation loss 1.0534
Epoch 100, Training loss 0.9652, Validation loss 1.0507
Epoch 125, Training loss 0.9628, Validation loss 1.0480
Epoch 150, Training loss 0.9604, Validation loss 1.0453
Epoch 175, Training loss 0.9579, Validation loss 1.0426
Epoch 200, Training loss 0.9555, Validation loss 1.0399


## Result of ALL models

In [49]:
for model in models:
    print(f'{model}\n')
        
    predictions = model(validation_x) 
    loss = loss_fn(predictions, validation_y)
    print(f'     Validation Loss: {loss}\n')
        
    for name, param in model.named_parameters():
        print(f'     {name}  \t: ', param.shape)
        # print(f'     {name}  \t: ', param.shape)
        
    print('\n' + '-'*90 + '\n')

Linear(in_features=5, out_features=1, bias=True)

     Validation Loss: 0.5120754837989807

     weight  	:  torch.Size([1, 5])
     bias  	:  torch.Size([1])

------------------------------------------------------------------------------------------

Sequential(
  (hidden_linear): Linear(in_features=5, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

     Validation Loss: 1.2087867259979248

     hidden_linear.weight  	:  torch.Size([8, 5])
     hidden_linear.bias  	:  torch.Size([8])
     output_linear.weight  	:  torch.Size([1, 8])
     output_linear.bias  	:  torch.Size([1])

------------------------------------------------------------------------------------------

Sequential(
  (hidden_linear_1): Linear(in_features=5, out_features=10, bias=True)
  (hidden_activation_1): Tanh()
  (hidden_linear_2): Linear(in_features=10, out_features=25, bias=True)
  (hidden_activation_2): Tanh()
  (hidden_linear_3): Li