# HW 5 Question 3

# Setup

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.optim as optim
import torch.nn as nn
torch.set_printoptions(edgeitems=2, linewidth=75)

## define model and functions

In [2]:
def model(feature, w5, w4, w3, w2, w1, b):
    return feature[4] * w5 + feature[3] * w4 + feature[2] * w3 + feature[1] * w2 + feature[0] * w1 + b

In [3]:
def loss_fn(predicted, actual):
    squared_diffs = (predicted - actual)**2
    return squared_diffs.mean()

In [4]:
rates_to_learn_at = [1/x for x in [10, 100, 1000, 10000, 100000]]
rates_to_learn_at

[0.1, 0.01, 0.001, 0.0001, 1e-05]

In [5]:
percent_for_validation = 0.2

## Data import

In [6]:
housing_df = pd.DataFrame(pd.read_csv('Housing.csv'))
housing_df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [7]:
print(f"shape = {np.shape(housing_df)}")

# creates a list of all variables from the column names
feature_list = list( housing_df.columns )

print(f"features are: {feature_list}")

shape = (545, 13)
features are: ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea', 'furnishingstatus']


In [8]:
# Maps to turn categorys into numbers 
def boolean_map(x):
    return x.map({'yes': 1 , 'no': 0})
def furnish_map(x):
    return x.map({'furnished': 1 , 'semi-furnished': 0.5 , 'unfurnished': 0})

# Extracts the yes and no column names
binary_vars = [*feature_list[5:10], feature_list[11]]
print(f"binary vars = {binary_vars}")

# Extracts the furnishing column names
furnish_vars = [feature_list[12]]
print(f"furnish vars = {furnish_vars}")

# Extracts the column names that are actual values
valued_vars = feature_list.copy()
[valued_vars.remove( item ) for item in binary_vars]
[valued_vars.remove( item ) for item in furnish_vars]
print(f"value vars = {valued_vars}")

binary vars = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
furnish vars = ['furnishingstatus']
value vars = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']


In [9]:
x_df = housing_df.copy()

## scale data
scaler = StandardScaler()
# scaler = MinMaxScaler()

x_df[valued_vars] = scaler.fit_transform(x_df[valued_vars])

## map text values
x_df[binary_vars] = x_df[binary_vars].apply(boolean_map)
x_df[furnish_vars] = x_df[furnish_vars].apply(furnish_map)

## make y_df
y_df = x_df.pop('price')


In [10]:
# input_values = valued_vars.copy()
# input_values.remove('price')


# x_df = x_df[input_values]

In [11]:
x_df.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,1.046726,1.403419,1.421812,1.378217,1,0,0,0,1,1.517692,1,1.0
1,1.75701,1.403419,5.405809,2.532024,1,0,0,0,1,2.679409,0,1.0
2,2.218232,0.047278,1.421812,0.22441,1,0,1,0,0,1.517692,1,0.5
3,1.083624,1.403419,1.421812,0.22441,1,0,1,0,1,2.679409,1,1.0
4,1.046726,1.403419,-0.570187,0.22441,1,1,1,0,1,1.517692,0,1.0


In [12]:
y_df.head()

0    4.566365
1    4.004484
2    4.004484
3    3.985755
4    3.554979
Name: price, dtype: float64

In [13]:
# Remove unwanted data
for item in [*binary_vars, *furnish_vars] :
    x_df.pop(item)
x_df.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,parking
0,1.046726,1.403419,1.421812,1.378217,1.517692
1,1.75701,1.403419,5.405809,2.532024,2.679409
2,2.218232,0.047278,1.421812,0.22441,1.517692
3,1.083624,1.403419,1.421812,0.22441,2.679409
4,1.046726,1.403419,-0.570187,0.22441,1.517692


In [14]:
# Convert data frame to tensor

x = torch.tensor(x_df.values, dtype=torch.float32)
y = torch.tensor(y_df.values, dtype=torch.float32)

In [15]:
print(x.shape)

torch.Size([545, 5])


## Split into train and validation sets

### Set randomization keys
Set so that it should reproduce the same results every time. 
Remove to test that network is learning regardless of the randomization. 

### Shuffle indices

In [16]:
n_samples = x.shape[0]
n_val = int(percent_for_validation * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices  # <1>

(tensor([ 75, 231, 226, 149, 496, 346, 183, 375, 324, 492, 535, 472, 508,
         533, 531, 337, 470,  58, 544, 120,  81, 391,  16, 263,  76, 437,
         194, 477, 318, 493, 328, 389, 509, 223, 317, 522, 339, 479, 186,
         262, 323, 413, 154,  77, 406, 182, 107, 113, 195, 484, 393, 510,
         425, 112, 248, 417, 376, 289, 293,  25, 398, 480,  13, 426, 284,
         368, 266,  45,  89, 118,  62,  65, 433, 292,  73, 165, 260, 298,
          90, 331, 476, 464, 299, 327,  70, 378, 198, 269,  74, 311, 117,
         203, 209, 427, 276, 216, 133, 213, 225, 277, 356, 132, 106, 315,
         244, 501, 140, 253, 163, 450, 340, 392, 285, 348, 321, 181, 481,
         495, 432, 304, 128, 245, 169, 499, 297, 451, 365, 116, 258,  96,
         212, 489, 471,   3, 103, 135, 483, 126, 414, 114, 129, 482, 280,
         396, 515, 313, 191, 366, 310, 404, 374, 428, 397, 246,  79,   7,
         104, 345, 511, 119, 354, 424, 316, 520, 143, 407, 228, 259, 264,
         179, 516,  19, 456, 410, 388,

In [17]:
training_x = x[train_indices]
training_y  = y [train_indices].unsqueeze(1)

validation_x = x[val_indices]
validation_y  = y[val_indices].unsqueeze(1)

### Validation

In [18]:
type(training_x)

torch.Tensor

In [19]:
training_x.shape

torch.Size([436, 5])

## Learning how to use neural network component
nothing to grade

In [20]:
num_features = training_x.shape[1]
print(num_features)

linear_model = nn.Linear(num_features,1) # <1>
linear_model(training_x)

5


tensor([[ 0.8297],
        [ 0.0688],
        [-0.4804],
        [ 0.6824],
        [ 0.0802],
        [ 0.1625],
        [-0.4275],
        [-0.2851],
        [-0.1012],
        [ 0.0964],
        [ 0.1254],
        [-0.0689],
        [ 0.0298],
        [ 0.1632],
        [-0.0546],
        [ 1.2435],
        [-0.0264],
        [ 0.0689],
        [-0.0194],
        [-0.2815],
        [ 0.7981],
        [ 0.0831],
        [ 0.4997],
        [-0.0342],
        [ 0.3814],
        [-0.1630],
        [ 0.4583],
        [-0.0407],
        [ 0.0663],
        [ 0.1142],
        [ 0.7706],
        [-0.0849],
        [ 0.8196],
        [-0.3659],
        [ 0.6378],
        [ 0.1538],
        [ 0.9657],
        [ 0.0335],
        [-1.0003],
        [ 0.1117],
        [-0.1733],
        [ 1.0917],
        [ 0.8068],
        [ 0.3713],
        [-0.1214],
        [ 0.0360],
        [-0.4904],
        [-0.6694],
        [ 1.6119],
        [ 0.2011],
        [-0.3221],
        [ 0.2502],
        [ 0.

In [21]:
linear_model.weight

Parameter containing:
tensor([[-0.2730,  0.0213,  0.4376, -0.1277, -0.0305]],
       requires_grad=True)

In [22]:
linear_model.bias

Parameter containing:
tensor([0.0695], requires_grad=True)

In [23]:
linear_model = nn.Linear(5, 1) # <1>
optimizer = optim.SGD(
    linear_model.parameters(), # <2>
    lr=1e-2)

linear_model(training_x)

tensor([[-4.5048e-01],
        [ 4.6416e-02],
        [-1.1400e+00],
        [ 1.9509e-01],
        [ 1.6214e-01],
        [-4.3423e-01],
        [ 2.0628e-01],
        [-2.1971e-01],
        [-3.5891e-01],
        [-7.2435e-01],
        [-8.7945e-02],
        [-1.1553e+00],
        [ 2.0028e-01],
        [-5.5911e-01],
        [ 1.3985e-01],
        [-2.2964e-01],
        [ 1.1849e-01],
        [-1.1585e+00],
        [-4.2087e-01],
        [-1.2008e-01],
        [-5.1811e-01],
        [-4.9848e-01],
        [-4.1647e-01],
        [-4.0962e-01],
        [-5.2081e-01],
        [ 6.0710e-03],
        [ 4.8909e-01],
        [ 2.5366e-01],
        [ 4.8323e-02],
        [ 1.2095e-02],
        [-2.8137e-01],
        [-4.9560e-01],
        [-4.1001e-01],
        [-3.7437e-01],
        [-6.1260e-01],
        [-5.5196e-01],
        [-5.5345e-01],
        [-5.8522e-01],
        [ 4.4610e-01],
        [ 1.3831e-01],
        [-4.1783e-01],
        [-5.2448e-01],
        [-7.4055e-01],
        [-5

In [24]:
linear_model.parameters()

<generator object Module.parameters at 0x0000026193E44900>

In [25]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[ 0.2067, -0.1078,  0.0389, -0.3662, -0.1628]],
        requires_grad=True),
 Parameter containing:
 tensor([-0.3186], requires_grad=True)]

## Training function for neural network

In [26]:
def training_loop(n_epochs, optimizer, model, loss_fn, 
                  train_x, val_x,
                  train_y, val_y, 
                  epoch_report = 1000):
    for epoch in range(1, n_epochs + 1):
        train_p = model(train_x) # <1>
        loss_train = loss_fn(train_p, train_y)

        val_p = model(val_x) # <1>
        loss_val = loss_fn(val_p, val_y)
        
        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % epoch_report == 0 or epoch == n_epochs:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")


## Testing Neural Network training with a single node
nothign to grade

In [27]:
linear_model = nn.Linear(5, 1) # <1>
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    model = linear_model,
    loss_fn = loss_fn,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 500
    )

print()
print("Weight", linear_model.weight)
print("Bias", linear_model.bias)

print("-"*50)

Epoch 1, Training loss 0.7268, Validation loss 0.7543
Epoch 500, Training loss 0.4233, Validation loss 0.5092
Epoch 1000, Training loss 0.4233, Validation loss 0.5093
Epoch 1500, Training loss 0.4233, Validation loss 0.5093
Epoch 2000, Training loss 0.4233, Validation loss 0.5093
Epoch 2500, Training loss 0.4233, Validation loss 0.5093
Epoch 3000, Training loss 0.4233, Validation loss 0.5093

Weight Parameter containing:
tensor([[0.3730, 0.0728, 0.2829, 0.2737, 0.1600]], requires_grad=True)
Bias Parameter containing:
tensor([-0.0304], requires_grad=True)
--------------------------------------------------


## Learning Sequential layer models
nothing here to be graded

In [28]:
seq_model_example = nn.Sequential(
            nn.Linear(9, 22), # <1>
            nn.Tanh(),
            nn.Linear(22, 3)) # <2>
seq_model_example

Sequential(
  (0): Linear(in_features=9, out_features=22, bias=True)
  (1): Tanh()
  (2): Linear(in_features=22, out_features=3, bias=True)
)

In [29]:
[param.shape for param in seq_model_example.parameters()]

[torch.Size([22, 9]), torch.Size([22]), torch.Size([3, 22]), torch.Size([3])]

In [30]:
for name, param in seq_model_example.named_parameters():
    print(name, param.shape)

0.weight torch.Size([22, 9])
0.bias torch.Size([22])
2.weight torch.Size([3, 22])
2.bias torch.Size([3])


In [31]:
from collections import OrderedDict

seq_model_example = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(1, 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))

seq_model_example

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

In [32]:
for name, param in seq_model_example.named_parameters():
    print(name, param.shape)

hidden_linear.weight torch.Size([8, 1])
hidden_linear.bias torch.Size([8])
output_linear.weight torch.Size([1, 8])
output_linear.bias torch.Size([1])


In [33]:
seq_model_example.output_linear.bias

Parameter containing:
tensor([0.1545], requires_grad=True)

## Proper training with a sequential model

### Paramater Checking

In [34]:
print(f"train_x:  {training_x.shape}")
print(f"train_y:  {training_y.shape}")
print(f"valid_x:  {validation_x.shape}")
print(f"valid_y:  {validation_y.shape}")

train_x:  torch.Size([436, 5])
train_y:  torch.Size([436, 1])
valid_x:  torch.Size([109, 5])
valid_y:  torch.Size([109, 1])


In [35]:
models = list()

### example of single layer
for comparison

In [36]:
linear_model_0 = nn.Linear(5, 1)

# -------------------- #

model = linear_model_0
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y,  
    epoch_report = 25)

print()
print("Weight", linear_model.weight)
print("Bias", linear_model.bias)

print("-"*50)

Epoch 1, Training loss 0.9902, Validation loss 1.2515
Epoch 25, Training loss 0.9177, Validation loss 1.1612
Epoch 50, Training loss 0.8536, Validation loss 1.0805
Epoch 75, Training loss 0.7991, Validation loss 1.0113
Epoch 100, Training loss 0.7528, Validation loss 0.9518
Epoch 125, Training loss 0.7132, Validation loss 0.9006
Epoch 150, Training loss 0.6794, Validation loss 0.8563
Epoch 175, Training loss 0.6503, Validation loss 0.8178
Epoch 200, Training loss 0.6253, Validation loss 0.7845

Weight Parameter containing:
tensor([[0.3730, 0.0728, 0.2829, 0.2737, 0.1600]], requires_grad=True)
Bias Parameter containing:
tensor([-0.0304], requires_grad=True)
--------------------------------------------------


### 3a
1 hidden layer with 8 nodes

In [37]:
seq_model_1 = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(5, 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))

# -------------------- #

model = seq_model_1
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.8639, Validation loss 0.9577
Epoch 25, Training loss 0.8209, Validation loss 0.9131
Epoch 50, Training loss 0.7810, Validation loss 0.8718
Epoch 75, Training loss 0.7456, Validation loss 0.8350
Epoch 100, Training loss 0.7142, Validation loss 0.8023
Epoch 125, Training loss 0.6862, Validation loss 0.7730
Epoch 150, Training loss 0.6613, Validation loss 0.7469
Epoch 175, Training loss 0.6391, Validation loss 0.7236
Epoch 200, Training loss 0.6193, Validation loss 0.7027


### 3b
add 2 more hidden layers

#### set 1

In [38]:
a = 10
b = 25
c = 8

seq_model_2 = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', nn.Tanh()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', nn.Tanh()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', nn.Tanh()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = seq_model_2
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.9904, Validation loss 1.2954
Epoch 25, Training loss 0.9793, Validation loss 1.2786
Epoch 50, Training loss 0.9685, Validation loss 1.2621
Epoch 75, Training loss 0.9583, Validation loss 1.2465
Epoch 100, Training loss 0.9486, Validation loss 1.2316
Epoch 125, Training loss 0.9393, Validation loss 1.2175
Epoch 150, Training loss 0.9303, Validation loss 1.2038
Epoch 175, Training loss 0.9216, Validation loss 1.1907
Epoch 200, Training loss 0.9132, Validation loss 1.1779


#### set 2

In [39]:
a = 20
b = 15
c = 10

seq_model_3 = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', nn.Tanh()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', nn.Tanh()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', nn.Tanh()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = seq_model_3
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0783, Validation loss 1.3181
Epoch 25, Training loss 1.0622, Validation loss 1.2989
Epoch 50, Training loss 1.0465, Validation loss 1.2803
Epoch 75, Training loss 1.0318, Validation loss 1.2628
Epoch 100, Training loss 1.0178, Validation loss 1.2462
Epoch 125, Training loss 1.0044, Validation loss 1.2304
Epoch 150, Training loss 0.9916, Validation loss 1.2153
Epoch 175, Training loss 0.9792, Validation loss 1.2007
Epoch 200, Training loss 0.9671, Validation loss 1.1865


#### set 3

In [40]:
a = 20
b = 15
c = 10

seq_model_4 = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', nn.Tanh()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', nn.Tanh()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', nn.Tanh()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = seq_model_4
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.9624, Validation loss 1.2314
Epoch 25, Training loss 0.9397, Validation loss 1.2004
Epoch 50, Training loss 0.9174, Validation loss 1.1699
Epoch 75, Training loss 0.8963, Validation loss 1.1411
Epoch 100, Training loss 0.8760, Validation loss 1.1135
Epoch 125, Training loss 0.8564, Validation loss 1.0871
Epoch 150, Training loss 0.8375, Validation loss 1.0616
Epoch 175, Training loss 0.8191, Validation loss 1.0370
Epoch 200, Training loss 0.8011, Validation loss 1.0130


### Change in activation
for personal testing

### Change in activation
for personal testing

#### Sigmoid

In [41]:
a = 10
b = 25
c = 8
activation = nn.Sigmoid

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.1223, Validation loss 1.5005
Epoch 25, Training loss 1.0774, Validation loss 1.4349
Epoch 50, Training loss 1.0433, Validation loss 1.3825
Epoch 75, Training loss 1.0187, Validation loss 1.3422
Epoch 100, Training loss 1.0008, Validation loss 1.3111
Epoch 125, Training loss 0.9879, Validation loss 1.2869
Epoch 150, Training loss 0.9786, Validation loss 1.2679
Epoch 175, Training loss 0.9718, Validation loss 1.2530
Epoch 200, Training loss 0.9669, Validation loss 1.2411


#### RELU

In [42]:
a = 10
b = 25
c = 8
activation = nn.ReLU

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.9450, Validation loss 1.1170
Epoch 25, Training loss 0.9401, Validation loss 1.1151
Epoch 50, Training loss 0.9354, Validation loss 1.1131
Epoch 75, Training loss 0.9309, Validation loss 1.1111
Epoch 100, Training loss 0.9266, Validation loss 1.1090
Epoch 125, Training loss 0.9226, Validation loss 1.1070
Epoch 150, Training loss 0.9187, Validation loss 1.1048
Epoch 175, Training loss 0.9148, Validation loss 1.1026
Epoch 200, Training loss 0.9110, Validation loss 1.1001


#### Leaky RELU

In [43]:
a = 10
b = 25
c = 8
activation = nn.LeakyReLU

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.1334, Validation loss 1.2114
Epoch 25, Training loss 1.1038, Validation loss 1.1925
Epoch 50, Training loss 1.0769, Validation loss 1.1758
Epoch 75, Training loss 1.0533, Validation loss 1.1615
Epoch 100, Training loss 1.0324, Validation loss 1.1492
Epoch 125, Training loss 1.0139, Validation loss 1.1385
Epoch 150, Training loss 0.9973, Validation loss 1.1292
Epoch 175, Training loss 0.9825, Validation loss 1.1212
Epoch 200, Training loss 0.9693, Validation loss 1.1141


#### RELU 6

In [44]:
a = 10
b = 25
c = 8
activation = nn.ReLU6

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0727, Validation loss 1.2078
Epoch 25, Training loss 1.0599, Validation loss 1.2016
Epoch 50, Training loss 1.0480, Validation loss 1.1961
Epoch 75, Training loss 1.0374, Validation loss 1.1914
Epoch 100, Training loss 1.0278, Validation loss 1.1872
Epoch 125, Training loss 1.0191, Validation loss 1.1836
Epoch 150, Training loss 1.0112, Validation loss 1.1803
Epoch 175, Training loss 1.0040, Validation loss 1.1774
Epoch 200, Training loss 0.9973, Validation loss 1.1747


#### ELU

In [45]:
a = 10
b = 25
c = 8
activation = nn.ELU

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.9711, Validation loss 1.1355
Epoch 25, Training loss 0.9434, Validation loss 1.1135
Epoch 50, Training loss 0.9177, Validation loss 1.0927
Epoch 75, Training loss 0.8943, Validation loss 1.0735
Epoch 100, Training loss 0.8728, Validation loss 1.0555
Epoch 125, Training loss 0.8527, Validation loss 1.0382
Epoch 150, Training loss 0.8337, Validation loss 1.0214
Epoch 175, Training loss 0.8156, Validation loss 1.0049
Epoch 200, Training loss 0.7981, Validation loss 0.9885


#### Hardsigmoid

In [46]:
a = 10
b = 25
c = 8
activation = nn.Hardsigmoid

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.9675, Validation loss 1.2411
Epoch 25, Training loss 0.9644, Validation loss 1.2330
Epoch 50, Training loss 0.9620, Validation loss 1.2260
Epoch 75, Training loss 0.9602, Validation loss 1.2202
Epoch 100, Training loss 0.9588, Validation loss 1.2154
Epoch 125, Training loss 0.9578, Validation loss 1.2114
Epoch 150, Training loss 0.9570, Validation loss 1.2080
Epoch 175, Training loss 0.9564, Validation loss 1.2052
Epoch 200, Training loss 0.9560, Validation loss 1.2028


#### Hardtanh

In [47]:
a = 10
b = 25
c = 8
activation = nn.Hardtanh

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 0.8957, Validation loss 1.1293
Epoch 25, Training loss 0.8874, Validation loss 1.1194
Epoch 50, Training loss 0.8788, Validation loss 1.1090
Epoch 75, Training loss 0.8700, Validation loss 1.0985
Epoch 100, Training loss 0.8610, Validation loss 1.0879
Epoch 125, Training loss 0.8518, Validation loss 1.0770
Epoch 150, Training loss 0.8424, Validation loss 1.0660
Epoch 175, Training loss 0.8327, Validation loss 1.0547
Epoch 200, Training loss 0.8228, Validation loss 1.0431


#### softsign

In [48]:
a = 10
b = 25
c = 8
activation = nn.Softsign

sigmoid_model = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation_1', activation()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation_2', activation()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation_3', activation()),
    ('output_linear', nn.Linear(c, 1))
]))

# -------------------- #

model = sigmoid_model
models.append(model)

model.zero_grad()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer.zero_grad()

loss = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0189, Validation loss 1.3244
Epoch 25, Training loss 1.0110, Validation loss 1.3112
Epoch 50, Training loss 1.0034, Validation loss 1.2986
Epoch 75, Training loss 0.9965, Validation loss 1.2869
Epoch 100, Training loss 0.9901, Validation loss 1.2760
Epoch 125, Training loss 0.9841, Validation loss 1.2658
Epoch 150, Training loss 0.9785, Validation loss 1.2563
Epoch 175, Training loss 0.9733, Validation loss 1.2474
Epoch 200, Training loss 0.9684, Validation loss 1.2389


## Result of ALL models

In [49]:
for model in models:
    print(f'{model}\n')
        
    predictions = model(validation_x) 
    loss = loss_fn(predictions, validation_y)
    print(f'     Validation Loss: {loss}\n')
        
    for name, param in model.named_parameters():
        print(f'     {name}  \t: ', param.shape)
        # print(f'     {name}  \t: ', param.shape)
        
    print('\n' + '-'*90 + '\n')

Linear(in_features=5, out_features=1, bias=True)

     Validation Loss: 0.7832158803939819

     weight  	:  torch.Size([1, 5])
     bias  	:  torch.Size([1])

------------------------------------------------------------------------------------------

Sequential(
  (hidden_linear): Linear(in_features=5, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

     Validation Loss: 0.7018929123878479

     hidden_linear.weight  	:  torch.Size([8, 5])
     hidden_linear.bias  	:  torch.Size([8])
     output_linear.weight  	:  torch.Size([1, 8])
     output_linear.bias  	:  torch.Size([1])

------------------------------------------------------------------------------------------

Sequential(
  (hidden_linear_1): Linear(in_features=5, out_features=10, bias=True)
  (hidden_activation_1): Tanh()
  (hidden_linear_2): Linear(in_features=10, out_features=25, bias=True)
  (hidden_activation_2): Tanh()
  (hidden_linear_3): Li