# HW 5 Question 3

# Setup

In [185]:
%matplotlib inline
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.optim as optim
import torch.nn as nn
torch.set_printoptions(edgeitems=2, linewidth=75)

## define model and functions

In [186]:
def model(feature, w5, w4, w3, w2, w1, b):
    return feature[4] * w5 + feature[3] * w4 + feature[2] * w3 + feature[1] * w2 + feature[0] * w1 + b

In [187]:
def loss_fn(predicted, actual):
    squared_diffs = (predicted - actual)**2
    return squared_diffs.mean()

In [188]:
rates_to_learn_at = [1/x for x in [10, 100, 1000, 10000, 100000]]
rates_to_learn_at

[0.1, 0.01, 0.001, 0.0001, 1e-05]

In [189]:
percent_for_validation = 0.2

## Data import

In [190]:
housing_df = pd.DataFrame(pd.read_csv('Housing.csv'))
housing_df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [191]:
print(f"shape = {np.shape(housing_df)}")

# creates a list of all variables from the column names
feature_list = list( housing_df.columns )

print(f"features are: {feature_list}")

shape = (545, 13)
features are: ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea', 'furnishingstatus']


In [192]:
# Maps to turn categorys into numbers 
def boolean_map(x):
    return x.map({'yes': 1 , 'no': 0})
def furnish_map(x):
    return x.map({'furnished': 1 , 'semi-furnished': 0.5 , 'unfurnished': 0})

# Extracts the yes and no column names
binary_vars = [*feature_list[5:10], feature_list[11]]
print(f"binary vars = {binary_vars}")

# Extracts the furnishing column names
furnish_vars = [feature_list[12]]
print(f"furnish vars = {furnish_vars}")

# Extracts the column names that are actual values
valued_vars = feature_list.copy()
[valued_vars.remove( item ) for item in binary_vars]
[valued_vars.remove( item ) for item in furnish_vars]
print(f"value vars = {valued_vars}")

binary vars = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
furnish vars = ['furnishingstatus']
value vars = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']


In [193]:
x_df = housing_df.copy()

## scale data
scaler = StandardScaler()
# scaler = MinMaxScaler()

x_df[valued_vars] = scaler.fit_transform(x_df[valued_vars])

## map text values
x_df[binary_vars] = x_df[binary_vars].apply(boolean_map)
x_df[furnish_vars] = x_df[furnish_vars].apply(furnish_map)

## make y_df
y_df = x_df.pop('price')


In [194]:
# input_values = valued_vars.copy()
# input_values.remove('price')


# x_df = x_df[input_values]

In [195]:
x_df.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,1.046726,1.403419,1.421812,1.378217,1,0,0,0,1,1.517692,1,1.0
1,1.75701,1.403419,5.405809,2.532024,1,0,0,0,1,2.679409,0,1.0
2,2.218232,0.047278,1.421812,0.22441,1,0,1,0,0,1.517692,1,0.5
3,1.083624,1.403419,1.421812,0.22441,1,0,1,0,1,2.679409,1,1.0
4,1.046726,1.403419,-0.570187,0.22441,1,1,1,0,1,1.517692,0,1.0


In [196]:
y_df.head()

0    4.566365
1    4.004484
2    4.004484
3    3.985755
4    3.554979
Name: price, dtype: float64

In [197]:
# Remove unwanted data
for item in [*binary_vars, *furnish_vars] :
    x_df.pop(item)
x_df.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,parking
0,1.046726,1.403419,1.421812,1.378217,1.517692
1,1.75701,1.403419,5.405809,2.532024,2.679409
2,2.218232,0.047278,1.421812,0.22441,1.517692
3,1.083624,1.403419,1.421812,0.22441,2.679409
4,1.046726,1.403419,-0.570187,0.22441,1.517692


In [198]:
# Convert data frame to tensor

x = torch.tensor(x_df.values, dtype=torch.float32)
y = torch.tensor(y_df.values, dtype=torch.float32)

In [199]:
print(x.shape)

torch.Size([545, 5])


## Split into train and validation sets

### Set randomization keys
Set so that it should reproduce the same results every time. 
Remove to test that network is learning regardless of the randomization. 

In [200]:
seed = 404

torch.manual_seed(seed) ;
gen = torch.Generator() ;
gen.manual_seed(seed)   ;

<torch._C.Generator at 0x1d1cc39bbf0>

### Shuffle indices

In [201]:
n_samples = x.shape[0]
n_val = int(percent_for_validation * n_samples)

shuffled_indices = torch.randperm(n_samples, generator=gen)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices  # <1>

(tensor([223, 445, 209,  81,  86, 428,   0, 408, 206, 304, 127, 265,  30,
         197, 303, 266, 339, 314, 337, 479, 352,  99, 450,  28, 370,  44,
         477, 496, 488, 536,  82, 213, 379, 259,  12, 156,  35,  83, 291,
         145, 192, 331, 514, 505, 153, 112, 173, 215, 393, 451, 297, 309,
          41, 234, 378,  20, 122, 130, 201,  65,  96, 538,  19, 374,  84,
         262, 124, 459, 363, 543, 399, 498, 381,  63, 106, 464, 328, 473,
         185, 183, 162, 400,  49, 191, 409, 281, 268, 380, 336,  40, 544,
         274, 275, 386,   7, 480, 119,  43,   6, 499, 415, 492, 426, 460,
         184, 143, 397, 372,  27,  22,  50, 342, 487, 518,  57,  16,  56,
         189, 155, 190,  32, 347, 177,  87,  18, 230,   3, 200,  70,  89,
         420, 102,   9, 362, 141,  61, 247, 506,  15,  39, 243, 465, 248,
         435, 252, 148, 182, 329, 466, 535, 402, 338, 241, 489, 302, 471,
          25, 534, 472, 260, 319, 233, 101, 164, 404, 292, 167, 240, 434,
          10,  52, 298, 436, 194,  62,

In [202]:
training_x = x[train_indices]
training_y  = y [train_indices]

validation_x = x[val_indices]
validation_y  = y[val_indices]

### Validation

In [203]:
type(training_x)

torch.Tensor

In [204]:
training_x.shape

torch.Size([436, 5])

## Learning how to use neural network component
nothing to grade

In [205]:
num_features = training_x.shape[1]
print(num_features)

linear_model = nn.Linear(num_features,1) # <1>
linear_model(training_x)

5


tensor([[-1.5357e-01],
        [ 3.5349e-01],
        [-6.4986e-01],
        [ 6.1788e-01],
        [-2.4617e-02],
        [ 1.1321e-01],
        [ 3.5952e-01],
        [-2.2506e-02],
        [-2.3100e-01],
        [-4.0603e-01],
        [ 4.9768e-01],
        [ 1.0465e-01],
        [ 9.7517e-01],
        [ 7.5688e-01],
        [-3.9271e-01],
        [-1.5603e-01],
        [ 2.6022e-01],
        [ 4.2293e-01],
        [ 7.0381e-02],
        [-2.9799e-01],
        [ 4.6221e-02],
        [-2.5930e-01],
        [ 3.8631e-02],
        [-3.2389e-01],
        [ 2.2576e-01],
        [ 6.9338e-01],
        [-1.3370e-01],
        [-2.2506e-02],
        [-1.6664e-01],
        [-5.8248e-01],
        [-4.4474e-01],
        [-2.5973e-01],
        [ 1.4941e-03],
        [-1.5552e-01],
        [ 1.0222e-02],
        [-6.7070e-01],
        [ 1.0302e+00],
        [ 8.6532e-01],
        [ 9.6198e-02],
        [ 4.7854e-01],
        [-6.3596e-01],
        [ 2.1125e-01],
        [ 9.0754e-02],
        [-2

In [206]:
linear_model.weight

Parameter containing:
tensor([[-0.2511, -0.2303,  0.2523,  0.2684,  0.1208]],
       requires_grad=True)

In [207]:
linear_model.bias

Parameter containing:
tensor([0.0335], requires_grad=True)

In [208]:
linear_model = nn.Linear(5, 1) # <1>
optimizer = optim.SGD(
    linear_model.parameters(), # <2>
    lr=1e-2)

linear_model(training_x)

tensor([[-0.3476],
        [-0.8496],
        [-1.1790],
        [-0.1512],
        [-0.6138],
        [-0.3761],
        [ 0.2245],
        [-0.8697],
        [-1.1096],
        [ 0.1140],
        [-0.7039],
        [-0.3881],
        [ 0.4434],
        [ 0.0087],
        [-0.8833],
        [ 0.0640],
        [-0.4878],
        [-0.0968],
        [-0.6292],
        [-0.4389],
        [-0.1156],
        [-0.4714],
        [-0.4641],
        [-0.0723],
        [ 0.0909],
        [ 0.1940],
        [-0.9976],
        [-0.8697],
        [-0.3648],
        [-0.3539],
        [-1.2965],
        [-1.0089],
        [ 0.2452],
        [-0.6852],
        [-0.4378],
        [-1.2030],
        [ 0.5066],
        [-0.3580],
        [-0.3979],
        [-0.1120],
        [-1.1630],
        [ 0.1331],
        [-0.4041],
        [-0.5373],
        [-0.6697],
        [-0.5308],
        [-1.0496],
        [-0.2794],
        [-1.2728],
        [-1.2361],
        [-0.6022],
        [-0.5684],
        [-0.

In [209]:
linear_model.parameters()

<generator object Module.parameters at 0x000001D1CC48FA50>

In [210]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[-0.2889,  0.0392, -0.0566,  0.2421,  0.4295]],
        requires_grad=True),
 Parameter containing:
 tensor([-0.4330], requires_grad=True)]

## Training function for neural network

In [211]:
def training_loop(n_epochs, optimizer, model, loss_fn, 
                  train_x, val_x,
                  train_y, val_y, 
                  epoch_report = 1000):
    for epoch in range(1, n_epochs + 1):
        train_p = model(train_x) # <1>
        loss_train = loss_fn(train_p, train_y)

        val_p = model(val_x) # <1>
        loss_val = loss_fn(val_p, val_y)
        
        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % epoch_report == 0 or epoch == n_epochs:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")


## Testing Neural Network training with a single node
nothign to grade

In [212]:
linear_model = nn.Linear(5, 1) # <1>
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    model = linear_model,
    loss_fn = loss_fn,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 500
    )

print()
print("Weight", linear_model.weight)
print("Bias", linear_model.bias)

print("-"*50)

Epoch 1, Training loss 1.7092, Validation loss 1.3225
Epoch 500, Training loss 1.0504, Validation loss 0.7987
Epoch 1000, Training loss 1.0504, Validation loss 0.7987
Epoch 1500, Training loss 1.0504, Validation loss 0.7987
Epoch 2000, Training loss 1.0504, Validation loss 0.7987
Epoch 2500, Training loss 1.0504, Validation loss 0.7987
Epoch 3000, Training loss 1.0504, Validation loss 0.7987

Weight Parameter containing:
tensor([[-1.3901e-10,  4.9021e-11, -1.0323e-09,  2.8931e-10,  2.1499e-10]],
       requires_grad=True)
Bias Parameter containing:
tensor([-0.0082], requires_grad=True)
--------------------------------------------------


## Learning Sequential layer models
nothing here to be graded

In [213]:
seq_model_example = nn.Sequential(
            nn.Linear(9, 22), # <1>
            nn.Tanh(),
            nn.Linear(22, 3)) # <2>
seq_model_example

Sequential(
  (0): Linear(in_features=9, out_features=22, bias=True)
  (1): Tanh()
  (2): Linear(in_features=22, out_features=3, bias=True)
)

In [214]:
[param.shape for param in seq_model_example.parameters()]

[torch.Size([22, 9]), torch.Size([22]), torch.Size([3, 22]), torch.Size([3])]

In [215]:
for name, param in seq_model_example.named_parameters():
    print(name, param.shape)

0.weight torch.Size([22, 9])
0.bias torch.Size([22])
2.weight torch.Size([3, 22])
2.bias torch.Size([3])


In [216]:
from collections import OrderedDict

seq_model_example = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(1, 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))

seq_model_example

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

In [217]:
for name, param in seq_model_example.named_parameters():
    print(name, param.shape)

hidden_linear.weight torch.Size([8, 1])
hidden_linear.bias torch.Size([8])
output_linear.weight torch.Size([1, 8])
output_linear.bias torch.Size([1])


In [218]:
seq_model_example.output_linear.bias

Parameter containing:
tensor([0.0729], requires_grad=True)

## Proper training with a sequential model

### example of single layer
for comparison

In [219]:
linear_model_0 = nn.Linear(5, 1)
linear_model_0.zero_grad()

optimizer_0 = optim.SGD(linear_model.parameters(), lr=1e-3)

loss_0 = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer_0,
    model = linear_model_0,
    loss_fn = loss_0, 
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y,  
    epoch_report = 25)

print()
print("Weight", linear_model.weight)
print("Bias", linear_model.bias)

print("-"*50)

Epoch 1, Training loss 1.2593, Validation loss 1.0225
Epoch 25, Training loss 1.2593, Validation loss 1.0225
Epoch 50, Training loss 1.2593, Validation loss 1.0225
Epoch 75, Training loss 1.2593, Validation loss 1.0225
Epoch 100, Training loss 1.2593, Validation loss 1.0225
Epoch 125, Training loss 1.2593, Validation loss 1.0225
Epoch 150, Training loss 1.2593, Validation loss 1.0225
Epoch 175, Training loss 1.2593, Validation loss 1.0225
Epoch 200, Training loss 1.2593, Validation loss 1.0225

Weight Parameter containing:
tensor([[-1.3901e-10,  4.9021e-11, -1.0323e-09,  2.8931e-10,  2.1499e-10]],
       requires_grad=True)
Bias Parameter containing:
tensor([-0.0082], requires_grad=True)
--------------------------------------------------


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


### 3a
1 hidden layer with 8 nodes

In [220]:
seq_model_1 = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(5, 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))
seq_model_1.zero_grad()

optimizer_1 = optim.SGD(seq_model_1.parameters(), lr=1e-3)
optimizer_1.zero_grad()

loss_1 = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer_1,
    model = seq_model_1,
    loss_fn = loss_1,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.2123, Validation loss 0.9398
Epoch 25, Training loss 1.2020, Validation loss 0.9310
Epoch 50, Training loss 1.1921, Validation loss 0.9225
Epoch 75, Training loss 1.1832, Validation loss 0.9148
Epoch 100, Training loss 1.1750, Validation loss 0.9077
Epoch 125, Training loss 1.1675, Validation loss 0.9013
Epoch 150, Training loss 1.1606, Validation loss 0.8953
Epoch 175, Training loss 1.1542, Validation loss 0.8898
Epoch 200, Training loss 1.1484, Validation loss 0.8847


### 3b
add 2 more hidden layers

In [221]:
a = 10
b = 25
c = 8

seq_model_2 = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation', nn.Tanh()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation', nn.Tanh()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(c, 1))
]))
seq_model_2.zero_grad()

optimizer_2 = optim.SGD(seq_model_2.parameters(), lr=1e-3)
optimizer_2.zero_grad()

loss_2 = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer_2,
    model = seq_model_2,
    loss_fn = loss_2,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.1121, Validation loss 0.8678
Epoch 25, Training loss 1.1048, Validation loss 0.8597
Epoch 50, Training loss 1.0984, Validation loss 0.8527
Epoch 75, Training loss 1.0932, Validation loss 0.8468
Epoch 100, Training loss 1.0889, Validation loss 0.8420
Epoch 125, Training loss 1.0852, Validation loss 0.8378
Epoch 150, Training loss 1.0821, Validation loss 0.8343
Epoch 175, Training loss 1.0795, Validation loss 0.8313
Epoch 200, Training loss 1.0772, Validation loss 0.8288


## Result of output

In [222]:
models = [linear_model_0, seq_model_1, seq_model_2]
for model in models:
    print(f'{model}\n')
        
    predictions = model(validation_x) 
    loss = loss_fn(predictions, validation_y)
    print(f'     Validation Loss: {loss}\n')
        
    for name, param in seq_model_2.named_parameters():
        print(f'     {name}  \t: ', param.shape)
        
    print('\n' + '-'*90 + '\n')

Linear(in_features=5, out_features=1, bias=True)

     Validation Loss: 1.0225023031234741

     hidden_linear_1.weight  	:  torch.Size([10, 5])
     hidden_linear_1.bias  	:  torch.Size([10])
     hidden_linear_2.weight  	:  torch.Size([25, 10])
     hidden_linear_2.bias  	:  torch.Size([25])
     hidden_linear_3.weight  	:  torch.Size([8, 25])
     hidden_linear_3.bias  	:  torch.Size([8])
     output_linear.weight  	:  torch.Size([1, 8])
     output_linear.bias  	:  torch.Size([1])

------------------------------------------------------------------------------------------

Sequential(
  (hidden_linear): Linear(in_features=5, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

     Validation Loss: 0.8844773769378662

     hidden_linear_1.weight  	:  torch.Size([10, 5])
     hidden_linear_1.bias  	:  torch.Size([10])
     hidden_linear_2.weight  	:  torch.Size([25, 10])
     hidden_linear_2.bias  	:  torch.Si