# HW 5 Question 3

# Setup

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.optim as optim
import torch.nn as nn
torch.set_printoptions(edgeitems=2, linewidth=75)

## define model and functions

In [2]:
def model(feature, w5, w4, w3, w2, w1, b):
    return feature[4] * w5 + feature[3] * w4 + feature[2] * w3 + feature[1] * w2 + feature[0] * w1 + b

In [3]:
def loss_fn(predicted, actual):
    squared_diffs = (predicted - actual)**2
    return squared_diffs.mean()

In [4]:
rates_to_learn_at = [1/x for x in [10, 100, 1000, 10000, 100000]]
rates_to_learn_at

[0.1, 0.01, 0.001, 0.0001, 1e-05]

In [5]:
percent_for_validation = 0.2

## Data import

In [6]:
housing_df = pd.DataFrame(pd.read_csv('Housing.csv'))
housing_df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [7]:
print(f"shape = {np.shape(housing_df)}")

# creates a list of all variables from the column names
feature_list = list( housing_df.columns )

print(f"features are: {feature_list}")

shape = (545, 13)
features are: ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea', 'furnishingstatus']


In [8]:
# Maps to turn categorys into numbers 
def boolean_map(x):
    return x.map({'yes': 1 , 'no': 0})
def furnish_map(x):
    return x.map({'furnished': 1 , 'semi-furnished': 0.5 , 'unfurnished': 0})

# Extracts the yes and no column names
binary_vars = [*feature_list[5:10], feature_list[11]]
print(f"binary vars = {binary_vars}")

# Extracts the furnishing column names
furnish_vars = [feature_list[12]]
print(f"furnish vars = {furnish_vars}")

# Extracts the column names that are actual values
valued_vars = feature_list.copy()
[valued_vars.remove( item ) for item in binary_vars]
[valued_vars.remove( item ) for item in furnish_vars]
print(f"value vars = {valued_vars}")

binary vars = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
furnish vars = ['furnishingstatus']
value vars = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']


In [9]:
x_df = housing_df.copy()

## scale data
scaler = StandardScaler()
# scaler = MinMaxScaler()

x_df[valued_vars] = scaler.fit_transform(x_df[valued_vars])

## map text values
x_df[binary_vars] = x_df[binary_vars].apply(boolean_map)
x_df[furnish_vars] = x_df[furnish_vars].apply(furnish_map)

## make y_df
y_df = x_df.pop('price')


In [10]:
# input_values = valued_vars.copy()
# input_values.remove('price')


# x_df = x_df[input_values]

In [11]:
x_df.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,1.046726,1.403419,1.421812,1.378217,1,0,0,0,1,1.517692,1,1.0
1,1.75701,1.403419,5.405809,2.532024,1,0,0,0,1,2.679409,0,1.0
2,2.218232,0.047278,1.421812,0.22441,1,0,1,0,0,1.517692,1,0.5
3,1.083624,1.403419,1.421812,0.22441,1,0,1,0,1,2.679409,1,1.0
4,1.046726,1.403419,-0.570187,0.22441,1,1,1,0,1,1.517692,0,1.0


In [12]:
y_df.head()

0    4.566365
1    4.004484
2    4.004484
3    3.985755
4    3.554979
Name: price, dtype: float64

In [13]:
# Remove unwanted data
for item in [*binary_vars, *furnish_vars] :
    x_df.pop(item)
x_df.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,parking
0,1.046726,1.403419,1.421812,1.378217,1.517692
1,1.75701,1.403419,5.405809,2.532024,2.679409
2,2.218232,0.047278,1.421812,0.22441,1.517692
3,1.083624,1.403419,1.421812,0.22441,2.679409
4,1.046726,1.403419,-0.570187,0.22441,1.517692


In [14]:
# Convert data frame to tensor

x = torch.tensor(x_df.values, dtype=torch.float32)
y = torch.tensor(y_df.values, dtype=torch.float32)

In [15]:
print(x.shape)

torch.Size([545, 5])


## Split into train and validation sets

In [16]:
n_samples = x.shape[0]
n_val = int(percent_for_validation * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices  # <1>

(tensor([110, 297, 255, 485, 289, 264, 323,  32, 316,  76,  75, 380, 359,
         392, 452, 408, 237, 450,  56, 275, 490, 295, 344, 405, 515, 201,
         265, 198, 121, 276, 451, 307,  34, 135,  61,  74, 542, 230, 514,
         182, 129, 477, 361, 493, 126, 183, 336, 365, 239, 141, 433, 245,
         406,  86, 122, 229, 302,  44, 389, 219, 143, 435, 218, 172,  60,
         166, 233, 217, 314, 132, 192, 395, 124, 404, 285,   5, 328, 238,
         120,  99, 354,  22, 510, 440, 214, 349,  51, 234, 524, 471, 434,
         241, 394, 370,   8, 498,   9, 235, 248, 460,   3, 331, 432,  41,
         149, 329, 252, 343, 468, 522, 151, 386, 189, 495, 486, 402, 301,
         527, 270,  29,   7, 512, 210, 320, 457, 459, 437, 209, 377,  89,
         378, 322, 507, 462, 190,  87, 116, 503, 342, 309,  11, 426, 139,
         473, 523,  78, 193, 163, 159, 520, 325,  12, 540, 508, 466, 223,
         513, 501,  49,  25, 410, 134,  98,  18,  73, 530, 213, 185, 103,
         205, 196, 531, 422, 357, 260,

In [17]:
training_x = x[train_indices]
training_y  = y [train_indices]

validation_x = x[val_indices]
validation_y  = y[val_indices]

# training_un   = 0.1 * training_x
# validation_un = 0.1 * validation_x

In [18]:
type(training_x)

torch.Tensor

In [19]:
training_x.shape[1]

5

## Learning how to use neural network component
nothing to grade

In [20]:
num_features = training_x.shape[1]
print(num_features)

linear_model = nn.Linear(num_features,1) # <1>
linear_model(training_x)

5


tensor([[ 0.2683],
        [ 0.5689],
        [ 0.9638],
        [ 1.0409],
        [ 0.5852],
        [ 0.9486],
        [-0.2637],
        [-0.0575],
        [ 0.0768],
        [ 0.6753],
        [ 0.2348],
        [ 1.1554],
        [ 0.2473],
        [ 0.4131],
        [ 0.3248],
        [ 1.0896],
        [ 0.5474],
        [ 0.3420],
        [ 0.6044],
        [ 1.0938],
        [ 0.0865],
        [ 0.1939],
        [ 1.0699],
        [ 0.5502],
        [ 0.3104],
        [ 0.4269],
        [ 0.2670],
        [ 0.6709],
        [ 0.8398],
        [ 0.7683],
        [ 1.4517],
        [-0.3230],
        [-0.4169],
        [ 0.3605],
        [ 1.3581],
        [ 0.0457],
        [ 1.0396],
        [ 1.0144],
        [ 0.2828],
        [ 0.3368],
        [ 0.3891],
        [ 1.2160],
        [ 0.9399],
        [ 0.6687],
        [ 0.3420],
        [ 0.8240],
        [ 0.4632],
        [ 1.2805],
        [ 0.0404],
        [ 0.1768],
        [-0.4437],
        [-0.1545],
        [ 0.

In [21]:
linear_model.weight

Parameter containing:
tensor([[ 0.2855, -0.3065,  0.1013, -0.2249, -0.3219]],
       requires_grad=True)

In [22]:
linear_model.bias

Parameter containing:
tensor([0.4293], requires_grad=True)

In [23]:
linear_model = nn.Linear(5, 1) # <1>
optimizer = optim.SGD(
    linear_model.parameters(), # <2>
    lr=1e-2)

linear_model(training_x)

tensor([[-8.7330e-01],
        [ 1.6118e-01],
        [-8.5777e-01],
        [-4.3072e-01],
        [-1.2204e+00],
        [-2.8546e-02],
        [-1.2032e+00],
        [ 1.3089e-01],
        [ 1.0220e-01],
        [ 5.2089e-01],
        [ 5.1196e-01],
        [-4.5519e-01],
        [-4.2530e-01],
        [ 3.6526e-01],
        [-5.0291e-01],
        [-4.4112e-01],
        [ 3.3657e-01],
        [ 3.8045e-01],
        [ 1.6032e-01],
        [-4.4202e-01],
        [-8.5014e-03],
        [ 4.1208e-01],
        [-4.3691e-01],
        [-4.6475e-02],
        [ 3.8719e-01],
        [ 3.6231e-01],
        [ 3.9647e-01],
        [ 3.1019e-01],
        [ 2.7411e-01],
        [-8.1601e-01],
        [-5.1846e-01],
        [-3.6454e-01],
        [ 6.5790e-01],
        [ 9.7059e-01],
        [-9.4200e-01],
        [ 2.1663e-04],
        [-4.3044e-01],
        [-4.2608e-02],
        [ 3.9310e-01],
        [ 3.8157e-01],
        [-1.3420e-01],
        [-4.6812e-01],
        [-1.2972e-01],
        [-7

In [24]:
linear_model.parameters()

<generator object Module.parameters at 0x7ff24546dc80>

In [25]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[-0.0610,  0.2715, -0.1074,  0.3795, -0.3130]],
        requires_grad=True),
 Parameter containing:
 tensor([-0.0788], requires_grad=True)]

## Training function for neural network

In [26]:
def training_loop(n_epochs, optimizer, model, loss_fn, 
                  train_x, val_x,
                  train_y, val_y, 
                  epoch_report = 1000):
    for epoch in range(1, n_epochs + 1):
        train_p = model(train_x) # <1>
        loss_train = loss_fn(train_p, train_y)

        val_p = model(val_x) # <1>
        loss_val = loss_fn(val_p, val_y)
        
        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % epoch_report == 0 or epoch == n_epochs:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")


## Testing Neural Network training with a single node
nothign to grade

In [28]:
linear_model = nn.Linear(5, 1) # <1>
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    model = linear_model,
    loss_fn = loss_fn,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 500
    )

print()
print("Weight", linear_model.weight)
print("Bias", linear_model.bias)

print("-"*50)

Epoch 1, Training loss 1.3292, Validation loss 1.4603
Epoch 500, Training loss 0.9600, Validation loss 1.1625
Epoch 1000, Training loss 0.9600, Validation loss 1.1625
Epoch 1500, Training loss 0.9600, Validation loss 1.1625
Epoch 2000, Training loss 0.9600, Validation loss 1.1625
Epoch 2500, Training loss 0.9600, Validation loss 1.1625
Epoch 3000, Training loss 0.9600, Validation loss 1.1625

Weight Parameter containing:
tensor([[ 5.2285e-12, -2.7448e-10, -2.5329e-12, -3.1897e-10, -3.1807e-10]],
       requires_grad=True)
Bias Parameter containing:
tensor([-0.0221], requires_grad=True)
--------------------------------------------------


## Learning Sequential layer models
nothing here to be graded

In [29]:
seq_model_example = nn.Sequential(
            nn.Linear(9, 22), # <1>
            nn.Tanh(),
            nn.Linear(22, 3)) # <2>
seq_model_example

Sequential(
  (0): Linear(in_features=9, out_features=22, bias=True)
  (1): Tanh()
  (2): Linear(in_features=22, out_features=3, bias=True)
)

In [30]:
[param.shape for param in seq_model_example.parameters()]

[torch.Size([22, 9]), torch.Size([22]), torch.Size([3, 22]), torch.Size([3])]

In [31]:
for name, param in seq_model_example.named_parameters():
    print(name, param.shape)

0.weight torch.Size([22, 9])
0.bias torch.Size([22])
2.weight torch.Size([3, 22])
2.bias torch.Size([3])


In [32]:
from collections import OrderedDict

seq_model_example = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(1, 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))

seq_model_example

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

In [33]:
for name, param in seq_model_example.named_parameters():
    print(name, param.shape)

hidden_linear.weight torch.Size([8, 1])
hidden_linear.bias torch.Size([8])
output_linear.weight torch.Size([1, 8])
output_linear.bias torch.Size([1])


In [34]:
seq_model_example.output_linear.bias

Parameter containing:
tensor([-0.2838], requires_grad=True)

## Proper training with a sequential model

### example of single layer
for comparison

In [35]:
linear_model_0 = nn.Linear(5, 1)
linear_model_0.zero_grad()

optimizer_0 = optim.SGD(linear_model.parameters(), lr=1e-3)

loss_0 = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer_0,
    model = linear_model_0,
    loss_fn = loss_0, 
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y,  
    epoch_report = 25)

print()
print("Weight", linear_model.weight)
print("Bias", linear_model.bias)

print("-"*50)

Epoch 1, Training loss 1.1196, Validation loss 1.3638
Epoch 25, Training loss 1.1196, Validation loss 1.3638
Epoch 50, Training loss 1.1196, Validation loss 1.3638
Epoch 75, Training loss 1.1196, Validation loss 1.3638
Epoch 100, Training loss 1.1196, Validation loss 1.3638
Epoch 125, Training loss 1.1196, Validation loss 1.3638
Epoch 150, Training loss 1.1196, Validation loss 1.3638
Epoch 175, Training loss 1.1196, Validation loss 1.3638
Epoch 200, Training loss 1.1196, Validation loss 1.3638

Weight Parameter containing:
tensor([[ 5.2285e-12, -2.7448e-10, -2.5329e-12, -3.1897e-10, -3.1807e-10]],
       requires_grad=True)
Bias Parameter containing:
tensor([-0.0221], requires_grad=True)
--------------------------------------------------


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


### 3a
1 hidden layer with 8 nodes

In [36]:
seq_model_1 = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(5, 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))
seq_model_1.zero_grad()

optimizer_1 = optim.SGD(seq_model_1.parameters(), lr=1e-3)
optimizer_1.zero_grad()

loss_1 = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer_1,
    model = seq_model_1,
    loss_fn = loss_1,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0410, Validation loss 1.2467
Epoch 25, Training loss 1.0370, Validation loss 1.2431
Epoch 50, Training loss 1.0332, Validation loss 1.2397
Epoch 75, Training loss 1.0297, Validation loss 1.2364
Epoch 100, Training loss 1.0264, Validation loss 1.2334
Epoch 125, Training loss 1.0234, Validation loss 1.2306
Epoch 150, Training loss 1.0206, Validation loss 1.2280
Epoch 175, Training loss 1.0181, Validation loss 1.2255
Epoch 200, Training loss 1.0157, Validation loss 1.2232


In [37]:
# print('Hidden Weight', seq_model_1.hidden_linear.weight)
# print('Hidden Bias', seq_model_1.hidden_linear.bias)
# print()
# print("Output Weight", seq_model_1.output_linear.weight)
# print("Output Bias", seq_model_1.output_linear.bias)

In [38]:
# print('output', seq_model_1(validation_x))
# print('answer', validation_y)

### 3b
add 2 more hidden layers

In [39]:
a = 10
b = 25
c = 8

seq_model_2 = nn.Sequential(OrderedDict([
    ('hidden_linear_1', nn.Linear(5, a)),
    ('hidden_activation', nn.Tanh()),
    ('hidden_linear_2', nn.Linear(a, b)),
    ('hidden_activation', nn.Tanh()),
    ('hidden_linear_3', nn.Linear(b, c)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(c, 1))
]))
seq_model_2.zero_grad()

optimizer_2 = optim.SGD(seq_model_2.parameters(), lr=1e-3)
optimizer_2.zero_grad()

loss_2 = nn.MSELoss()

training_loop(
    n_epochs = 200, 
    optimizer = optimizer_2,
    model = seq_model_2,
    loss_fn = loss_2,
    train_x = training_x,
    val_x = validation_x, 
    train_y = training_y,
    val_y = validation_y, 
    epoch_report = 25)

Epoch 1, Training loss 1.0456, Validation loss 1.1854
Epoch 25, Training loss 1.0272, Validation loss 1.1744
Epoch 50, Training loss 1.0124, Validation loss 1.1663
Epoch 75, Training loss 1.0010, Validation loss 1.1608
Epoch 100, Training loss 0.9923, Validation loss 1.1571
Epoch 125, Training loss 0.9855, Validation loss 1.1548
Epoch 150, Training loss 0.9803, Validation loss 1.1534
Epoch 175, Training loss 0.9763, Validation loss 1.1528
Epoch 200, Training loss 0.9732, Validation loss 1.1526


## Result of output

In [40]:
models = [linear_model_0, seq_model_1, seq_model_2]
for model in models:
    print(f'{model}\n')
        
    predictions = model(validation_x) 
    loss = loss_fn(predictions, validation_y)
    print(f'     Validation Loss: {loss}\n')
        
    for name, param in seq_model_2.named_parameters():
        print(f'     {name}  \t: ', param.shape)
        
    print('\n' + '-'*90 + '\n')

Linear(in_features=5, out_features=1, bias=True)

     Validation Loss: 1.3637760877609253

     hidden_linear_1.weight  	:  torch.Size([10, 5])
     hidden_linear_1.bias  	:  torch.Size([10])
     hidden_linear_2.weight  	:  torch.Size([25, 10])
     hidden_linear_2.bias  	:  torch.Size([25])
     hidden_linear_3.weight  	:  torch.Size([8, 25])
     hidden_linear_3.bias  	:  torch.Size([8])
     output_linear.weight  	:  torch.Size([1, 8])
     output_linear.bias  	:  torch.Size([1])

------------------------------------------------------------------------------------------

Sequential(
  (hidden_linear): Linear(in_features=5, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

     Validation Loss: 1.2231050729751587

     hidden_linear_1.weight  	:  torch.Size([10, 5])
     hidden_linear_1.bias  	:  torch.Size([10])
     hidden_linear_2.weight  	:  torch.Size([25, 10])
     hidden_linear_2.bias  	:  torch.Si

In [41]:
# from matplotlib import pyplot as plt

# x_sample = torch.arange(0, 5, 0.1).unsqueeze(1)
# h = scaler.inverse_transform(x_sample)
# print(h)
# # Split x and y scaler?
# # how to show output?

# fig = plt.figure(dpi=600)
# plt.xlabel("Fahrenheit")
# plt.ylabel("Celsius")
# # plt.plot(t_u.numpy(), t_c.numpy(), 'o')
# plt.plot(t_range.numpy(), seq_model(scaler.fit(t_range)).detach().numpy(), 'c-')
# # plt.plot(t_u.numpy(), seq_model(0.1 * t_u).detach().numpy(), 'kx')

In [42]:
# # Exercises here!

# neuron_count = 20

# seq_model = nn.Sequential(OrderedDict([
#     ('hidden_linear', nn.Linear(1, neuron_count)),
#     ('hidden_activation', nn.Tanh()),
#     ('output_linear', nn.Linear(neuron_count, 1))
# ]))

# optimizer = optim.SGD(seq_model.parameters(), lr=1e-4)

# training_loop(
#     n_epochs = 5000, 
#     optimizer = optimizer,
#     model = seq_model,
#     loss_fn = nn.MSELoss(),
#     t_u_train = t_un_train,
#     t_u_val = t_un_val, 
#     t_c_train = t_c_train,
#     t_c_val = t_c_val)

# from matplotlib import pyplot as plt

# t_range = torch.arange(20., 90.).unsqueeze(1)

# fig = plt.figure(dpi=150)
# plt.xlabel("Fahrenheit")
# plt.ylabel("Celsius")
# plt.plot(t_u.numpy(), t_c.numpy(), 'o')
# plt.plot(t_range.numpy(), seq_model(0.1 * t_range).detach().numpy(), 'c-')
# plt.plot(t_u.numpy(), seq_model(0.1 * t_u).detach().numpy(), 'kx')
