# Programming for Data Science and Artificial Intelligence

## Deep Learning -  PyTorch I - Introduction and Linear Regression

- [WEIDMAN] Ch7
- https://pytorch.org/tutorials/
- https://github.com/yunjey/pytorch-tutorial

Here we introduce PyTorch, an increasingly popular neural network framework based on **automatic differentiation**.

In [1]:
import torch
import numpy as np
import sys

### Practice

- Try to play around, change some neurons, and see what happens
- Plot the model line using <code>model.linear.weight.item()</code> and <code>model.linear.bias.item()</code>
- Try to load the boston dataset and learn to map the data to tensorDataset
- Try to plot the loss over time

In [2]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [3]:
X, y  = load_boston()['data'] , load_boston()['target']

In [4]:
X.shape

(506, 13)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3)

In [6]:
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

In [7]:
train_ds = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))

In [8]:
train_dl = DataLoader(train_ds, batch_size = 10, shuffle = True)

In [9]:
for i, (x, y) in enumerate(train_dl):
    print(y)
    break

tensor([13.3000, 21.9000, 25.0000, 32.2000, 43.1000, 24.8000, 25.0000, 33.0000,
        24.7000, 33.2000])


In [10]:
class NeuralNet(torch.nn.Module):
    def __init__(self, n_feature, size_hidden, n_output):
        super(NeuralNet, self).__init__()
        self.fc1 = torch.nn.Linear(n_feature, size_hidden)   # hidden layer
        self.fc2 = torch.nn.Linear(size_hidden, 10) 
        self.fc3 = torch.nn.Linear(10, n_output)   # output layer
        self.relu   = torch.nn.ReLU()

    def forward(self, x):
        out =  self.fc1(x)   # activation function for hidden layer
        out =  self.relu(out)
        out =  self.fc2(out)
        out =  self.relu(out)
        out =  self.fc3(out)                 
        return out
    
    

In [31]:
n_feature = X_train.shape[1]
size_hidden = 30
n_output = 1
learning_rate = 0.001
num_epoch = 300

model = NeuralNet(n_feature, size_hidden, n_output)

In [32]:
model.parameters

<bound method Module.parameters of NeuralNet(
  (fc1): Linear(in_features=13, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=1, bias=True)
  (relu): ReLU()
)>

In [33]:
model.fc2.bias

Parameter containing:
tensor([ 0.1647,  0.1370, -0.1647, -0.1222, -0.1728,  0.1358, -0.0408,  0.1297,
        -0.0876,  0.1454], requires_grad=True)

In [34]:
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

In [35]:
list(model.parameters())[0].shape

torch.Size([30, 13])

In [36]:
next(iter(train_dl))

[tensor([[9.2660e-02, 3.4000e+01, 6.0900e+00, 0.0000e+00, 4.3300e-01, 6.4950e+00,
          1.8400e+01, 5.4917e+00, 7.0000e+00, 3.2900e+02, 1.6100e+01, 3.8361e+02,
          8.6700e+00],
         [6.5388e+00, 0.0000e+00, 1.8100e+01, 1.0000e+00, 6.3100e-01, 7.0160e+00,
          9.7500e+01, 1.2024e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.9205e+02,
          2.9600e+00],
         [1.1329e-01, 3.0000e+01, 4.9300e+00, 0.0000e+00, 4.2800e-01, 6.8970e+00,
          5.4300e+01, 6.3361e+00, 6.0000e+00, 3.0000e+02, 1.6600e+01, 3.9125e+02,
          1.1380e+01],
         [1.4236e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 6.9300e-01, 6.3430e+00,
          1.0000e+02, 1.5741e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.9690e+02,
          2.0320e+01],
         [4.3488e+00, 0.0000e+00, 1.8100e+01, 0.0000e+00, 5.8000e-01, 6.1670e+00,
          8.4000e+01, 3.0334e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.9690e+02,
          1.6290e+01],
         [5.7312e+00, 0.0000e+00, 1.8100e+01, 0.0000e+00, 5.3200e

In [None]:
total_step = len(train_dl)  #for printing purpose


for epoch in range(num_epoch):
    for i, (data, labels) in enumerate(train_dl):  
        
        optimizer.zero_grad()
        
        #Forward pass
        y_hat = model(data)
#      
        
        loss = criterion(y_hat, labels.view(-1, 1))  #note that outputs shape [batch, num_classes]) while labels shape ([batch, ])
        
        # Backward and optimize
        loss.backward()   
        
        #update gradient
        optimizer.step()
        
        if (i+1) % 6 == 0:
            sys.stdout.write('\rEpoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epoch, i+1, total_step, loss.item()))

Epoch [51/300], Step [36/36], Loss: 90.72222

In [18]:
final_yhat = model(torch.FloatTensor(X_train))
final_yhat = final_yhat.detach().numpy().flatten()

In [19]:
final_yhat

array([24.00967  , 29.015322 , 20.396034 , 11.233485 , 15.263755 ,
       21.749237 , 29.254725 , 25.154339 , 20.089449 , 29.087109 ,
       24.103935 , 13.086896 , 28.512547 ,  9.75282  , 29.546942 ,
       21.660011 , 25.394604 , 21.457478 , 24.426897 ,  9.982354 ,
       19.39489  , 18.360893 , 33.261425 ,  2.7118273, 24.392748 ,
       18.580584 , 22.840246 , 27.838388 , 24.800522 , 31.439762 ,
       23.385416 , 30.221468 , 23.610353 , 14.405337 , 25.155867 ,
       24.658539 , 20.768032 , 12.444699 , 14.445821 , 21.185764 ,
       10.041176 , 14.497876 , 30.22368  , 20.903114 , 25.917723 ,
       22.039082 , 28.778955 ,  8.57257  , 24.940117 , 24.39798  ,
       15.37675  , 16.991955 ,  9.479245 , 25.840862 , 18.549465 ,
       31.877716 , 24.049004 , 21.016006 , 31.613398 , 21.61774  ,
       13.212491 , 13.221108 , 23.816484 , 21.187317 ,  7.890893 ,
        8.269265 , 19.714943 , 25.751335 , 21.005432 , 30.718475 ,
       33.965797 , 29.850899 , 15.249267 , 26.032711 , 31.4384

In [20]:
from sklearn.metrics import mean_squared_error, r2_score

In [21]:
print(mean_squared_error(y_pred = final_yhat, y_true = y_train))
print(r2_score(y_pred = final_yhat, y_true = y_train))

33.68940573011265
0.6012897677969486


### Use NN model to predict X_test

In [22]:
nntestHat  = model(torch.FloatTensor(X_test)).detach().numpy().flatten()

In [23]:
print(mean_squared_error(y_pred = nntestHat, y_true = y_test))
print(r2_score(y_pred = nntestHat, y_true = y_test))

35.138145317518386
0.5825253664732106


## Compare with ML sklearn linear regression

In [24]:
from sklearn.linear_model import LinearRegression

In [25]:
skmodel = LinearRegression()
skmodel.fit(X_train, y_train)

LinearRegression()

In [26]:
sk_train_pred = skmodel.predict(X_train)

In [27]:
print(mean_squared_error(y_pred = sk_train_pred, y_true = y_train))
print(r2_score(y_pred = sk_train_pred, y_true = y_train))

22.007700342563332
0.7395414040801704


In [28]:
skmodel.coef_

array([-1.16688055e-01,  3.99265259e-02,  9.34493581e-03,  3.68419504e+00,
       -2.18044555e+01,  2.73443676e+00,  2.40102302e-02, -1.48726638e+00,
        3.26298410e-01, -1.05942855e-02, -1.02550914e+00,  9.44131156e-03,
       -6.42251374e-01])

In [29]:
sk_test_pred  = skmodel.predict(X_test)

In [30]:
print(mean_squared_error(y_pred = sk_test_pred, y_true = y_test))
print(r2_score(y_pred = sk_test_pred, y_true = y_test))

23.531914539154286
0.7204184424179227


So from the results above, nn outperforms ml in sklearn both in training and test set