# 2. Exercises
a. Some practice getting slopes (derivatives, gradients) of some functions with PyTorch.  
b. First machine learning exercise: add one line of code to the training loop, where it says "ADD YOUR CODE HERE".

In [None]:
import numpy as np
import torch

In [None]:
# Worked example: using autograd to differentiate a function.

def f(x):
    """Evaluate the quadratic x**2 + 3*x + 2 at x."""
    return x**2 + 3*x + 2

# Create the input as a float32 tensor and ask autograd to track it.
x = torch.tensor([3.0], dtype=torch.float32, requires_grad=True)

# Forward pass: evaluate f at x and keep the result.
y = f(x)
print(y, "\n")

# Backward pass: calling backward() on the result triggers PyTorch to
# calculate the gradient, which is then available as x.grad.
y.backward()
print(f"The slope of x**2 + 3*x + 2 when x = 3 is {x.grad}")

In [None]:
# If the same calculation is repeated, what happens to the value of grad?

# And again?

# Run the identical forward/backward pass two more times on the same x,
# printing x.grad after each pass.
for repeat in range(2):
    y = f(x)
    y.backward()
    print(f"The slope of x**2 + 3*x + 2 when x = 3 is {x.grad}")



In [None]:
# Gradients accumulate by design: each call to backward() adds to the existing .grad.
# This feature supports coding recurrent neural networks.
# For regular neural networks, though, gradients have to be zeroed before each new
# backward pass; otherwise the old and new gradients are summed.

# Zero the gradient on x using this line (uncomment):
# x.grad.zero_()

# Note, in PyTorch methods suffixed with an underscore '_' modify the variables in place.

In [None]:
# Use PyTorch to calculate the derivative of x^3 when x = -2:

def f(x):
    """Return x raised to the third power."""
    return pow(x, 3)



# answer:


In [None]:
# The .data attribute of a tensor returns a tensor that shares the same underlying
# data (it is a view of the values, not an independent copy).
# Does it carry over the value of the requires_grad flag?


# A tensor that autograd tracks, and the tensor obtained from its .data attribute.
x = torch.tensor([-2.0], dtype=torch.float32, requires_grad=True)
v = x.data

# Print the requires_grad flag of the original first, then of the .data tensor.
print(x.requires_grad, v.requires_grad, sep="\n")



# Gradient Descent Exercise

* Use PyTorch's auto differentiation + gradient descent to predict iris sepal width from sepal length.
* The iris dataset is a famous dataset collected by R.A. Fisher:

https://archive.ics.uci.edu/ml/datasets/iris

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Read the iris measurements and keep only the rows whose species is 'setosa'.
iris = pd.read_csv('./data/iris.csv')
setosa = iris.loc[iris["species"] == 'setosa']

# Preview three sampled setosa rows as the cell output.
setosa.sample(3)

In [None]:
# Scatter plot of the setosa rows: sepal length on x, sepal width on y.
# The figure is created first and the seaborn style is set before the axes exist,
# in the same order as before.
plt.figure(figsize=(9,6))
sns.set_style('darkgrid')
ax = plt.gca()
ax.scatter(setosa.sepal_length, setosa.sepal_width)
ax.set_xlabel("sepal length")
ax.set_ylabel("sepal width")

# The model

* instead of `y = sin(x - w)`, use something a little simpler: `f(x) = w*x + b`.
* will need to update both w and b.
* start from some initial values of w and b (for example w = -0.5, b = 7); the code below draws them at random with a fixed seed instead

In [None]:
# Fix the RNG seed so the randomly drawn starting parameters repeat on every run.
torch.manual_seed(4)

# Trainable parameters, each a one-element tensor tracked by autograd.
# w is drawn first, then b, so the seeded values land on the same names.
w = torch.randn(1, requires_grad=True)
b = torch.randn(1, requires_grad=True)

# x values starting at 4 in steps of 0.1 and stopping before 6, as float32.
xs = torch.from_numpy(np.arange(4, 6, 0.1)).to(torch.float32)

def f(x, w, b):
    """Linear model: multiply x by the weight w, then add the bias b."""
    scaled = w * x
    return scaled + b

# Model output over xs for the current w and b, as a NumPy array for plotting.
y = f(xs, w, b).data.numpy()

# Observed setosa points, with the model's line over xs drawn in green.
plt.figure(figsize=(9,6))
ax = plt.gca()
ax.scatter(setosa.sepal_length, setosa.sepal_width)
ax.plot(xs.numpy(), y, color='green');
ax.set_xlabel("sepal length")
ax.set_ylabel("sepal width")

In [None]:
def mean_squared_error(y_obs, y_predictions):
    """Return the mean of the squared element-wise differences.

    y_obs: observed values.
    y_predictions: predicted values, subtracted element-wise from y_obs.
    """
    residuals = y_obs - y_predictions
    return torch.mean(residuals ** 2)

In [None]:
# Observed inputs (sepal length) and targets (sepal width) as float32 tensors.
x_obs = torch.tensor(setosa.sepal_length.values, dtype=torch.float32)
y_obs = torch.tensor(setosa.sepal_width.values,  dtype=torch.float32)


# Step size: each gradient is multiplied by this before being subtracted from w / b.
LEARNING_RATE = 0.001

# Gradient descent: 100 iterations, each using all observations at once.
for i in range(100):
    
    # Forward pass: predictions from the current w and b.
    y_predictions = f(x_obs, w, b)
    
    # Scalar loss comparing the observations with the predictions.
    loss = mean_squared_error(y_obs, y_predictions)

    # ADD YOUR CODE HERE
    # Add _one_ line to calculate the gradients.
    # (Nothing in this loop calculates gradients until that line is added, so the
    # lines below that read w.grad and b.grad depend on it.)

    
    # Report progress on every 10th iteration (i = 0, 10, 20, ...).
    if not i % 10:
        print(f"loss: {loss.item()}, w: {w.item()}, b: {b.item()}, b.grad {b.grad.item()}, w.grad: {w.grad.item()}")

        
    # Step each parameter against its gradient. The arithmetic goes through .data,
    # and w / b are rebound to the newly created result tensors.
    w = w.data - (w.grad * LEARNING_RATE)
    b = b.data - (b.grad * LEARNING_RATE)

    # Switch gradient tracking back on for the new tensors so the next iteration
    # can calculate gradients with respect to them.
    w.requires_grad = True
    b.requires_grad = True



In [None]:
# Report the current parameter values, one per line.
print(f"w: {w.item()}", f"b: {b.item()}", sep="\n")

# Model output over xs for the current w and b, as a NumPy array for plotting.
y = f(xs, w, b).data.numpy()

# Observed setosa points, with the model's line over xs drawn in green.
plt.figure(figsize=(9,6))
ax = plt.gca()
ax.scatter(setosa.sepal_length, setosa.sepal_width)
ax.plot(xs.numpy(), y, color='green');
ax.set_xlabel("sepal length")
ax.set_ylabel("sepal width")