# Machine Learning Workshop - CyberLabs

# Setup
Just run the following cell. It will import all the required things for the session.

In [None]:
# Install the TensorFlow build this workshop was written against (pinned version).
!pip install tensorflow-gpu==2.0.0-alpha
# Download the dataset read later by the real-world regression section; saved as data_draft.csv.
!wget https://tlgur.com/d/4xrmPJwG -O data_draft.csv 
# Download the workshop's helper module; saved as utils.py.
!wget https://tlgur.com/d/4RX2BBeG -O utils.py
# Star-import the helpers. Later cells use names that this notebook never defines itself
# (read_csv, train_test_split, tf, randomtozero, train, loss, autoencoder, opt);
# presumably they all come from utils.py -- verify against that file.
from utils import *

## Generate Our Own Data 

Now we will choose a function $y = f(x)$.


### __*Let  y = 3x+ 5*__



Our data will be some random values of *x* in array *X* and the corresponding *y's* in *Y*.

On these data we will perform __Linear Regression.__
Our model will learn the *line.*


In [None]:
import numpy as np
# Fix NumPy's global random seed so the "random" data generated below is reproducible.
np.random.seed(0)

In [None]:
m = 30   # number of training examples
t = 20   # number of test examples

# np.random.rand(n) returns a 1-D array (a 'vector') of n values drawn uniformly
# from [0, 1). Stretching by 10 and shifting by -5 moves them into the range -5 .. +5.
X = -5 + 10 * np.random.rand(m)        # 30 training inputs
X_test = -5 + 10 * np.random.rand(t)   # 20 test inputs


In [None]:
# Show the 30 training inputs.
print(X)

In [None]:
# Multiplying a NumPy array by a number multiplies every element (no loop needed).
print(X * 2)

In [None]:
# Adding a number to a NumPy array adds it to every element.
print(X + 2)

In [None]:
# Show the 20 test inputs.
print(X_test)

### Our function is
\begin{equation*}
y = 3x+ 5
\end{equation*}

In [None]:
# Exercise: generate Y from X according to the function y = 3x + 5.
# Operate on the whole array X at once; no loop is needed.
################ START OF CODE ######################

Y = # Your code here

################ END OF CODE  #######################

Note that __Y__ is a vector, i.e. a 1-D array.

In [None]:
# Show the targets computed from X.
print(Y)

*__Let's visualize our data__*

In [None]:
# You do not need to understand this cell yet; it only draws the data.
import matplotlib.pyplot as plt

ax = plt.gca()                 # current axes (created on first use)
ax.grid(True)
ax.scatter(X, Y, marker="x")   # one 'x' marker per (x, y) training pair
ax.set_xlabel('X →')
ax.set_ylabel('Y →')
plt.show()



We will assume our __*hypothesis*__ to be 

\begin{equation*}
y =WX  + b
\end{equation*}

where we have to *learn* the values of W and b.

- $b$ is the intercept or the bias
- $W$ is the coefficient of $x$, also called the weight.

We will train a model with the data we had just generated.

# Gradient Descent


### We will do gradient descent by updating the values of W and b at each iteration of our training loop.

#### For each iteration,

- 1. Perform forward pass ( predicting values from our random weights)
    \begin{equation*}
\hat{y} = f (x)
\end{equation*}


- 2. Compute loss ( Mean Squared Error )
#### Mean squared error
\begin{equation*}
J = \frac{1} {2 m}  \sum_{}  ( \hat{y} -Y)^{2}
\end{equation*}
$m$ - Number of training examples


- 3. Calculate the gradient of the loss with respect to the *weight and bias*

\begin{equation*}\frac{dLoss} {dW} =  \end{equation*}

\begin{equation*} \frac{dLoss} {db}= \end{equation*}


- 4. Update the weight and bias


\begin{equation*}
W := W -   \alpha \frac{dLoss} {dW}
\end{equation*}

\begin{equation*}
b := b -   \alpha \frac{dLoss} {db}
\end{equation*}

$\alpha$ is the learning rate.

In [None]:
# Initialize W and b with any values (here far from the true values 3 and 5).
W = 16
b = -4

The following cell has some helper functions. Just run it. Don't worry about this code now.

In [None]:
def plot(X, Y, X_test, Y_test):
    """Draw the training points together with the model's predictions.

    X, Y           : training inputs and their true targets (red 'x' markers)
    X_test, Y_test : test inputs and the values predicted for them
                     (dots joined by a line)
    """
    axes = plt.gca()
    axes.scatter(X, Y, marker='x', c='r')
    axes.plot(X_test, Y_test, '-o')
    axes.set_xlabel('X →')
    axes.set_ylabel('Y →')
    plt.show()

\begin{equation*}
y_{pred} = Wx+ b
\end{equation*}

In [None]:
# Exercise: predict Y_test for the test inputs X_test using the current W and b
# (y_pred = W*x + b), then plot the predictions against the training data.
Y_test = #Your code here
plot(X, Y, X_test, Y_test)

In [None]:
def train_lr(no_of_iterations, learning_rate, X, Y, X_test):
    '''Fit the line y = W*x + b to (X, Y) by gradient descent (workshop exercise).

    The lines marked "Your code here" are left for the participant to fill in.

    no_of_iterations : number of times to run the training loop
    learning_rate    : alpha, the learning rate
    X                : training data
    Y                : labels
    X_test           : test data (the predictions for it are plotted)

    Every 10th iteration (t = 0, 10, 20, ...) the loss is printed and the
    training data is plotted together with the current predictions.

    Returns the tuple (W, b).
    '''

    # Every call starts from the same initial guess. W and b are local to this
    # function, independent of any W and b defined outside it.
    W = 16
    b = -4

    # Presumably the number of training examples (m in the loss formula) -- to be filled in.
    m_train = #Your code here
    # Training loop: t counts the iterations, starting from 0.
    for t in range(no_of_iterations):

        # 1. FORWARD PASS - predicting answer from our random weight and bias.
        '''

        W - weight
        b - bias term
        X - vector of x
        y_pred - predicted answers
        
        '''
        ################## START OF CODE ########################

        Y_pred = # Your code here

        ################## END OF CODE ##########################

        # 2. LOSS  - how much our predicted answers differ from the actual answers.
        # The workshop text defines it as J = 1/(2m) * sum((y_hat - Y)^2).
        '''
        y_pred - predicted answers
        Y - true answer
        m - number of training examples
        Use np.sum( vector_name ) to sum out values in the vector
        '''
        ################### START OF CODE #######################

        loss = # Your code here

        ################## END OF CODE  #########################

        # 3. BACKWARD PASS - Find gradients of the loss with respect to our weights.
        # NOTE: grad_w and grad_b stand for dLoss/dW and dLoss/db; the note below
        # writes the ratios the other way round ("dw/dloss", "db/dloss").
        '''
        grad_w - dw/dloss
        grad_b - db/dloss
        b - bias term
        x - vector of x
        y_pred - predicted answer
        m - number of training examples
        Use np.sum( vector_name ) to sum out values in the vector
        '''
        #################### START OF CODE #######################

        grad_w = # Your code here
        grad_b = # Your code here

        #################### END OF CODE #########################

        # 4. UPDATE WEIGHTS
        # Per the workshop text: W := W - alpha * dLoss/dW and b := b - alpha * dLoss/db,
        # where alpha is the learning_rate argument (not listed in the note below).
        '''
        grad_w - dw/dloss
        grad_b - db/dloss
        b - bias term
        x - vector of x
        y_pred - predicted answer
        '''
        ################ START OF CODE  ##########################

        W  = # Your code here
        b  = # Your code here

        ################# END OF CODE ###########################

        ''' 
        Let's see how our model performs on the test set 
        X_test = test examples
        Y_test = labels predicted by our model

        '''

        # Predictions for X_test -- to be filled in.
        Y_test = #Your code here 

        # Report progress on every 10th iteration: print the loss and plot the
        # training data together with the predictions for X_test.
        if t%10 == 0 : 
            print(f"After {t} iteration:")
            print("Loss: ",loss)
            plot(X, Y, X_test, Y_test)
    return W,b

In [None]:
# With 71 iterations t runs 0..70, so the final progress report (shown every 10th t) is for t = 70.
W, b = train_lr(no_of_iterations=71, learning_rate=5e-2, X=X, Y=Y, X_test=X_test)

#### Notice that the loss should decrease gradually.

### *Let us have a look at the predicted values and the original values*

Just run the following cell

In [None]:
import pandas as pd

# Put the model's predictions for the test inputs side by side with the values
# the true function y = 3x + 5 gives for the same inputs.
y = X_test * W + b
y_orig = X_test*3 + 5
df = pd.DataFrame({
    'X_test': X_test,
    'Predicted_target': y,
    'Original_target': y_orig,
})
df.head()

# Let's apply linear regression on a real world dataset.


We have weather reports from the time of World War II. This dataset was primarily used for the analysis of aerial bombing operations. It contains information on the weather conditions recorded each day at various weather stations around the world, including precipitation, snowfall, temperatures, wind speed and whether the day included thunderstorms or other poor weather conditions.

It was found from this dataset that there is a high correlation between the minimum and maximum temperature. One can be predicted from the other by simple __linear regression__.

### So we will train a model by linear regression and try to predict the maximum temperature from the minimum temperature.

# Getting Data

In [None]:
# Load the downloaded CSV. read_csv is not imported by this notebook's own cells;
# presumably it is pandas.read_csv made available by utils -- TODO confirm.
data = read_csv('data_draft.csv')

Let us take a look at the data first

In [None]:
# Peek at the first 10 rows of the table.
data.head(10)

How many data points do we have?

In [None]:
# Number of entries in the dataset.
len(data)

In [None]:
# Input feature: the 'MinTemp' column. Target to predict: the 'MaxTemp' column.
X_mintemp = data['MinTemp']
Y_maxtemp = data['MaxTemp']

## Splitting the data into train and test set


We will have 

- X -> training data points ( MinTemp )

- Y -> training data labels ( MaxTemp )

- X_test -> test data points ( MinTemp)

- Y_true -> test data labels ( MaxTemp ) - we will use this to see how well our model has performed


In [None]:
# Split the data into a training part and a test part.
# train_test_split is not imported by this notebook's own cells; presumably it is
# scikit-learn's, made available by utils -- TODO confirm.
fraction = 0.2    # fraction of the total data to hold out as test data
# This rebinds X, X_test and Y from the synthetic example; the test labels go into Y_true.
# random_state is fixed, presumably to make the split reproducible.
X, X_test, Y, Y_true = train_test_split(X_mintemp , Y_maxtemp , test_size=fraction , random_state=23)

*__Let's visualize our training data__*

In [None]:
# If you don't understand this cell, don't worry about it: it only draws the training data.
import matplotlib.pyplot as plt

ax = plt.gca()                 # current axes (created on first use)
ax.grid(True)
ax.scatter(X, Y, marker="x")   # one marker per (MinTemp, MaxTemp) training pair
ax.set_xlabel('X →')
ax.set_ylabel('Y →')
plt.show()



Again, assuming our  __*hypothesis*__ to be
\begin{equation*}
y =W*X  + b
\end{equation*}

where we have to *learn* the values of W and b.

- $b$ is the intercept or the bias
- $W$ is the coefficient for x also called the weight parameter.

We will train a model with the X and Y

In [None]:
## Exercise: try different learning rates and watch how the loss and the fitted line change.
## With 501 iterations t runs 0..500, so the last progress report is for t = 500.
W,b = train_lr(no_of_iterations=501, learning_rate=#Your code here , X=X, Y=Y, X_test=X_test)

# Let's take a relatively complex problem,
\begin{equation*}
y = f (x)
\end{equation*}

- __X__ -----> incomplete images of handwritten digits

- __Y__ -----> completed images of X


Now we will use the training data to make the computer learn the function __f__
(also called the model) with a suitable loss function and learning rate.

Once the function is ready, we can give it any incomplete image of a digit and it will complete it.

Let's give it a go.....

### Autoencoder

In [None]:
# Load MNIST; the digit labels are not needed here, so they are discarded.
(training, _), (test, _) = tf.keras.datasets.mnist.load_data()
# Append a trailing axis of length 1 to each image array.
training = np.expand_dims(training, axis=-1)
test = np.expand_dims(test, axis=-1)

In [None]:
def _make_pairs(images):
  """Return the arrays (inputs, targets) built from `images`.

  Each image is divided by 255. to form the target. The matching input is
  randomtozero() applied to a copy of that target (presumably randomtozero
  blanks out part of the image; the copy keeps the target intact either way).
  """
  inputs, targets = [], []
  for raw in images:
    scaled = raw / 255.
    targets.append(scaled)
    inputs.append(randomtozero(scaled.copy()))
  return np.array(inputs), np.array(targets)


x_img_train, y_img_train = _make_pairs(training)
x_img_test, y_img_test = _make_pairs(test)

# Report the shape of each of the four arrays.
for label, array in (
    ("Training shape X:", x_img_train),
    ("Training shape y:", y_img_train),
    ("Test shape x:", x_img_test),
    ("Test shape y:", y_img_test),
):
  print(label, array.shape)

In [None]:
# Slice the input and target arrays into batches of 128 images. Both datasets are
# built in the same order, so zipping them yields matching (input, target) batches.
BATCH_SIZE = 128
training_dataset_x = tf.data.Dataset.from_tensor_slices(x_img_train).batch(BATCH_SIZE)
training_dataset_y = tf.data.Dataset.from_tensor_slices(y_img_train).batch(BATCH_SIZE)

In [None]:
import matplotlib.pyplot as plt
import time

loss_ = []      # loss on the training images, one entry per epoch
loss_val = []   # loss on the test images, one entry per epoch
for epoch in range(35):
  print("=============================================")
  time_start = time.time()

  # One pass over the training data, one (input, target) batch at a time.
  for x, y in zip(training_dataset_x, training_dataset_y):
    train(loss, autoencoder, opt, x, y)

  # Evaluate on the full training and test arrays after the pass.
  train_loss = loss(autoencoder, x_img_train, y_img_train).numpy().astype(np.float64)
  loss_.append(train_loss)
  val_loss = loss(autoencoder,x_img_test ,y_img_test).numpy().astype(np.float64)
  loss_val.append(val_loss)

  print(f"Done: {epoch+1} in {time.time()-time_start} sec")
  print("Training Loss", train_loss)
  print("Validation Loss", val_loss)

  # Only every 5th epoch gets a visual check.
  if (epoch+1)%5 != 0:
    continue

  # Pick 5 random test images (indices may repeat) and show, row by row,
  # the incomplete input on the left and the model's output on the right.
  batch_check = np.random.choice(x_img_test.shape[0], 5)
  y_true = y_img_test[batch_check]   # complete images for the same indices (not plotted here)
  x = x_img_test[batch_check]
  y_pred = autoencoder(x).numpy()
  fig = plt.figure(figsize=(10,10))
  for row, pair in enumerate(zip(x, y_pred)):
    for col, picture in enumerate(pair, start=1):
      fig.add_subplot(5, 2, 2*row + col)
      plt.imshow(picture.squeeze(), cmap='gray')
      plt.axis("off")
  plt.show()

## THE END