### Importing data and converting data
- We can use pandas or tensorflow to import data
- Tensorflow is used when we have complex data pipelines
- Pandas is used for simple data formats

In [13]:
# Import numpy and pandas
import numpy as np
import pandas as pd

# Load the student data from a CSV file into a pandas DataFrame
# (the path is relative to the notebook's working directory)
students = pd.read_csv("../student_data.csv")

# Preview the first rows of the DataFrame (shown as the cell output)
students.head()

# Optional: convert the DataFrame to a NumPy array (currently disabled)
#students_np = np.array(students)

# Optional: display the converted array (currently disabled)
#students_np

Unnamed: 0,Hours of Sleep,Hours Studied,Grade on Test,Grade in Class,Cups of Coffee
0,10.602667,4.586892,99,99.187294,5
1,8.172997,4.40512,72,67.070903,3
2,6.430132,0.51963,52,39.644696,4
3,7.963793,5.004348,80,61.843705,1
4,8.279421,2.984489,87,103.495002,0


### Defining a loss function

In [1]:
# Import tensorflow
import tensorflow as tf
from tensorflow import keras

# Compute the MSE loss, i.e. the average of the squared differences between
# the actual (targets) and predicted values.
# NOTE(review): targets and predictions are not defined in this cell —
# presumably placeholders for data provided elsewhere; confirm.
loss = tf.keras.losses.mse(targets,predictions)

# Design a linear regression model
def linear_regression(intercept, slope=slope, features=features):
    """Predict targets with a univariate linear model.

    Args:
        intercept: Offset added to every prediction.
        slope: Multiplier applied to the features.
        features: Input values to predict from.

    Returns:
        ``intercept + features * slope``.
    """
    # Use the `slope` parameter so the value passed by the caller is honoured.
    return intercept + features * slope

# Define a loss function to compute MSE
def loss_function(intercept, slope, targets=targets, features=features):
    """Return the mean squared error of the linear model on the given data.

    Args:
        intercept: Intercept of the linear model.
        slope: Slope of the linear model.
        targets: Actual values the predictions are compared against.
        features: Input values the predictions are computed from.

    Returns:
        The result of ``tf.keras.losses.mse(targets, predictions)``.
    """
    # Forward `features` explicitly so that callers who pass their own data
    # (e.g. a test set) are scored on it rather than on the default features.
    predictions = linear_regression(intercept, slope, features)
    # mse takes exactly two arguments: the true values and the predictions.
    return tf.keras.losses.mse(targets, predictions)

# Compute the loss with test_targets and test_features passed explicitly
# in place of the default targets and features
loss_function(intercept,slope,test_targets,test_features)

# Compute the loss using the default targets and features
loss_function(intercept,slope)


### Loss functions in Tensorflow
- Import the keras module from tensorflow.
- Then, use price and predictions to compute the mean squared error (mse).
### Note: for the data we were given, the MAE is smaller than the MSE

In [None]:
# Import the keras module from tensorflow
from tensorflow import keras

# Compute the mean squared error (mse) between price and predictions
# NOTE(review): price and predictions are not defined in this cell —
# presumably provided by the exercise environment; confirm.
loss = keras.losses.mse(price, predictions)

# Print the mean squared error (mse)
print(loss.numpy())

# Compute the mean absolute error (mae) on the same inputs
mae_loss = keras.losses.mae(price,predictions)

# Print the mean absolute error (mae)
print(mae_loss.numpy())

### Modifying the loss function
- Define a variable, scalar, with an initial value of 1.0 and a type of float32.
- Define a function called loss_function(), which takes scalar, features, and targets as arguments in that order.
- Use a mean absolute error loss function.

In [None]:
# Initialize a variable named scalar with value 1.0 and type float32.
# Variable is referenced through the tf namespace (imported above as
# `import tensorflow as tf`); the bare name `Variable` is never imported.
scalar = tf.Variable(1.0, dtype='float32')

# Define the model
def model(scalar, features=features):
    """Scale the features by a single multiplier.

    Args:
        scalar: Multiplier applied to every feature value.
        features: Input values to scale.

    Returns:
        ``scalar * features``.
    """
    scaled = scalar * features
    return scaled

# Define a loss function
def loss_function(scalar, features=features, targets=targets):
    """Return the mean absolute error of ``model`` against the targets.

    Args:
        scalar: Multiplier passed to ``model``.
        features: Input values passed to ``model``.
        targets: Actual values the predictions are compared against.

    Returns:
        The result of ``keras.losses.mae(targets, predictions)``.
    """
    return keras.losses.mae(targets, model(scalar, features))

# Evaluate the loss function with the default features and targets,
# convert the result with .numpy() and print it
print(loss_function(scalar).numpy())

### Linear regression
- A linear regression model assumes that the relationship between two variables can be captured by a line
- Univariate regression - has only one feature
- Multiple regression - has more than one feature

In [None]:
# Define the target and features as float32 NumPy arrays
# NOTE(review): housing is not defined in this cell — presumably a DataFrame
# loaded elsewhere; confirm.
price = np.array(housing['price'], np.float32)
size = np.array(housing['sqft_living'], np.float32)

# Define the intercept and slope.
# dtype is passed by keyword because the second positional parameter of
# tf.Variable is `trainable`, not `dtype`.
intercept = tf.Variable(0.1, dtype=tf.float32)
slope = tf.Variable(0.1, dtype=tf.float32)

# Define a linear regression model
def linear_regression(intercept, slope, features=size):
    """Predict targets with a univariate linear model.

    Args:
        intercept: Offset added to every prediction.
        slope: Multiplier applied to the features.
        features: Input values to predict from (defaults to ``size``).

    Returns:
        ``intercept + features * slope``.
    """
    scaled_features = features * slope
    return intercept + scaled_features

# Compute the predicted values and loss
def loss_function(intercept, slope, targets=price, features=size):
    """Return the mean squared error of the linear model on the given data.

    Args:
        intercept: Intercept of the linear model.
        slope: Slope of the linear model.
        targets: Actual values to compare against (defaults to ``price``).
        features: Input values to predict from (defaults to ``size``).

    Returns:
        The result of ``tf.keras.losses.mse(targets, predictions)``.
    """
    # Forward `features` so a caller-supplied feature set is actually used
    # instead of silently falling back to linear_regression's own default.
    predictions = linear_regression(intercept, slope, features)
    return tf.keras.losses.mse(targets, predictions)

# Define an optimization operation (Adam with its default settings)
opt = tf.keras.optimizers.Adam()

# Run 1000 minimization steps, printing the loss after each step
for j in range(1000):
    # The loss is passed as a zero-argument callable; var_list names the
    # variables the optimizer is allowed to update.
    opt.minimize(
        lambda: loss_function(intercept, slope),
        var_list=[intercept, slope],
    )
    print(loss_function(intercept, slope))

# Print the trained parameters
print(intercept.numpy(), slope.numpy())

### Multiple regression
- You will use price_log as your target and size_log and bedrooms as your features. Each of these tensors has been defined and is available. You will also switch from using the mean squared error loss to the mean absolute error loss: keras.losses.mae(). Finally, the predicted values are computed as follows: params[0] + feature1*params[1] + feature2*params[2]. Note that we've defined a vector of parameters, params, as a variable, rather than using three variables. Here, params[0] is the intercept and params[1] and params[2] are the slopes.

In [None]:
# Define the linear regression model
def linear_regression(params, feature1=size_log, feature2=bedrooms):
    """Predict targets with a two-feature linear model.

    Args:
        params: Indexable parameters; ``params[0]`` is the intercept and
            ``params[1]`` / ``params[2]`` are the slopes of the two features.
        feature1: First feature (defaults to ``size_log``).
        feature2: Second feature (defaults to ``bedrooms``).

    Returns:
        ``params[0] + feature1 * params[1] + feature2 * params[2]``.
    """
    intercept_term = params[0]
    first_term = feature1 * params[1]
    second_term = feature2 * params[2]
    return intercept_term + first_term + second_term

# Define the loss function
def loss_function(intercept, targets=price_log, feature1=size_log, feature2=bedrooms):
    """Return the mean absolute error of the two-feature linear model.

    Args:
        intercept: Despite its name, this value is forwarded unchanged as the
            first argument of ``linear_regression``.
        targets: Actual values to compare against (defaults to ``price_log``).
        feature1: First feature (defaults to ``size_log``).
        feature2: Second feature (defaults to ``bedrooms``).

    Returns:
        The result of ``keras.losses.mae(targets, predictions)``.
    """
    return keras.losses.mae(
        targets,
        linear_regression(intercept, feature1, feature2),
    )

# Define the optimize operation
opt = keras.optimizers.Adam()

# Perform 10 minimization steps, printing params after each step
# NOTE(review): params is not defined in this cell — presumably a variable
# holding three values (intercept and two slopes); confirm.
for j in range(10):
    opt.minimize(lambda: loss_function(params),var_list=[params])
    print(params)


### Batch Training
- We feed the model small amounts of data (batches) at a time, so that it can be trained on a GPU, which has a limited amount of memory
- Useful when working with large image datasets
- The chunksize parameter of pd.read_csv allows us to load the data in batches.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import tensorflow as tf

# Load data in batches of 100 rows (chunksize=100) instead of all at once
for batch in pd.read_csv("kc_housing.csv",chunksize=100):
    # Extract price column as a float32 array
    price = np.array(batch['price'],np.float32)
    # Extract size column as a float32 array
    # NOTE(review): this reads 'size', while the training loop further down
    # reads 'lot_size' — confirm which column the CSV actually contains.
    size = np.array(batch['size'],np.float32)
# NOTE: price and size are reassigned on every iteration, so after the loop
# they hold only the values from the last chunk.

# Define trainable variables.
# dtype is passed by keyword because the second positional parameter of
# tf.Variable is `trainable`, not `dtype`.
intercept = tf.Variable(0.1, dtype=tf.float32)
slope = tf.Variable(0.1, dtype=tf.float32)

# Define the model
def linear_regression(intercept, slope, features):
    """Predict targets with a univariate linear model.

    Args:
        intercept: Offset added to every prediction.
        slope: Multiplier applied to the features.
        features: Input values to predict from.

    Returns:
        ``intercept + features * slope``.
    """
    scaled_features = features * slope
    return intercept + scaled_features

# Compute predicted values and return loss function
def loss_function(intercept, slope, targets, features):
    """Return the mean squared error of the linear model on one batch.

    Args:
        intercept: Intercept of the linear model.
        slope: Slope of the linear model.
        targets: Actual values the predictions are compared against.
        features: Input values the predictions are computed from.

    Returns:
        The result of ``tf.keras.losses.mse(targets, predictions)``.
    """
    return tf.keras.losses.mse(
        targets,
        linear_regression(intercept, slope, features),
    )

# Define optimization operation
opt = tf.keras.optimizers.Adam()

# Train a linear model in batches: one minimization step per chunk of 100 rows
for batch in pd.read_csv('kc_housing.csv',chunksize=100):
    # Extract the target and feature columns as float32 arrays
    price_batch = np.array(batch['price'],np.float32)
    size_batch = np.array(batch['lot_size'],np.float32)
    # Minimize the loss function on the current batch, updating intercept and slope
    opt.minimize(lambda: loss_function(intercept,slope,price_batch,size_batch), var_list =[intercept,slope])

# Print the parameter values
print(intercept.numpy(),slope.numpy())