# Classification Problem Example

In [None]:
from sklearn.datasets import make_circles

# Number of points to generate
n_samples = 1000

# Build the two-ring toy dataset; a little noise is added to the points and the
# seed is fixed so the same data comes back on every run
X, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)

In [None]:
# Inspect the feature array returned by make_circles
X

In [None]:
# Peek at the first 10 labels
y[:10]

Our data is a little hard to understand right now, so let's try to visualize it.

In [None]:
import pandas as pd
# Tabulate the two feature columns next to their label
circles = pd.DataFrame(X, columns=["X0", "X1"]).assign(label=y)
circles

In [None]:
# Visualize the two classes with a scatter plot, coloured by label
import matplotlib.pyplot as plt
# The colormap is `RdYlBu` (one attribute); `plt.cm.Rd.YlBu` raises AttributeError
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu);

### Inputs and Output Shapes

In [None]:
# Check the shapes of our features and labels
X.shape, y.shape

In [None]:
# How many samples are we working with?
len(X), len(y)

In [None]:
# View the first example's features and its label
X[0], y[0]

### Steps in Modeling

In [None]:
import tensorflow as tf

# Set the global TensorFlow seed so weight initialisation is repeatable
tf.random.set_seed(42)

# 1. Create the model using the Sequential API (a single output unit)
model_1 = tf.keras.Sequential([
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
#    - the loss must be an instance, not the class itself
#    - optimizers live under tf.keras.optimizers
#    - metrics are passed as a list
model_1.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                optimizer=tf.keras.optimizers.SGD(),
                metrics=["accuracy"])

# 3. Fit the model
model_1.fit(X, y, epochs=5)

In [None]:
# Try to improve the model by training it for longer (silently, verbose=0)
model_1.fit(X, y, epochs =200, verbose = 0)

In [None]:
# Add another layer: rebuild model_1 with a 10-unit hidden layer
tf.random.set_seed(42)

model_1 = tf.keras.Sequential([
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

# A freshly constructed model has to be compiled before it can be fit
model_1.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                optimizer=tf.keras.optimizers.SGD(),
                metrics=["accuracy"])

model_1.fit(X, y, epochs=100, verbose=0)


### Improve our model

In [None]:
# Set the global TensorFlow seed so weight initialisation is repeatable
tf.random.set_seed(42)

# 1. Create the model using the Sequential API (wider and deeper this time)
model_2 = tf.keras.Sequential([
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model (loss instance, Adam from tf.keras.optimizers, metrics as a list)
model_2.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

# 3. Fit the model
model_2.fit(X, y, epochs=100, verbose=0)

In [None]:
# Evaluate the model on the data it was trained on
model_2.evaluate(X, y)

# Still performing poorly

In [None]:
# Look at the raw predictions model_2 makes for every sample in X
model_2.predict(X)

### To visualize our model predictions we create a function
* Take in a trained model, features (X) and labels (y)
* Create a meshgrid of the different X values
* Make predictions across the meshgrid
* Plot the predictions as well as a line between zones (where each unique class falls)

In [None]:
# To visualize our model predictions we create a function
import numpy as np
import matplotlib.pyplot as plt

def plot_decision_boundary(model, X, y):
    """Plot the decision boundary a model produces over the region spanned by X.

    A 100 x 100 grid is laid over the bounding box of the first two columns of
    `X` (padded by 0.1 on each side), the model predicts on every grid point,
    and the predicted classes are drawn as filled contours with the original
    points scattered on top.

    Args:
        model: Any object with a `predict(array)` method. The predictions may
            be 2-D with one column (binary), 2-D with several columns
            (one score per class) or a flat 1-D array (binary).
        X: Array of features; columns 0 and 1 are used as the plot axes.
        y: Labels used to colour the scattered points.
    """
    # Define the axis boundaries of the plot and create a meshgrid
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1

    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))

    # Stack the flattened grid coordinates into an (n_points, 2) feature array
    x_in = np.c_[xx.ravel(), yy.ravel()]

    # Make predictions for every grid point
    y_pred = np.asarray(model.predict(x_in))

    # More than one column per prediction means one score per class.
    # Checking ndim first keeps flat 1-D predictions from raising a TypeError.
    if y_pred.ndim > 1 and y_pred.shape[1] > 1:
        print("doing Multiple-class classification")

        # Pick the highest-scoring class and reshape back onto the grid
        y_pred = np.argmax(y_pred, axis=1).reshape(xx.shape)
    else:
        print("doing binary classification")
        # Round the single output to 0/1 and reshape back onto the grid
        y_pred = np.round(y_pred).reshape(xx.shape)

    # Plot the decision boundary with the data points on top
    plt.contourf(xx, yy, y_pred, cmap=plt.cm.RdYlBu, alpha=0.7)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.RdYlBu)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.show()



In [None]:
# Check out the predictions our model is making
plot_decision_boundary(model=model_2, X=X, y=y)

# This model is trying to separate the classes with a straight line, but our data is arranged in circles

In [None]:
# Let's see if our model can be used for a regression problem...
tf.random.set_seed(42)

# Create some regression data; the relationship to learn is y = X + 100
X_regression = tf.range(0, 1000, 5)
y_regression = tf.range(100, 1100, 5)

# Split our regression data into training and test sets
X_reg_train = X_regression[:150]
X_regression_test = X_regression[150:]
y_reg_train = y_regression[:150]
y_reg_test = y_regression[150:]

# Try to fit the classification model on the regression data.
# This is expected to fail, so the error is caught and displayed rather than
# left to abort a "Run All"; the exact exception type is not pinned down
# here, hence the broad except.
try:
    model_2.fit(X_reg_train, y_reg_train, epochs=100)
except Exception as err:
    print(f"model_2 could not be fit on the regression data: {err}")

# It won't work because model_2 was built and compiled for binary classification on our two-feature circles data

In [None]:
tf.random.set_seed(42)

# 1. Create a new model for the regression problem
model_3 = tf.keras.Sequential([
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model with a regression loss and metric
model_3.compile(loss=tf.keras.losses.mae,
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["mae"])

# 3. Fit the model; Dense layers need a feature axis, so the 1-D training
#    tensor is expanded from (n,) to (n, 1)
model_3.fit(tf.expand_dims(X_reg_train, axis=-1), y_reg_train, epochs=100)


In [None]:
# Make predictions with our trained model; the 1-D test tensor gets an
# explicit feature axis, (n,) -> (n, 1), before it is passed to predict
y_reg_pred = model_3.predict(tf.expand_dims(X_regression_test, axis=-1))

# Plot the model predictions against our regression data
plt.figure(figsize=(10, 7))
plt.scatter(X_reg_train, y_reg_train, c="b", label="Training data")
plt.scatter(X_regression_test, y_reg_test, c="g", label="Test data")
plt.scatter(X_regression_test, y_reg_pred, c="r", label="Predictions data")
plt.legend();

## The missing piece: non-linearity

In [None]:
tf.random.set_seed(42)

# 1. Create a new model: one unit with an explicitly linear activation
model_4 = tf.keras.Sequential([
    tf.keras.layers.Dense(1, activation=tf.keras.activations.linear)
])

# 2. Compile the model (`learning_rate` replaces the deprecated `lr` keyword)
model_4.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=["accuracy"])

# 3. Fit the model
history = model_4.fit(X, y, epochs=100)

In [None]:
# Check out our data again
plt.scatter(X[:,0], X[:,1], c=y, cmap=plt.cm.RdYlBu)

In [None]:
# Check the decision boundary for model_4
plot_decision_boundary(model= model_4, X=X, y=y)

# This model is still predicting with a straight line

Let's build our first neural network with a non-linear activation function.

In [None]:
tf.random.set_seed(42)

# 1. Create a new model: same single unit, but with a non-linear (ReLU) activation
model_5 = tf.keras.Sequential([
    tf.keras.layers.Dense(1, activation=tf.keras.activations.relu)
])

# 2. Compile the model (`learning_rate` replaces the deprecated `lr` keyword)
model_5.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=["accuracy"])

# 3. Fit the model
history = model_5.fit(X, y, epochs=100)

## This is worse

Trying to make the right model this time...

In [None]:
tf.random.set_seed(42)

# 1. Create a new model: two small ReLU hidden layers and an output unit
#    that has no activation
model_6 = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(1),
])

# 2. Compile the model (`learning_rate` replaces the deprecated `lr` keyword)
model_6.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=["accuracy"])

# 3. Fit the model
history = model_6.fit(X, y, epochs=250)

## This is even worse

In [None]:
# Visualize the predictions model_6 makes across the feature space
plot_decision_boundary(model=model_6, X=X , y=y)

Last tweaks

In [None]:
tf.random.set_seed(42)

# 1. Create a new model
model_7 = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(4, activation="relu"),
    # The output layer is a single neuron with a sigmoid activation: it
    # produces a probability in the range 0 to 1 that can be rounded to a
    # crisp class value.
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

# 2. Compile the model (`learning_rate` replaces the deprecated `lr` keyword)
model_7.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=["accuracy"])

# 3. Fit the model
history = model_7.fit(X, y, epochs=100)

## This is the best model so far

In [None]:
# Evaluate our model on the data it was trained on
model_7.evaluate(X, y)

In [None]:
# Visualize the decision boundary learned by model_7
plot_decision_boundary(model=model_7, X=X, y=y)

* Question: What is wrong with the predictions we have made?

#  Commonly used activation functions in neural networks

- `ReLU` (Rectified Linear Unit):

    * Activation Function: 
        f(x)=max(0,x)
    * Characteristics:
    - ReLU is one of the most widely used activation functions in deep learning.
    - It returns the input if it is positive, otherwise returns zero.
    - Simple and computationally efficient.
    - Helps alleviate the vanishing gradient problem.
    - However, it may suffer from the "dying ReLU" problem where neurons output zero for all inputs (especially during training).

- `Sigmoid`:

    * Characteristics:
    - Sigmoid squashes the output between 0 and 1.
    - It is commonly used in binary classification problems where the output represents probabilities.
    - However, it may suffer from the vanishing gradient problem, particularly during backpropagation.

- `Tanh ` (Hyperbolic Tangent):

    * Characteristics:
    - Tanh squashes the output between -1 and 1.
    - It is symmetric around the origin.
    - It helps alleviate the vanishing gradient problem better than sigmoid.

- `Softmax `:

    * Characteristics:
    - Softmax squashes the output into a probability distribution over multiple classes.
    - It is commonly used in multi-class classification problems.
    - The outputs sum up to 1, making them interpretable as class probabilities.
    - It amplifies the largest input and suppresses the smaller ones.

- `Leaky ReLU `:

    * Characteristics:
    - Leaky ReLU addresses the "dying ReLU" problem by allowing a small, non-zero gradient when the input is negative.
    - It helps prevent the issue of neurons becoming inactive during training.

- `ELU `  (Exponential Linear Unit):

    * Characteristics:
    - ELU is similar to Leaky ReLU but with an exponential component for negative inputs.
    - It can help improve learning speed and performance compared to other activation functions.

In [None]:
# Build a float32 toy tensor holding the values -10 up to 9
A = tf.range(-10, 10, dtype=tf.float32)
A

In [None]:
# Visualize our toy tensor (its values plotted against their index)
plt.plot(A)

In [None]:
# Let's start by replicating sigmoid: sigmoid(x) = 1 / (1 + exp(-x))
def sigmoid(x):
    """Return the element-wise sigmoid of `x`, i.e. 1 / (1 + exp(-x))."""
    denominator = 1 + tf.exp(-x)
    return 1 / denominator

# Apply our sigmoid function to the toy tensor
sigmoid(A)

In [None]:
# Plot our toy tensor after it has been transformed by the sigmoid function
plt.plot(sigmoid(A))

Toying with relu

In [None]:
def relu(x):
    """Return the element-wise maximum of 0 and `x` (the ReLU activation)."""
    rectified = tf.maximum(0, x)
    return rectified

# Run our toy tensor through the function
relu(A)

In [None]:
# Plot the toy tensor after it has been passed through relu
plt.plot(relu(A))

Toying with linear activation

In [None]:
# Pass the toy tensor through Keras' built-in linear activation
tf.keras.activations.linear(A)

In [None]:
# Plot the toy tensor after the linear activation
plt.plot(tf.keras.activations.linear(A))