# Intro To Keras!!

# Creating The Keras Model

# Setting the Ground(Model Architecture)

In [None]:
##Necessary Imports 
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt

In [None]:
##Load the dataset
can = load_breast_cancer()
##Keep the feature names as column labels so head()/describe() show readable names
##instead of anonymous integer columns
df = pd.DataFrame(can.data, columns=can.feature_names)
y = can.target
df.head()

In [None]:
##Let's describe the data (transposed, so each feature's summary is one row)
df.describe().T

##From the description it is obvious that we should normalize our dataset, as normalization affects
##the accuracy of the model. We'll see why below.

In [None]:
##Standardization
##NOTE(review): the scaler is fitted on every row, including the rows that are later held out
##by train_test_split -- confirm that this is acceptable for the evaluation.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(df.values)
X

##Thus our data is standardised

# Why Normalization

In [None]:
##Display the illustration stored in the file 'non_gausian.png'
from IPython.display import Image
Image(filename='non_gausian.png')

In [None]:
##Display the illustration stored in the file 'bell_3d.png'
Image(filename='bell_3d.png')

In [None]:
##Let's set up the architecture of the NN:---
##    A neural network consists of hidden layers, each holding a number of units called neurons.
##    The neurons are the computational sites: all the computation occurs there,
##    so most of the time we will be playing around with the neurons only.

##Importing the keras library
from keras.layers import Dense
from keras.models import Sequential

##    DENSE:--- every node of the previous layer is connected to every node of this layer
##        (just like a fully connected graph; see the dense.png figure in the next cell)
##
##    SEQUENTIAL:--- a Sequential model requires that each layer has weights or connections only to the
##    one layer coming directly after it in the network diagram.
##
##    LAYERS:--- Keras layers are the fundamental building blocks of Keras models.
##
##    MODELS:--- the model is the core data structure of Keras.

In [None]:
Image(filename = "dense.png")
##Representation of a dense layer

In [None]:
##Work out the input and output sizes from the data
print("shape of the df",df.shape)
print(np.unique(y).size,'is the distinct outputs')
##we have 30 features, so the network needs 30 input nodes

In [None]:
##Let's make the model

##Initializing the Sequential class
model = Sequential()

##We stack the layers using add()
model.add(Dense(60,activation = 'relu',input_shape = (30,)))  ##H1 and input shape
model.add(Dense(45,activation = 'relu'))                      ##H2
model.add(Dense(30,activation = 'relu'))                      ##H3
model.add(Dense(15,activation = 'relu'))                      ##H4
##Output layer: softmax makes the two class probabilities sum to 1, which is what
##categorical_crossentropy and the later "round column 1" prediction step assume.
##(Two independent sigmoid units do not form a probability distribution over the classes.)
model.add(Dense(2,activation = 'softmax'))

# Compiling the Model

In [None]:
##Let's compile the model with compile()
import keras
##Initialize the optimizer
##`learning_rate` is the supported keyword; the old `lr` alias is deprecated and rejected by recent Keras releases
opt = keras.optimizers.SGD(learning_rate = 0.01)
##SGD is short for Stochastic Gradient Descent. The batch size is not set by the optimizer: it is chosen
##in fit(), which uses mini-batches (Keras default: 32 samples) when batch_size is not given
##The learning rate scales each step taken towards the minimum while optimizing the weights and biases
##new_weight = old_weight - learning_rate*gradient

##compilation with compile
model.compile(loss = 'categorical_crossentropy',optimizer = opt,metrics = ['accuracy'])

In [None]:
##Our target is binary (two classes), so let's one-hot encode it with to_categorical
##NOTE: this overwrites y, so the cell should be run only once per loaded dataset

from keras.utils import to_categorical
y = to_categorical(y)

# Fitting the model to the dataset

In [None]:

##fitting the model with .fit
model.fit(X,y,epochs = 450)
##epochs is the number of passes over the training data that we run to optimize the model

# Prediction

In [None]:
##Let's predict: the model outputs probabilities, so keep the second output column
##(as a column vector) and round it to 0/1 before measuring the accuracy
pred = model.predict(X)[:,[1]].round()

In [None]:
##Let's take the original targets: column 1 of the one-hot encoded y, kept as a column vector
y_true = y[:,[1]]

In [None]:
##Let's use the confusion matrix to compare the true targets with the rounded predictions
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true,pred)
cm
##In the original run this gave 19 wrong predictions.
##Accuracy alone is not a good guide here: the author read these errors as false negatives, which are the more dangerous kind.
##That would mean 19 people were told that they did not have cancer although they actually had it (so dangerous).
##Now our target is to reduce the false negatives.
##NOTE(review): these predictions are made on the same X the model was trained on, and in scikit-learn's
##breast-cancer data label 0 is presumably malignant and 1 benign -- confirm which cell of cm counts missed cancers.

# Fine Tuning The Keras Model

# Let's Break Up With Some Neurons (Dropout)

In [None]:
##Importing dropout
from keras.layers import Dropout

##Initializing the Sequential class
new_model = Sequential()

##We stack the layers using add()
new_model.add(Dense(60,activation = 'relu',input_shape = (30,)))  ##H1 and input shape
##This layer has the most units, so it gets the largest dropout rate
new_model.add(Dropout(0.7))

new_model.add(Dense(45,activation = 'relu'))                      ##H2
new_model.add(Dropout(0.6))

new_model.add(Dense(30,activation = 'relu'))                      ##H3
new_model.add(Dropout(0.5))

##We don't want dropout here
new_model.add(Dense(15,activation = 'relu'))                      ##H4

##Output layer: softmax makes the two class probabilities sum to 1, as categorical_crossentropy
##and the later "round column 1" prediction step assume.
##We must not have dropout on the output layer.
new_model.add(Dense(2,activation = 'softmax'))

# Optimize the Optimizers

In [None]:
##Adam optimizer
import keras
##`learning_rate` is the supported keyword; the old `lr` alias is deprecated and rejected by recent Keras releases
new_opt = keras.optimizers.Adam(learning_rate = 0.007)
##the remaining parameters (beta_1, beta_2, epsilon) are left at their defaults because they are already well tuned

In [None]:
##Compile the dropout model with the Adam optimizer, tracking accuracy during training
new_model.compile(loss = 'categorical_crossentropy',optimizer = new_opt,metrics = ['accuracy'])

# Let's Apply the Emergency Brakes:--

In [None]:
##Fitting with the help of early stopping
from keras.callbacks import EarlyStopping
early_stopping_monitor = EarlyStopping(patience = 2)
##patience defines how many epochs the model may go without improving before training is stopped.
##NOTE(review): the original note said 4 is a great choice, but the code uses 2 -- confirm which is intended.
new_model.fit(X,y,validation_split = 0.2,epochs = 1000,callbacks = [early_stopping_monitor])
##validation_split (here 20% of the data) is used instead of k-fold cross validation because k-fold
##would take much more training time.
##The author also reports that the validation split gave better results (not verified here).

# Prediction

In [None]:
##Predict with the newly trained model: keep the second output column
##(as a column vector) and round the probabilities to 0/1
y_pred = new_model.predict(X)[:,[1]].round()

In [None]:
##Confusion matrix of the true targets against the predictions of new_model
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y[:,[1]],y_pred)
cm
##In the original run the accuracy increased: 5 false negatives and 6 false positives,
##11 errors overall, which is better than 19.

# More Tuning

In [None]:
from keras.layers import Dropout
def gen_from_model():
    """Build a fresh, uncompiled network with dropout on the first three hidden layers.

    Returns:
        A new ``Sequential`` model taking 30 input features, with Dense ReLU
        hidden layers of 60, 45, 30 and 15 units, Dropout rates of 0.7, 0.6
        and 0.5 after the first three, and a 2-unit softmax output layer.
    """
    ##Let's create a new model
    model_2 = Sequential()

    ##We stack the layers using add()
    model_2.add(Dense(60,activation = 'relu',input_shape = (30,)))  ##H1 and input shape
    ##This layer has the most units, so it gets the largest dropout rate
    model_2.add(Dropout(0.7))

    model_2.add(Dense(45,activation = 'relu'))                      ##H2
    model_2.add(Dropout(0.6))

    model_2.add(Dense(30,activation = 'relu'))                      ##H3
    model_2.add(Dropout(0.5))

    ##We don't want dropout here
    model_2.add(Dense(15,activation = 'relu'))                      ##H4

    ##Output layer: softmax makes the two class probabilities sum to 1, matching the
    ##categorical_crossentropy loss the callers compile with.
    ##We must not have dropout on the output layer.
    model_2.add(Dense(2,activation = 'softmax'))
    return model_2

In [None]:
##Let's use train_test_split
from sklearn.model_selection import train_test_split
##random_state makes the split reproducible between runs, so results from different cells can be compared;
##stratify keeps the class ratio the same in the training and the test part
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.28,random_state = 42,stratify = y)

In [None]:
import keras


##Learn the learning rate: train one fresh model per candidate and record its test score
##NOTE(review): the candidates are compared on X_test, which is also used for the later
##confusion matrices -- confirm whether a separate validation set should be used instead.
learning_rate = [0.001,0.005,0.007,0.01]
score = []
for lr in learning_rate:
    model_2 = gen_from_model()
    ##`learning_rate` is the supported keyword; the old `lr` alias is deprecated and rejected by recent Keras releases
    opt = keras.optimizers.Adam(learning_rate = lr)
    ##compiling the model
    model_2.compile(loss = 'categorical_crossentropy',optimizer = opt,metrics = ['accuracy'])
    ##fitting the model
    model_2.fit(X_train,y_train,epochs = 250)
    ##Evaluating the score on the held-out data
    scr = model_2.evaluate(X_test,y_test)
    ##appending the result to score (same order as learning_rate)
    score.append(scr)

print(score)

In [None]:
##In the author's run of the sweep above, a learning_rate of 0.007 was best, with low loss and high accuracy
##One of the few things we can still try is a larger batch size (mini-batch gradient descent)
##Let's train with Adam on batches of 128 samples
my_model = gen_from_model()
##`learning_rate` is the supported keyword; the old `lr` alias is deprecated and rejected by recent Keras releases
opt = keras.optimizers.Adam(learning_rate = 0.007)
my_model.compile(loss = 'categorical_crossentropy',optimizer = opt,metrics = ['accuracy'])
my_model.fit(X_train,y_train,epochs = 100,batch_size = 128)

In [None]:
##Predict on the held-out data: keep the second output column
##(as a column vector) and round the probabilities to 0/1
y_prd = my_model.predict(X_test)[:,[1]].round()

In [None]:
##Confusion matrix of the held-out targets against the predictions of my_model
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test[:,[1]],y_prd)
cm
##In the original run this was an improved result.

# Run According To The Path

In [None]:
##Let's use the ReduceLROnPlateau callback
from keras.callbacks import ReduceLROnPlateau
call_model = gen_from_model()
##`learning_rate` is the supported keyword; the old `lr` alias is deprecated and rejected by recent Keras releases
opt = keras.optimizers.Adam(learning_rate = 0.007)
call_model.compile(loss = 'categorical_crossentropy',optimizer = opt,metrics = ['accuracy'])
##Multiply the learning rate by 0.2 whenever val_loss has not improved for 5 epochs, never going below 0.0001
pleatu_reduce = ReduceLROnPlateau(monitor = 'val_loss',factor = 0.2,patience = 5,min_lr = 0.0001)
##val_loss only exists when validation data is supplied, so hold out 20% of the training data;
##without it the callback has nothing to monitor and never lowers the learning rate
call_model.fit(X_train,y_train,validation_split = 0.2,epochs = 200,callbacks = [pleatu_reduce])

In [None]:
##Predict on the held-out data: keep the second output column
##(as a column vector) and round the probabilities to 0/1
y_pd = call_model.predict(X_test)[:,[1]].round()

In [None]:
##Confusion matrix of the held-out targets against the predictions of call_model
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test[:,[1]],y_pd)
cm

##In the original run this was the most improved result.

# Challenges

# Things we can Try