In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively traverse the Kaggle input directory and print the full path
# of every file found beneath it (directories themselves are not printed).
for root, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, entry) for entry in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# The Universal Workflow of Machine Learning


1. Defining the problem and assembling a dataset
*     What will your input data be, what are you trying to predict.
*     You hypothesize that your outputs can be predicted given your inputs.
*     You hypothesize that your available data is sufficiently informative to learn the relationship between inputs and outputs.
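*     Beware of "nonstationary problems", where the relationship between inputs and outputs changes over time (e.g., shopping habits vary with the season).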
2. Choosing the measure of success
*     Define what you mean by success.
*     Accuracy, Precision, Recall, Customer retention rate, ROC AUC.
*     For class imbalance problems, use Precision and Recall.
*     For ranking problems or multilabel classification, use Mean Average Precision.
*     Define your own metric by which to measure success.
3. Deciding on an evaluation protocol
*     Decide how you will measure your current progress.
*     Maintaining a hold-out validation set.
*     Doing K-fold cross-validation.
*     Doing iterated K-fold validation.
4. Preparing your data
*     Format as tensors.
*     Scale to small values: [-1, 1], [0, 1].
*     Normalize.
*     Feature Engineering if data is small.
5. Developing a model that does better than a baseline
*     Last Layer Activation Choice.
*     Loss Function Choice: Should be computable for as little as a single data point, must be differentiable.
*     Optimization Configuration.
![image.png](attachment:e4bbcca9-e38c-4f0c-a45b-d31960da2809.png)
6. Scaling up: Developing a model that overfits
*     Is your model sufficiently powerful?
*     Always monitor the training loss and validation loss, as well as the training and validation values for any metrics you care about. When you see that the model’s performance on the validation data begins to degrade, you’ve achieved overfitting.
*     Here are some things you should try:
![image.png](attachment:745f9614-a423-452d-9b14-a6f83592b32b.png)
*     Every time you use feedback from your validation process to tune your model, you leak information about the validation process into the model.
*     Once you’ve developed a satisfactory model configuration, you can train your final production model on all the available data (training and validation) and evaluate it one last time on the test set.

# Convolutional Neural Networks

**First Example: Handwritten Digit Recognition on MNIST**

In [4]:
from keras import layers
from keras import models
from keras.datasets import mnist
from keras.utils import to_categorical


# Build a small convolutional classifier for 28x28 single-channel images.
model = models.Sequential()

# Declare the input shape with an explicit Input layer rather than passing
# `input_shape=` to the first Conv2D: Keras 3 warns against the latter for
# Sequential models and recommends an Input object as the first layer.
model.add(layers.Input(shape = (28, 28, 1)))

# Convolutional base: three 3x3 ReLU convolutions (32, 64, 64 filters),
# with 2x2 max pooling after each of the first two.
model.add(layers.Conv2D(32, (3, 3), activation = 'relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))

# Classifier head: flatten the feature maps, pass them through a 64-unit
# ReLU layer, and finish with a 10-way softmax (one output per digit class).
model.add(layers.Flatten())
model.add(layers.Dense(64, activation = 'relu'))
model.add(layers.Dense(10, activation = 'softmax'))

# Print the layer-by-layer summary of the assembled network.
model.summary()

In [11]:
# Number of target classes (digits 0-9). Passed explicitly to to_categorical so
# the one-hot width never depends on which labels happen to occur in a split
# and always matches the 10-unit softmax output layer of the model.
NUM_CLASSES = 10

# Load the MNIST train/test splits shipped with Keras.
(trainImages, trainLabels), (testImages, testLabels) = mnist.load_data()

# Append a trailing channel axis of size 1 so the arrays match the model's
# (28, 28, 1) input, then convert to float32 and divide by 255 to scale the
# pixel values down (presumably from 0-255 intensities into [0, 1]).
trainImages = trainImages.reshape(trainImages.shape + (1,)).astype('float32') / 255
testImages = testImages.reshape(testImages.shape + (1,)).astype('float32') / 255

# One-hot encode the integer labels for use with categorical cross-entropy.
trainLabels = to_categorical(trainLabels, num_classes = NUM_CLASSES)
testLabels = to_categorical(testLabels, num_classes = NUM_CLASSES)


In [12]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss
# (labels are one-hot encoded), and accuracy reported as the metric.
model.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Train for 5 epochs with mini-batches of 64 samples. The returned History
# object is kept in `history` so the per-epoch loss/accuracy can be inspected
# or plotted later, and so its bare repr is not echoed as the cell's output.
history = model.fit(trainImages, trainLabels, epochs = 5, batch_size = 64)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 24ms/step - accuracy: 0.8656 - loss: 0.4132
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 24ms/step - accuracy: 0.9847 - loss: 0.0500
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 24ms/step - accuracy: 0.9893 - loss: 0.0343
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 23ms/step - accuracy: 0.9924 - loss: 0.0241
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 23ms/step - accuracy: 0.9946 - loss: 0.0184


<keras.src.callbacks.history.History at 0x79fdb8295cf0>

In [13]:
# Score the trained network on the test split. The result is unpacked into
# the loss followed by the accuracy metric that was requested at compile time.
testLoss, testAcc = model.evaluate(x = testImages, y = testLabels)

# Show the two scores: loss first, then accuracy.
print(testLoss, testAcc)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9883 - loss: 0.0365
0.02744666486978531 0.991599977016449
