### **Setting Up Kaggle**



1.   Create an API token from your Kaggle account settings
2.   This will download a kaggle.json file to your PC
3.   Upload this file to Colab
4.   Run the commands below



In [7]:
! pip install kaggle



In [17]:
! mkdir ~/.kaggle/

mkdir: cannot create directory ‘/root/.kaggle/’: File exists


In [18]:
# Copy the uploaded kaggle.json token from the working directory into ~/.kaggle/.
! cp kaggle.json ~/.kaggle/

In [19]:
# Mode 600: only the file owner may read or write the token file.
! chmod 600 ~/.kaggle/kaggle.json

The colab notebook is now ready to download datasets from Kaggle.

Downloading Competitions dataset:

In [20]:
# Download the digit-recognizer competition files; per the recorded output they
# are train.csv.zip, test.csv.zip and sample_submission.csv, saved to /content.
! kaggle competitions download digit-recognizer

Downloading test.csv.zip to /content
 82% 5.00M/6.09M [00:00<00:00, 24.8MB/s]
100% 6.09M/6.09M [00:00<00:00, 24.1MB/s]
Downloading sample_submission.csv to /content
  0% 0.00/235k [00:00<?, ?B/s]
100% 235k/235k [00:00<00:00, 111MB/s]
Downloading train.csv.zip to /content
 55% 5.00M/9.16M [00:00<00:00, 5.69MB/s]
100% 9.16M/9.16M [00:00<00:00, 9.93MB/s]


Downloading Datasets:

`! kaggle datasets download <name-of-dataset>`

### **Preprocessing Data**

In [21]:
import pandas as pd
import numpy as np
import tensorflow as tf
import PIL
import csv

In [22]:
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img

In [23]:
! unzip train.csv.zip
! unzip test.csv.zip

Archive:  train.csv.zip
  inflating: train.csv               
Archive:  test.csv.zip
  inflating: test.csv                


In [25]:
def get_data(filename):
  """Read a labelled digit CSV into image and label arrays.

  The first row of the file is treated as a header and skipped. In every
  other row the first column is the label and the remaining 784 columns
  are the pixel values of one 28x28 image, in row-major order.

  Args:
    filename: path of the CSV file to read.

  Returns:
    A tuple (images, labels): images is a float array of shape
    (n_rows, 28, 28) and labels is a float array of shape (n_rows,).

  Raises:
    ValueError: if a field is not numeric or a row does not hold exactly
      784 pixel values.
  """
  label_column = []
  pixel_rows = []
  # newline='' is the mode the csv module documents for files given to csv.reader.
  with open(filename, newline='') as training_file:
    reader = csv.reader(training_file)
    next(reader, None)  # discard the header row (no error on an empty file)
    for row in reader:
      if not row:
        # csv.reader yields [] for blank lines; row[0] would raise IndexError.
        continue
      label_column.append(row[0])
      pixel_rows.append(row[1:])
  # Convert once and reshape, rather than splitting every row into 28 pieces.
  # The explicit row count keeps a header-only file at shape (0, 28, 28).
  images = np.array(pixel_rows).astype('float').reshape(len(pixel_rows), 28, 28)
  labels = np.array(label_column).astype('float')
  return images, labels
    

In [54]:
# Parse the training CSV into its image and label arrays.
training_path = "./train.csv"
(training_images, training_labels) = get_data(training_path)

In [55]:
# Load the unlabelled test CSV: every column of a data row is pixel data.
# newline='' is the mode the csv module documents for files given to csv.reader.
with open('./test.csv', newline='') as test_file:
    reader = csv.reader(test_file)
    next(reader, None)  # discard the header row
    # Drop blank lines so a stray empty line cannot become an empty image row.
    pixel_rows = [row for row in reader if row]

# Convert once, then view each row of 784 values as a 28x28 grid (row-major,
# the same layout np.array_split(row, 28) produced).
testing_images = np.array(pixel_rows).astype('float').reshape(len(pixel_rows), 28, 28)


In [56]:
# Show the shape of each loaded array, one per line.
print(training_images.shape, training_labels.shape, testing_images.shape, sep='\n')

(42000, 28, 28)
(42000,)
(28000, 28, 28)


In [57]:
# Add the trailing channel axis expected by Conv2D (input_shape=(28, 28, 1)).
# A fixed reshape gives the same (N, 28, 28, 1) result when this cell is run
# again, whereas np.expand_dims would keep appending axes on every re-run.
training_images = training_images.reshape(-1, 28, 28, 1)
testing_images = testing_images.reshape(-1, 28, 28, 1)


In [58]:
# Show the array shapes after the channel axis has been added, one per line.
print(training_images.shape, training_labels.shape, testing_images.shape, sep='\n')

(42000, 28, 28, 1)
(42000,)
(28000, 28, 28, 1)


In [40]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [60]:
# Augmenting generator for the labelled training images.
train_datagen = ImageDataGenerator(
    rescale=1./255.,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    # Digits are not mirror-symmetric: a flipped image is no longer a valid
    # example of its label, so horizontal flipping is disabled.
    horizontal_flip=False,
    fill_mode='nearest'
)

# No augmentation here, only the same 1/255 rescaling used for training.
validation_datagen = ImageDataGenerator(
    rescale=1.0/255.0)

train_generator=train_datagen.flow(
    training_images,
    training_labels,
    batch_size=32)


# NOTE: despite its name, this generator iterates over the unlabelled test
# images. shuffle=False keeps batches in row order so that any predictions
# made from it line up with the rows of test.csv.
validation_generator=validation_datagen.flow(
    testing_images,
    batch_size=32,
    shuffle=False
)

In [61]:
# Show the array shapes once more after the generators were built, one per line.
print(training_images.shape, training_labels.shape, testing_images.shape, sep='\n')

(42000, 28, 28, 1)
(42000,)
(28000, 28, 28, 1)


### **Defining and Running Model**

In [62]:
# Small CNN: two Conv2D + MaxPool2D stages, then a dense classifier that ends
# in a 10-unit softmax layer.
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                                 input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=512, activation='relu'))
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 13, 13, 32)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 11, 11, 16)        4624      
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 5, 5, 16)         0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 400)               0         
                                                                 
 dense_2 (Dense)             (None, 512)              

In [64]:
# Sparse categorical cross-entropy matches the 10-way softmax output;
# accuracy is tracked under the metric name 'acc'.
model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [65]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# len(train_generator) is the whole number of batches in one pass over the
# data, replacing the fractional len(training_images)/32.
history = model.fit(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=2)

  This is separate from the ipykernel package so we can avoid doing imports until


Epoch 1/2
Epoch 2/2


### **Predicting Test Images**

In [72]:
# The model was trained on pixels rescaled by 1/255 (see train_datagen), so the
# raw 0-255 test pixels must be scaled the same way before predicting.
class_probabilities = model.predict(testing_images / 255.0)

# Pick the most probable class for every image.
test_labels = np.argmax(class_probabilities, axis=1)

# ImageId is 1-based and follows the row order of the test images.
my_submission = pd.DataFrame({
    'ImageId': np.arange(1, len(test_labels) + 1),
    'Label': test_labels,
})
my_submission.to_csv('submission.csv', index=False)