In [1]:
# Import some modules
import os

import numpy as np
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Folder locations for the three data splits.
train_dir = 'training'

# Validation split: 70 images of each class, picked at random and moved out of the train folder.
validation_dir = 'validation'

# Unlabelled test images. They must be placed in a separate sub-folder inside this
# folder, otherwise the generator cannot pick them up.
test_dir = 'test'

In [3]:
# Build the generators that read the training and validation images from their folders.
# The only preprocessing applied is a rescale of the image data by 1/255.
datagen = ImageDataGenerator(rescale=1/255)

# Options shared by both generators: images are resized to 150x150 (a size that has
# worked well in the author's projects) and the labels are binary.
_flow_options = dict(target_size=(150, 150), class_mode='binary')

train_generator = datagen.flow_from_directory(train_dir, batch_size=64, **_flow_options)
validation_generator = datagen.flow_from_directory(validation_dir, batch_size=5, **_flow_options)

Found 5735 images belonging to 2 classes.
Found 140 images belonging to 2 classes.


In [4]:
from tensorflow.keras import models
from tensorflow.keras import layers

# The architecture is a plain stack of layers, so it is declared in one go by
# passing the ordered layer list to the Keras Sequential API.
model = models.Sequential([
    # Four convolution + max-pooling stages on 150x150 RGB input
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: flatten, one hidden dense layer, single sigmoid output unit
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [5]:
# Print a layer-by-layer summary of the model (layer types, output shapes and parameter counts)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)       1

In [6]:
from tensorflow.keras import optimizers

# Compile the model: RMSprop optimizer with a learning rate of 1e-4,
# binary cross-entropy as the loss, and accuracy as the tracked metric.
model.compile(
    optimizer=optimizers.RMSprop(1e-4),
    loss='binary_crossentropy',
    metrics=['acc'],
)

In [7]:
# Create a callback that checkpoints the model at every epoch; the epoch number is
# formatted into the file name. Note that this is optional.
checkpoint_cb = keras.callbacks.ModelCheckpoint("Nvidia_Hackathon_Model_1-{epoch:02d}.h5")

In [8]:
# Uncomment this cell to start training the model; the `callbacks` argument is optional and can be removed.
# history = model.fit(
#       train_generator,
#       steps_per_epoch=64,
#       epochs=50,
#       callbacks=[checkpoint_cb])  # Keras documents `callbacks` as a list of callback instances

In [9]:
# Build the generator for the test data.
# shuffle=False is set so that the predictions stay in the same order as the
# files the generator reads.
test_generator = datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    target_size=(150, 150),
)

Found 98 images belonging to 1 classes.


In [10]:
model = keras.models.load_model('model_name.h5') # Load any of your already saved checkpoints; replace 'model_name.h5' with its file name
# model.evaluate(validation_generator, steps=2) # Uncomment this line to evaluate the model on the manually created validation data

In [11]:
result= (model.predict(test_generator) > 0.5) # Get the model's predictions on the test data and threshold them at 0.5
# Note that the predictions are booleans: False and True (False for no Hindi text and True for Hindi text)

In [12]:
# Turn the boolean predictions into integers with np.where: True becomes 1, False becomes 0
x = result
result = np.where(x, 1, 0)

In [13]:
# This step is optional: to export the predictions to a JSON file they first have to be
# collected into a {file name: prediction} dictionary.
#
# The generator lists the test files in sorted *string* order ("1.jpg", "10.jpg", "11.jpg",
# ..., "2.jpg", ...), not in numeric order, so the i-th prediction does NOT belong to
# "<i>.jpg". Each prediction is therefore paired with the file name the generator actually
# read. This pairing is only valid because the test generator was created with shuffle=False.
flat_predictions = np.ravel(result)  # flatten so every entry is a scalar (the network ends in a single output unit)
assert len(test_generator.filenames) == len(flat_predictions), \
    "expected exactly one prediction per test image"
res = {
    # generator file names include the sub-folder; keep only the image name, e.g. "1.jpg"
    os.path.basename(file_path): int(prediction)
    for file_path, prediction in zip(test_generator.filenames, flat_predictions)
}

In [14]:
# Using this cell you can export your predictions into a json file
# import json
# with open('predictions.json', 'w') as outfile:
#     json.dump(res, outfile)