In [1]:
# For example, here are several helpful packages to load.
# URL of the competition and its data:
#     https://www.kaggle.com/c/arabic-hwr-ai-pro-intake1/overview


# Import Libraries

In [2]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

## Reading Files

In [177]:
# Root folder of the competition dataset; every path below derives from it so
# the location only has to be changed in one place.
DATA_DIR = os.path.join("..", "input", "arabic-hwr-ai-pro-intake1")

# Folders that hold the raw image files.
train_path = os.path.join(DATA_DIR, "train")
test_path = os.path.join(DATA_DIR, "test")

# CSV tables shipped next to the image folders.
train = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))
test = pd.read_csv(os.path.join(DATA_DIR, "test.csv"))

In [179]:
# Lookup from image id to class label, built from the `id` and `label` columns
# of the training table.
train_dict = dict(zip(train["id"], train["label"]))

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore any seeded split made from it) reproducible across runs.
files = sorted(os.listdir(train_path))

# The text before the first "." of each file name is parsed as the integer id
# used to look the label up in train_dict.
labels = [train_dict[int(file.split(".")[0])] for file in files]

train_df = pd.DataFrame({"images": files, "labels": labels})

# The test folder has no labels here, so only the file names are collected.
test_files = sorted(os.listdir(test_path))
test_df = pd.DataFrame({"images": test_files})


In [183]:
def _load_images(directory, filenames):
    """Read each file in `filenames` from `directory` with cv2.imread.

    Returns a list of image arrays in the same order as `filenames`.
    Raises FileNotFoundError when a file cannot be read, because cv2.imread
    reports failure by returning None rather than raising.
    """
    images = []
    for filename in filenames:
        path = os.path.join(directory, filename)
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")
        images.append(img)
    return images


# Decode the training images into a column aligned with train_df's rows.
lst1 = _load_images(train_path, train_df["images"])
train_df["array_images"] = lst1

# Test images must be read from test_path, not train_path.
lst2 = _load_images(test_path, test_df["images"])
test_df["array_images"] = lst2


In [186]:
# Remove the file-name column from both frames. errors="ignore" keeps this
# cell safe to re-run after the column has already been dropped.
train_df = train_df.drop(columns=["images"], errors="ignore")
test_df = test_df.drop(columns=["images"], errors="ignore")


In [188]:
# Preview the first five rows of the training frame.
train_df.head(5)

## Data Splitting

In [189]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for validation; the fixed random_state
# makes the shuffled split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    train_df["array_images"],
    train_df["labels"],
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [193]:
# Tally how many training images share each array shape.
shape_lst = [image.shape for image in X_train]
pd.DataFrame(shape_lst).value_counts()

In [203]:
# Plain Python list of the training image arrays, indexed by position.
a = X_train.tolist()

# Data Visualization

In [205]:
# Show up to the first 10 training images on a 4x4 grid of subplots.
plt.figure(figsize=(15, 15))
for i in range(min(10, len(a))):
    plt.subplot(4, 4, i + 1)
    # cv2.imread returns channels in BGR order while imshow expects RGB,
    # so the last axis is reversed before display.
    plt.imshow(a[i][..., ::-1])
plt.show()


# Building Your Model

In [258]:
# Import from tensorflow.keras so the layers, optimizer and callbacks used in
# this notebook all come from the same Keras namespace.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

# Input geometry expected by the first convolution and width of the output layer.
INPUT_SHAPE = (32, 32, 3)
NUM_OUTPUTS = 29

model = Sequential()

# Feature extractor: three Conv -> BatchNorm -> MaxPool -> Dropout(0.2) stages
# with 32, 64 and 128 filters. Only the first stage declares the input shape.
for stage, filters in enumerate((32, 64, 128)):
    first_layer_kwargs = {"input_shape": INPUT_SHAPE} if stage == 0 else {}
    model.add(Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

model.add(Flatten())

# Classifier head: Dense -> BatchNorm -> Dropout blocks of decreasing width,
# each paired with its own dropout rate.
for units, drop_rate in ((512, 0.25), (256, 0.2), (128, 0.2), (64, 0.3)):
    model.add(Dense(units, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(drop_rate))

# One softmax unit per integer class id. With sparse_categorical_crossentropy
# the labels must be integers in [0, NUM_OUTPUTS - 1].
# NOTE(review): presumably the label ids in train.csv fit in 0..28 - confirm.
model.add(Dense(NUM_OUTPUTS, activation='softmax'))

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name='Adam'),
    metrics=['accuracy'],
)

model.summary()

# Data Augmentation

In [259]:

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

# Random rotations, zooms and shifts, applied to training batches only.
aug = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

gen_train = aug.flow(np.array(X_train.to_list()), y_train, batch_size=64)

# Validation batches come from a generator with no transforms and no
# shuffling, so val_loss / val_accuracy are measured on the unmodified
# held-out images and are comparable from epoch to epoch.
gen_val = ImageDataGenerator().flow(
    np.array(X_valid.to_list()), y_valid, batch_size=64, shuffle=False
)

In [260]:
keras_callbacks = tf.keras.callbacks

# Early stopping with a patience of 5 epochs; the best weights seen are restored.
early_stopp = keras_callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

# Halve the learning rate after 3 epochs without val_loss improvement,
# never going below 5e-5; verbose=1 logs each reduction.
reduce_lr = keras_callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=0.00005,
    verbose=1,
)

# Training Model

In [261]:
# gen_train / gen_val already yield batches of 64, so batch_size is not passed
# to fit (it must not be specified for generator input).
# early_stopp is included so training halts and restores the best weights once
# validation stops improving, instead of always running all 100 epochs.
history = model.fit(
    gen_train,
    epochs=100,
    validation_data=gen_val,
    callbacks=[early_stopp, reduce_lr],
)

# Training Curves (Loss and Accuracy)

In [269]:
import seaborn as sns
sns.set()

# Training and validation loss per epoch, one line per history key.
for history_key, line_color in (("loss", "green"), ("val_loss", "orange")):
    plt.plot(history.history[history_key], marker='o', color=line_color, label=history_key)
plt.legend()
plt.show()

In [270]:
# Training and validation accuracy per epoch, one line per history key.
for history_key, line_color in (("accuracy", "green"), ("val_accuracy", "orange")):
    plt.plot(history.history[history_key], marker='o', color=line_color, label=history_key)
plt.legend()
plt.show()

# Model Evaluation

In [273]:
# Score the model on both splits. The validation row is the held-out estimate;
# the training row alone would overstate performance on unseen data.
train_scores = model.evaluate(np.array(X_train.to_list()), y_train, verbose=0)
valid_scores = model.evaluate(np.array(X_valid.to_list()), y_valid, verbose=0)

# evaluate returns the loss followed by the metrics given to compile(),
# which for this model is the single 'accuracy' metric.
pd.DataFrame(
    [train_scores, valid_scores],
    index=["train", "valid"],
    columns=["loss", "accuracy"],
)

# Model Prediction

In [278]:
# Stack the per-row test image arrays into one batch, then run the model on it.
test_batch = np.array(test_df["array_images"].to_list())
predictions = model.predict(test_batch)

In [280]:
# Collapse each row of scores to the index of its highest-scoring output unit.
predictions = predictions.argmax(axis=1)

In [283]:
# Check the dimensions of the predicted label array.
np.shape(predictions)

In [284]:
# Attach the predicted class ids to the test frame as a "labels" column.
test_df = test_df.assign(labels=predictions)

In [285]:
# Preview the first five rows of the test frame, including the predicted labels.
test_df.head(5)