In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
import sys
# Limit CUDA to the physical device with index 1. This is set here, before
# TensorFlow is imported in the following cell.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [3]:
from tensorflow.python.client import device_lib
# Sanity check: display the devices TensorFlow can see after the
# CUDA_VISIBLE_DEVICES restriction set earlier.
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 9393708848715639605, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 10818421863587650957
 physical_device_desc: "device: XLA_CPU device", name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 692515089775491132
 physical_device_desc: "device: XLA_GPU device", name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 23632596173
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 7995766148545400104
 physical_device_desc: "device: 0, name: TITAN RTX, pci bus id: 0000:4b:00.0, compute capability: 7.5"]

In [4]:
import scipy.stats as stats
import pyarrow as pa
import pyarrow.parquet as pq
from multiprocessing import Pool
from tqdm.notebook import tqdm
import cv2
import scipy.stats as stats
from keras.models import load_model
from keras.utils import to_categorical, Sequence
from keras.backend import clear_session

Using TensorFlow backend.


In [5]:
# Names of the three prediction targets. The order matters: DataLoader pairs
# these positionally with the class counts [168, 11, 7].
labels = ["grapheme_root","vowel_diacritic","consonant_diacritic"]

def transformImg(img,size=(224,224)):
    """Centre, smooth and resize a single grayscale image.

    Processing steps:
      1. invert the intensities (``255 - img``);
      2. translate the image so that its intensity centroid lands on the
         geometric centre of the frame;
      3. expand to three channels (``COLOR_GRAY2RGB``);
      4. apply a 5x5 box blur twice;
      5. resize to ``size``.

    Parameters
    ----------
    img : 2-D array
        Single-channel image. The notebook passes 137x236 frames, but the
        centring is derived from ``img.shape`` so other sizes work as well.
    size : tuple of int
        ``(width, height)`` handed to ``cv2.resize``.

    Returns
    -------
    The transformed three-channel image. No value scaling is applied here.
    """
    img = 255 - img
    height, width = img.shape[:2]

    mu = cv2.moments(img, False)
    # m00 is the total intensity; it is 0 for a completely blank frame, in
    # which case there is no centroid and the translation is skipped instead
    # of dividing by zero.
    if mu["m00"] > 0:
        x, y = mu["m10"] / mu["m00"], mu["m01"] / mu["m00"]
        M = np.float32([[1, 0, (width / 2) - x], [0, 1, (height / 2) - y]])
        img = cv2.warpAffine(img, M, (width, height))

    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

    # Normalised 5x5 averaging kernel, applied twice for stronger smoothing.
    kernel = np.ones((5, 5), np.float32) / 25
    img = cv2.filter2D(img, -1, kernel)
    img = cv2.filter2D(img, -1, kernel)

    img = cv2.resize(img, (size[0], size[1]))
    return img

In [6]:
class DataLoader(Sequence):
    """Keras ``Sequence`` producing preprocessed image batches.

    Each item is a tuple ``(imgs, ret_y)`` where ``imgs`` is a float32 array
    of images run through ``transformImg`` and divided by 255, and ``ret_y``
    is a list of three one-hot arrays (one per entry of ``NUM_CLASSES``), all
    built from the same slice of ``y``.

    Parameters
    ----------
    X : array indexed as ``X[start:stop, :, :]`` and exposing ``.shape[0]``
        Raw images, one per first-axis entry.
    y : sliceable sequence of integer class ids
        Labels aligned with ``X``. The inference cell passes a list of zeros.
    training : bool
        Stored on the instance; not otherwise used by this class.
    batch_size : int
        Number of images per batch.
    size : tuple of int
        Target size forwarded to ``transformImg``.
    """

    # One-hot widths of the three outputs, in the same order as ``labels``.
    NUM_CLASSES = (168, 11, 7)

    def __init__(self,X,y,training,batch_size=64,size=(224,224)):
        self.training = training
        self.batch_size=batch_size
        self.X=X
        self.y=y
        self.size=size

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return int(np.ceil(self.X.shape[0] / self.batch_size))

    def __getitem__(self, i):
        """Return batch ``i`` as ``(imgs, ret_y)``."""
        batch = slice(i * self.batch_size, (i + 1) * self.batch_size)

        # Convert to float32 once and scale to [0, 1]; the original converted
        # the whole batch twice, which only cost an extra copy.
        imgs = np.asarray(
            [transformImg(img, size=self.size) for img in self.X[batch, :, :]],
            dtype=np.float32,
        ) / 255.0

        batch_y = self.y[batch]
        ret_y = [to_categorical(batch_y, num_classes=cat) for cat in self.NUM_CLASSES]

        return imgs, ret_y

In [7]:
%%time
for file in tqdm(os.listdir("./models2/")):
    model = load_model("./models2/{0}".format(file))
    if "xception" in file:
        size=(299,299)
    else:
        size=(224,224)
    y_preds=[{"grapheme_root":[],"vowel_diacritic":[],"consonant_diacritic":[]} for i in range(4)]
    ans=[0 for i in range(4)]
    for i in tqdm(range(4)):
        table = pq.read_table('./data/train_image_data_{0}.parquet'.format(i))
        df=table.to_pandas()
        df = df.set_index("image_id")
        del table
        imgs=df.values.reshape(-1,137,236)
        ans[i] = pd.DataFrame(index=df.index)
        test_gen = DataLoader(imgs, [0]*imgs.shape[0], training=False, batch_size=64,size=size)
        y_pred = model.predict_generator(test_gen,verbose=1)
        for j,label in enumerate(["grapheme_root","vowel_diacritic","consonant_diacritic"]):
            tmp = y_preds[i][label]
            tmp.append(y_pred[j])
            y_preds[i][label]=tmp
        del y_pred
        del df
    del model
    clear_session()

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))



CPU times: user 54min 52s, sys: 15min 31s, total: 1h 10min 23s
Wall time: 40min 21s


In [8]:
# Reduce the collected probabilities to one class id per image and target.
for frame, per_target in zip(ans, y_preds):
    for key, collected in per_target.items():
        # Average over the first axis (the collected prediction arrays), then
        # pick the highest-scoring class for every row.
        y_pred = np.asarray(collected).mean(axis=0).argmax(axis=1)
        frame[key] = y_pred

# Stack the per-file frames into a single table indexed by image_id.
df = pd.concat(ans)

In [9]:
# Write the predictions in long format: a header, then three lines per image
# (grapheme root, vowel diacritic, consonant diacritic).
with open("submission.csv", "w") as fp:
    fp.write("row_id,target\n")
    rows = zip(df.index, df["grapheme_root"], df["vowel_diacritic"], df["consonant_diacritic"])
    for row_id, grapheme, vowel, consonant in rows:
        fp.writelines((
            "{0}_grapheme_root,{1}\n".format(row_id,grapheme),
            "{0}_vowel_diacritic,{1}\n".format(row_id,vowel),
            "{0}_consonant_diacritic,{1}\n".format(row_id,consonant),
        ))

In [10]:
!cat submission.csv

row_id,target
Train_0_grapheme_root,15
Train_0_vowel_diacritic,9
Train_0_consonant_diacritic,5
Train_1_grapheme_root,159
Train_1_vowel_diacritic,0
Train_1_consonant_diacritic,0
Train_2_grapheme_root,22
Train_2_vowel_diacritic,3
Train_2_consonant_diacritic,5
Train_3_grapheme_root,53
Train_3_vowel_diacritic,2
Train_3_consonant_diacritic,2
Train_4_grapheme_root,71
Train_4_vowel_diacritic,9
Train_4_consonant_diacritic,5
Train_5_grapheme_root,153
Train_5_vowel_diacritic,9
Train_5_consonant_diacritic,0
Train_6_grapheme_root,52
Train_6_vowel_diacritic,2
Train_6_consonant_diacritic,0
Train_7_grapheme_root,139
Train_7_vowel_diacritic,3
Train_7_consonant_diacritic,0
Train_8_grapheme_root,67
Train_8_vowel_diacritic,0
Train_8_consonant_diacritic,0
Train_9_grapheme_root,64
Train_9_vowel_diacritic,7
Train_9_consonant_diacritic,1
Train_10_grapheme_root,115
Train_10_vowel_diacritic,1
Train_10_consonant_diacritic,2
Train_11_grapheme_root,107
Train_11_vowel_diacritic,0

Train_5474_grapheme_root,22
Train_5474_vowel_diacritic,9
Train_5474_consonant_diacritic,1
Train_5475_grapheme_root,128
Train_5475_vowel_diacritic,2
Train_5475_consonant_diacritic,0
Train_5476_grapheme_root,13
Train_5476_vowel_diacritic,0
Train_5476_consonant_diacritic,4
Train_5477_grapheme_root,71
Train_5477_vowel_diacritic,0
Train_5477_consonant_diacritic,3
Train_5478_grapheme_root,143
Train_5478_vowel_diacritic,1
Train_5478_consonant_diacritic,0
Train_5479_grapheme_root,23
Train_5479_vowel_diacritic,4
Train_5479_consonant_diacritic,1
Train_5480_grapheme_root,132
Train_5480_vowel_diacritic,0
Train_5480_consonant_diacritic,0
Train_5481_grapheme_root,56
Train_5481_vowel_diacritic,1
Train_5481_consonant_diacritic,4
Train_5482_grapheme_root,76
Train_5482_vowel_diacritic,6
Train_5482_consonant_diacritic,0
Train_5483_grapheme_root,57
Train_5483_vowel_diacritic,0
Train_5483_consonant_diacritic,0
Train_5484_grapheme_root,56
Train_5484_vowel_diacritic,9
Train_54

Train_10729_vowel_diacritic,3
Train_10729_consonant_diacritic,0
Train_10730_grapheme_root,49
Train_10730_vowel_diacritic,0
Train_10730_consonant_diacritic,0
Train_10731_grapheme_root,74
Train_10731_vowel_diacritic,10
Train_10731_consonant_diacritic,0
Train_10732_grapheme_root,22
Train_10732_vowel_diacritic,7
Train_10732_consonant_diacritic,2
Train_10733_grapheme_root,13
Train_10733_vowel_diacritic,1
Train_10733_consonant_diacritic,5
Train_10734_grapheme_root,151
Train_10734_vowel_diacritic,0
Train_10734_consonant_diacritic,0
Train_10735_grapheme_root,117
Train_10735_vowel_diacritic,9
Train_10735_consonant_diacritic,0
Train_10736_grapheme_root,103
Train_10736_vowel_diacritic,0
Train_10736_consonant_diacritic,0
Train_10737_grapheme_root,111
Train_10737_vowel_diacritic,0
Train_10737_consonant_diacritic,0
Train_10738_grapheme_root,152
Train_10738_vowel_diacritic,7
Train_10738_consonant_diacritic,0
Train_10739_grapheme_root,62
Train_10739_vowel_diacritic,7
Tra

Train_16450_vowel_diacritic,4
Train_16450_consonant_diacritic,5
Train_16451_grapheme_root,113
Train_16451_vowel_diacritic,7
Train_16451_consonant_diacritic,2
Train_16452_grapheme_root,129
Train_16452_vowel_diacritic,1
Train_16452_consonant_diacritic,0
Train_16453_grapheme_root,17
Train_16453_vowel_diacritic,9
Train_16453_consonant_diacritic,0
Train_16454_grapheme_root,134
Train_16454_vowel_diacritic,1
Train_16454_consonant_diacritic,0
Train_16455_grapheme_root,14
Train_16455_vowel_diacritic,9
Train_16455_consonant_diacritic,0
Train_16456_grapheme_root,128
Train_16456_vowel_diacritic,9
Train_16456_consonant_diacritic,0
Train_16457_grapheme_root,165
Train_16457_vowel_diacritic,2
Train_16457_consonant_diacritic,0
Train_16458_grapheme_root,75
Train_16458_vowel_diacritic,6
Train_16458_consonant_diacritic,0
Train_16459_grapheme_root,119
Train_16459_vowel_diacritic,5
Train_16459_consonant_diacritic,0
Train_16460_grapheme_root,51
Train_16460_vowel_diacritic,2
Tra

Train_22141_vowel_diacritic,4
Train_22141_consonant_diacritic,0
Train_22142_grapheme_root,112
Train_22142_vowel_diacritic,1
Train_22142_consonant_diacritic,0
Train_22143_grapheme_root,75
Train_22143_vowel_diacritic,1
Train_22143_consonant_diacritic,0
Train_22144_grapheme_root,72
Train_22144_vowel_diacritic,1
Train_22144_consonant_diacritic,4
Train_22145_grapheme_root,15
Train_22145_vowel_diacritic,9
Train_22145_consonant_diacritic,0
Train_22146_grapheme_root,13
Train_22146_vowel_diacritic,0
Train_22146_consonant_diacritic,4
Train_22147_grapheme_root,106
Train_22147_vowel_diacritic,1
Train_22147_consonant_diacritic,0
Train_22148_grapheme_root,72
Train_22148_vowel_diacritic,1
Train_22148_consonant_diacritic,1
Train_22149_grapheme_root,96
Train_22149_vowel_diacritic,2
Train_22149_consonant_diacritic,2
Train_22150_grapheme_root,149
Train_22150_vowel_diacritic,0
Train_22150_consonant_diacritic,5
Train_22151_grapheme_root,64
Train_22151_vowel_diacritic,1
Train_

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

