In [1]:
import tensorflow as tf
import os
import random
import numpy as np

SEED = 614

def set_seeds(seed=SEED):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
    

def set_global_determinism(seed=SEED):
    set_seeds(seed=seed)

    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    
    tf.config.threading.set_inter_op_parallelism_threads(1)
    tf.config.threading.set_intra_op_parallelism_threads(1)

# Call the above function with seed value
set_global_determinism(seed=SEED)

2023-05-23 10:51:57.081687: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-23 10:51:57.178099: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-23 10:51:57.763839: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-05-23 10:51:57.763895: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] 

In [2]:

gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)


1 Physical GPUs, 1 Logical GPUs


2023-05-23 10:51:58.498119: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2006] Ignoring visible gpu device (device: 1, name: Quadro P1000, pci bus id: 0000:65:00.0, compute capability: 6.1) with core count: 5. The minimum required count is 8. You can adjust this requirement with the env var TF_MIN_GPU_MULTIPROCESSOR_COUNT.
2023-05-23 10:51:58.498784: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-23 10:51:58.917770: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9622 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:17:00.0, compute capability: 7.5


In [3]:
#import os


EPOCHS = 1000
BATCH_SIZE = 8
ARCH = "flor"

IMG_SIZE = (128,32, 1)
DATA_ROOT_PATH = "../"
IMAGES_PATH = os.path.join(DATA_ROOT_PATH)
IMAGES_PATH

'../'

In [4]:
vocab = None

with open(os.path.join(DATA_ROOT_PATH, "FinalVocab.txt"),encoding="utf-8") as f:
  vocab = f.readlines()
vocab = [item.strip() for item in vocab]
idx_to_vocab = {i:value.strip() for i, value in enumerate(vocab)}
vocab_to_idx = {value:key for key, value in idx_to_vocab.items()}

In [5]:
train_data = None

with open(os.path.join(DATA_ROOT_PATH, "new_traintSTR.txt"), encoding="utf-8") as f:
  train_data = f.readlines()
train_data = [item.strip() for item in train_data]

In [6]:
train_data

['14/142/142736.jpeg 0',
 '3/21/21941.jpeg 1',
 '11/388/388056.jpeg 2',
 '8/92/92070.jpeg 3',
 '15/310/310128.jpeg 4',
 '12/12/12367.jpeg 5',
 '5/451/451087.jpeg 6',
 '2/407/407220.jpeg 7',
 '2/224/224789.jpeg 8',
 '11/417/417670.jpeg 9',
 '2/350/350299.jpeg 10',
 '11/290/290759.jpeg 11',
 '4/405/405639.jpeg 12',
 '13/450/450062.jpeg 13',
 '4/280/280940.jpeg 14',
 '11/160/160666.jpeg 15',
 '3/348/348451.jpeg 16',
 '13/424/424479.jpeg 17',
 '2/337/337140.jpeg 18',
 '12/327/327779.jpeg 19',
 '4/195/195082.jpeg 20',
 '6/298/298102.jpeg 21',
 '14/170/170423.jpeg 22',
 '15/326/326091.jpeg 23',
 '15/226/226251.jpeg 24',
 '4/183/183666.jpeg 25',
 '11/350/350580.jpeg 26',
 '11/225/225957.jpeg 27',
 '1/425/425718.jpeg 28',
 '2/252/252938.jpeg 29',
 '10/169/169482.jpeg 30',
 '6/41/41546.jpeg 31',
 '1/7/7195.jpeg 32',
 '1/363/363603.jpeg 33',
 '9/375/375946.jpeg 34',
 '11/179/179848.jpeg 35',
 '6/328/328214.jpeg 36',
 '13/255/255533.jpeg 37',
 '4/157/157418.jpeg 38',
 '7/423/423549.jpeg 39',
 '10

In [7]:
valid_data = None

with open(os.path.join(DATA_ROOT_PATH, "new_valtSTR.txt"), encoding="utf-8") as f:
  valid_data = f.readlines()
valid_data = [item.strip() for item in valid_data]

In [8]:
test_data = None

with open(os.path.join(DATA_ROOT_PATH, "new_testtSTR.txt"), encoding="utf-8") as f:
  test_data = f.readlines()
test_data = [item.strip() for item in test_data]

In [9]:
test_data1 = None

with open(os.path.join(DATA_ROOT_PATH, "new_testtSTR.txt"), encoding="utf-8") as f:
  test_data1 = f.readlines()
test_data1 = [item.strip() for item in test_data1]

In [10]:
charl = None

with open( "charList.txt", encoding="utf-8") as f:
  charl = f.readlines()

In [11]:
charl=charl[0]

In [12]:
charl[9]

'उ'

In [13]:
len(test_data1)

300000

In [14]:
len(train_data)

6542815

In [15]:
len(valid_data)

400000

In [16]:
'''
for i in range(4000):
    train_data.append(valid_data[i])
'''

'\nfor i in range(4000):\n    train_data.append(valid_data[i])\n'

# train data labels

# train data generation

In [17]:

from tensorflow.keras.preprocessing.image import load_img, img_to_array
import tensorflow as tf
from PIL import Image, ImageOps
from data import preproc as pp

[ WARN:0@0.100] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../data/images/data/10/259/10.jpg'): can't open/read file: check file path/integrity


In [18]:

##label generator
def label_g(l):
    label=idx_to_vocab[l]
    z=[]
    for j in range(27):
        z.append(0)
    for k in range(len(label)):
        for r in range(len(charl)):
            if label[k]==charl[r]:
                z[k]=r
    return z   

In [19]:

##label generator
def label_g1(l):
    label=idx_to_vocab[l]
    z=[]
    for k in range(len(label)):
        for r in range(len(charl)):
            if label[k]==charl[r]:
                z.append(r)
    return z   

In [20]:
train_data[0]

'14/142/142736.jpeg 0'

In [21]:
import cv2
import numpy as np

In [22]:
s=np.zeros((8, 27))
s

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [23]:
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import tensorflow as tf
from PIL import Image, ImageOps
from data import preproc as pp
import cv2

class DataGen(tf.keras.utils.Sequence):
    def __init__(self, data, img_size, batch_size, mode="TRAIN"):
        self.data = data
        self.img_size = img_size
        self.batch_size = batch_size
        self.mode = mode
        
    def __getitem__(self, i):
        start = i * self.batch_size
        end = (i+1) * self.batch_size
        batch_images = np.zeros((self.batch_size, self.img_size[0], self.img_size[1], 1))
        batch_labels = np.zeros((self.batch_size, 27))
        #batch_labels =[[],[],[],[],[],[],[],[]]
        
        for ii, df_index in enumerate(range(start, end)):
            curr_data = self.data[df_index].split()
            curr_img_path = curr_data[0]
            #curr_label = idx_to_vocab[int(curr_data[1])]
            curr_label = label_g(int(curr_data[1]))

            #curr_img_path = "/".join(curr_img_path.split("/")[2:])
            curr_img_path = os.path.join(IMAGES_PATH, curr_img_path)
            #print(curr_img_path)

            
            curr_img = pp.preprocess(curr_img_path, self.img_size)
            curr_img=cv2.adaptiveThreshold(curr_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY, 199, 5)
            #curr_img = tf.image.resize(curr_img, (self.img_size[0], self.img_size[1]), method="nearest")
            #curr_img = curr_img.numpy().reshape((self.img_size[0], self.img_size[1],1))
            batch_images[ii, :, :,0] = curr_img 
            batch_labels[ii,:] = curr_label
                    
        if self.mode == "TRAIN":
          return batch_images, batch_labels
        else:
          return batch_images
        
    def __len__(self):
      return len(self.data) // self.batch_size

In [24]:
train_datagen = DataGen(train_data, IMG_SIZE, 8)

In [25]:
valid_datagen = DataGen(valid_data, IMG_SIZE, 8)

In [26]:
test_datagen = DataGen(test_data1, IMG_SIZE, 8)

In [27]:
train_datagen[0][1]==train_datagen[1][1]

array([[False, False, False, False, False, False, False, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [False, False, False, False, False, False, False, False, False,
        False,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [False, False, False, False, False, False, False, False, False,
        False, False,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [False, False, False,  True, False, False,  True, False, False,
        False, False,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [False, False, False, False,  True, False, False, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
  

# model


In [28]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import Model
from tensorflow.keras.constraints import MaxNorm

from network.layers import FullGatedConv2D, GatedConv2D, OctConv2D
from tensorflow.keras.layers import Conv2D, Bidirectional, LSTM, GRU, Dense
from tensorflow.keras.layers import Dropout, BatchNormalization, LeakyReLU, PReLU
from tensorflow.keras.layers import Input, Add, Activation, Lambda, MaxPooling2D, Reshape

In [29]:
from datetime import datetime

In [30]:
output_path = os.path.join("output", ARCH,str(datetime.now()))
target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

In [31]:
datetime.now()

datetime.datetime(2023, 5, 23, 10, 52, 0, 722619)

In [32]:

from network.model import HTRModel
# create and compile HTRModel
model = HTRModel(architecture=ARCH,
                 input_size=IMG_SIZE,
                 vocab_size=110,
                 beam_width=10,
                 stop_tolerance=20,
                 reduce_tolerance=15,
                 reduce_factor=0.1)

#model.compile(learning_rate=0.00555555555                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
model.compile(learning_rate=0.0001)
model.summary(output_path, "summary.txt")

callbacks = model.get_callbacks(logdir=output_path, checkpoint=target_path, verbose=1)

 The versions of TensorFlow you are currently using is 2.11.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


input size: KerasTensor(type_spec=TensorSpec(shape=(None, 128, 32, 1), dtype=tf.float32, name='input'), name='input', description="created by layer 'input'")
output size: KerasTensor(type_spec=TensorSpec(shape=(None, 16, 111), dtype=tf.float32, name=None), name='dense_1/Softmax:0', description="created by layer 'dense_1'")
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 128, 32, 1)]      0         
                                                                 
 conv2d (Conv2D)             (None, 64, 16, 16)        160       
                                                                 
 p_re_lu (PReLU)             (None, 64, 16, 16)        16        
                                                                 
 batch_normalization (BatchN  (None, 64, 16, 16)       112       
 ormalization)                                                   


In [33]:
start_time = datetime.now()

model.fit(x=train_datagen,
              epochs=EPOCHS,
          validation_data=valid_datagen,
              callbacks=callbacks,
              verbose=1)
total_time = datetime.now() - start_time

print("total time",total_time)

Epoch 1/1000


2023-05-23 10:52:09.232861: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2023-05-23 10:52:11.203861: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8901

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
2023-05-23 10:52:11.945191: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7fc81597ab80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-05-23 10:52:11.945215: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce RTX 2080 Ti, Compute Capability 7.5
2023-05-23 10:52:11.948123: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` t


You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


    33/817851 [..............................] - ETA: 7:52:01 - loss: 61.6623

2023-05-23 10:52:17.753555: W tensorflow/core/framework/op_kernel.cc:1818] UNKNOWN: IndexError: list assignment index out of range
Traceback (most recent call last):

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
    ret = func(*args)

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1039, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 901, in wrapped_generator
    for data in generator_fn():

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 1048, in generator_fn
    yield x[

UnknownError: Graph execution error:

2 root error(s) found.
  (0) UNKNOWN:  IndexError: list assignment index out of range
Traceback (most recent call last):

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
    ret = func(*args)

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1039, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 901, in wrapped_generator
    for data in generator_fn():

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 1048, in generator_fn
    yield x[i]

  File "/tmp/ipykernel_57027/2365482263.py", line 26, in __getitem__
    curr_label = label_g(int(curr_data[1]))

  File "/tmp/ipykernel_57027/538066368.py", line 10, in label_g
    z[k]=r

IndexError: list assignment index out of range


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
	 [[gradient_tape/ctc_loss_lambda_func/Shape_3/_110]]
  (1) UNKNOWN:  IndexError: list assignment index out of range
Traceback (most recent call last):

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
    ret = func(*args)

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1039, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 901, in wrapped_generator
    for data in generator_fn():

  File "/home/project/anaconda3/envs/htr2/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 1048, in generator_fn
    yield x[i]

  File "/tmp/ipykernel_57027/2365482263.py", line 26, in __getitem__
    curr_label = label_g(int(curr_data[1]))

  File "/tmp/ipykernel_57027/538066368.py", line 10, in label_g
    z[k]=r

IndexError: list assignment index out of range


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_18676]

In [None]:
xt=test_datagen[0][0]
yt=test_datagen[0][1]

In [None]:
predicts, _ = model.predict(x=xt,
                            ctc_decode=True,
                            verbose=1)

In [None]:
len(predicts[0])

In [None]:
predicts[1][0]

In [None]:
def convert(predicts):
    a=[]
    b=[]
    for i in range(len(predicts)):
        b=[]
        for j in range(len(predicts[i][0])):
            b.append(charl[predicts[i][0][j]])
        a.append(b)
    return a

In [None]:
def convert1(predicts):
    a=[]
    b=[]
    for i in range(len(predicts)):
        b=[]
        for j in range(len(predicts[i])):
            b.append(charl[int(predicts[i][j])])
        a.append(b)
    return a

In [None]:
def trim(yt):
    yt1=[]
    for i in range(len(yt)):
        k=0
        for j in range(26,0,-1):
            if yt[i][j]!=0:
                yt1.append(yt[i][0:j+1])
                k=1
                break
        if k==0:
            yt1.append([0])
    return yt1

In [None]:
yt1=trim(yt)

In [None]:
len(yt1[0])

In [None]:
len(yt[0])

In [None]:
yt1[0][1]

In [None]:
predicts1=convert(predicts)

In [None]:
gt=convert1(yt1)

In [None]:
from data import evaluation

In [None]:

import string
import unicodedata
import editdistance
import numpy as np


def ocr_metrics(predicts, ground_truth, norm_accentuation=False, norm_punctuation=False):
    """Calculate Character Error Rate (CER), Word Error Rate (WER) and Sequence Error Rate (SER)"""

    if len(predicts) == 0 or len(ground_truth) == 0:
        return (1, 1, 1)

    cer, wer, ser = [], [], []

    for (pd, gt) in zip(predicts, ground_truth):
        '''pd, gt = pd.lower(), gt.lower()

        if norm_accentuation:
            pd = unicodedata.normalize("NFKD", pd).encode("ASCII", "ignore").decode("ASCII")
            gt = unicodedata.normalize("NFKD", gt).encode("ASCII", "ignore").decode("ASCII")

        if norm_punctuation:
            pd = pd.translate(str.maketrans("", "", string.punctuation))
            gt = gt.translate(str.maketrans("", "", string.punctuation))
	'''
        pd_cer, gt_cer = list(pd), list(gt)
        dist = editdistance.eval(pd_cer, gt_cer)
        cer.append(dist / (max(len(pd_cer), len(gt_cer))))
        '''
        pd_wer, gt_wer = pd, gt
        dist = editdistance.eval(pd_wer, gt_wer)
        wer.append(dist / (max(len(pd_wer), len(gt_wer))))
        
        pd_ser, gt_ser = [pd], [gt]
        dist = editdistance.eval(pd_ser, gt_ser)
        ser.append(dist / (max(len(pd_ser), len(gt_ser))))
        '''
    metrics = [cer]
    metrics = np.mean(metrics, axis=1)

    return metrics

In [None]:
file5=open(os.path.join(output_path, "cer.txt"),'w')

In [None]:
evaluate = ocr_metrics(predicts=predicts1,
                                  ground_truth=gt,)
 
print("Calculate Character Error Rate {} ".format(evaluate[0],))
file5.write(str(evaluate[0]))
file5.close()

In [None]:
predicts1[0]

In [None]:
predict_path = os.path.join(output_path, "predict")

In [None]:
ff1=open(os.path.join(output_path,"total_time.txt"),'w')
ff1.write(str(total_time))
ff1.close()

In [None]:
ground_t_p=os.path.join(output_path, "predict",ARCH)

In [None]:
ground_t_p

In [None]:
file1=open(os.path.join(output_path,'predict_ground'),'w')

In [None]:
for i in range(len(predicts1)):
    for j in range(len(gt[i])):
        file1.write(gt[i][j])

In [None]:
for i in range(len(predicts1)):
    for j in range(len(gt[i])):
        file1.write(gt[i][j])

In [None]:
for i in range(len(predicts1)):
    for j in range(len(gt[i])):
        file1.write(gt[i][j])
    file1.write('\n')
    for j in range(len(predicts1[i])):
        file1.write(predicts1[i][j])
    file1.write('\n')

In [None]:
file1.close()

In [None]:
file2=open(os.path.join(output_path,'ground_t'),'w')

In [None]:
for i in range(len(gt)):
    for j in range(len(gt[i])):
        file2.write(gt[i][j])
    file2.write('\n')

In [None]:
file2.close()

In [None]:
file3=open('flor1predict1','w')
file3=open(os.path.join(output_path,'predict1'),'w')

In [None]:
for i in range(len(predicts1)):
    for j in range(len(predicts1[i])):
        file3.write(predicts1[i][j])
    file3.write('\n')

In [None]:
file3.close()

In [None]:
import string
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
import string
import numpy as np



In [None]:

def alg4(word1,word2):
  M=[[float('inf')]*(len(word2)+1) for i in range(len(word1)+1)]
          
  #filling last row
  for i in range(len(word2)+1):
    M[len(word1)][i]=len(word2)-i
          
  #filling last column
  for j in range(len(word1)+1):
    M[j][len(word2)]=len(word1)-j
              
  #filling bottom to up manner
          
  for i in range(len(word1)-1,-1,-1):
    for j in range(len(word2)-1,-1,-1):
      if word1[i]==word2[j]:
        M[i][j]=M[i+1][j+1]
      else:
        M[i][j]=1+min(M[i+1][j],M[i][j+1],M[i+1][j+1])

  x,y=0,0
  #print(x,y)
  count=0
  while x<len(M)-1 and y<len(M[0])-1:
    current=M[x][y]
    dia=M[x+1][y+1]
    right=M[x][y+1]
    bottom=M[x+1][y]
    if dia<=right and dia<=bottom and dia<=current:
      if dia==current-1:
        print("Substitution-->",word1[x],"replaced by",word2[y])
        #array[string.printable[:95].find(word1[x]),string.printable[:95].find(word2[y])]=array[string.printable[:95].find(word1[x]),string.printable[:95].find(word2[y])]+1
        sub[dict1[word1[x]]]=sub[dict1[word1[x]]]+1
        subs[dict1[word2[x]]][dict1[word1[x]]]=subs[dict1[word2[x]]][dict1[word1[x]]]+1
        count=count+1
        x=x+1
        y=y+1
      else:
        print("No operation-->",word1[x])
        x=x+1
        y=y+1
      
    elif right<=bottom and right<=current:
      print("Insertion",word2[y])
      ins[dict1[word2[y]]]=ins[dict1[word2[y]]]+1

      count=count+1
      y=y+1
    else:
      print("Deletion",word1[x])
      delete[dict1[word1[x]]]=delete[dict1[word1[x]]]+1
      x=x+1
      count=count+1
  print("total operations",count)
  

In [None]:
delete=[]
ins=[]
sub=[]

for i in range(len(charl)+1):
    delete.append(0)
    ins.append(0)
    sub.append(0)

In [None]:
file1=open(os.path.join(output_path,'1delete.txt'),'w')
file2=open(os.path.join(output_path,'1ins.txt'),'w')
file3=open(os.path.join(output_path,'1sub.txt'),'w')

In [None]:
for i in range(len(charl)):
    print(charl[i])
    file1.write(charl[i])
    file1.write('\n')
    file1.write(str(delete[i]))
    file1.write('\n')
    file2.write(charl[i])
    file2.write('\n')
    file2.write(str(ins[i]))
    file2.write('\n')
    file3.write(charl[i])
    file3.write('\n')
    file3.write(str(sub[i]))
    file3.write('\n')
    

In [None]:
file1.close()
file2.close()
file3.close()

In [None]:
subs=[]
for i in range(len(charl)):
    z=[]
    for j in range(len(charl)):
        z.append(0)
    subs.append(z)

In [None]:
for i in range(len(charl)):
    
    for j in range(len(charl)):
        if subs[i][j]!=0:
            print(subs[i][j])

In [None]:
len(charl)

In [None]:
dict1={}
for i in range(len(charl)):
    dict1[charl[i]]=i

In [None]:
dict1

In [None]:
s1=""
for j in range(len(gt[70])):
    s1=s1+gt[70][j]

In [None]:
s2=""
for j in range(len(predicts1[70])):
    s2=s2+predicts1[70][j]

In [None]:
alg4(s1,s2)

In [None]:
file4=open('flor1subsc.txt','w')
file4=open(os.path.join(output_path,'1subsc.txt'),'w')

In [None]:
for i in range(len(subs)):
    for j in range(len(subs)):
        file4.write(str(subs[i][j]))
        file4.write(" ")
    file4.write('\n')

In [None]:
file4.close()

In [None]:
gt[70]

In [None]:
predicts1[70]

# charlist generation


In [None]:
str(total_time)

In [None]:
output_path