In [None]:
# Mount Google Drive at /content/drive so that files stored there can be opened
# from this runtime. The `google.colab` package is provided by the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import itertools
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

# --- Data location ---------------------------------------------------------
# Folder on the mounted Drive and the name of the training file inside it.
path = '/content/drive/MyDrive/Colab Notebooks/'
datafile = 'TrainData.txt'

# --- Encoding parameters ---------------------------------------------------
# A word is encoded into max_word_len slots, each slot holding char_len cells.
max_word_len = 30

# Alphabet: the 26 lowercase Latin letters followed by ‘, - and ʼ.
char_list = list('abcdefghijklmnopqrstuvwxyz') + ['‘', '-', 'ʼ']
char_len = len(char_list)

# Words that word_normalizer() looks for inside its input.
solid_sign = [
    'sun‘iy', 'sur‘at', 'jur‘at', 'sa‘y',
    'a‘lo', 'a‘yon', 'a‘zo',
    'ba‘zan', 'ba‘zi',
    'da‘vo',
    'e‘lon', 'e‘tibor', 'e‘tirof',
    'iste‘dod', 'iste‘mol',
    'ma‘lum', 'ma‘lumot', 'ma‘no', 'ma‘qul', 'ma‘muriyat',
    'mas‘ul', 'mash‘um',
    'me‘mor', 'me‘yor',
    'noma‘lum',
    'qal‘a', 'qat‘i', 'qat‘iy', 'qur‘on',
    'ra‘no',
    'san‘at', 'she‘r',
    'ta‘kid', 'ta‘lim', 'ta‘min', 'ta‘mir', 'ta‘sir',
]


def word_normalizer(word, solid_words=None):
    """Normalise a raw word (or a whole ``stem/affix`` line) before encoding.

    Steps, in order:
      1. lower-case the text and strip surrounding whitespace;
      2. map the apostrophe look-alikes (straight quote, backtick and the
         right single quote) to ``‘``;
      3. for every entry of ``solid_words`` that occurs in the text, rewrite
         that occurrence with its ``‘`` turned into ``ʼ``.

    Args:
        word: text to normalise.
        solid_words: words whose ``‘`` must become ``ʼ``. Defaults to the
            module-level ``solid_sign`` list, resolved at call time.

    Returns:
        The normalised string.
    """
    if solid_words is None:
        solid_words = solid_sign

    word = word.lower().strip()

    # Unify the apostrophe variants. (Replacing ‘ with itself, as the earlier
    # version also did, has no effect and is therefore omitted.)
    for variant in ("'", "`", "’"):
        word = word.replace(variant, "‘")

    # The earlier version computed the replacement but never applied it, so
    # the text was returned with ‘ still in place.
    for entry in solid_words:
        if entry in word:
            word = word.replace(entry, entry.replace("‘", "ʼ"))

    return word


def WordToVec(word, alphabet=None, word_len=None):
    """Encode a word as a flat one-hot vector.

    The result has ``word_len`` consecutive slots of ``len(alphabet)`` cells.
    Slot ``k`` has a single 1 at the alphabet position of ``word[k]``; slots
    past the end of the word are all zeros.

    Args:
        word: the text to encode.
        alphabet: sequence of allowed characters. Defaults to the module-level
            ``char_list``, resolved at call time.
        word_len: number of slots. Defaults to the module-level
            ``max_word_len``, resolved at call time.

    Returns:
        A list of ``word_len * len(alphabet)`` integers (0 or 1).

    Raises:
        ValueError: if ``word`` has more than ``word_len`` characters, or
            contains a character that is not in ``alphabet``.
    """
    if alphabet is None:
        alphabet = char_list
    if word_len is None:
        word_len = max_word_len

    # Without this guard an over-long word produced an over-long vector that
    # only failed later, when it was reshaped or multiplied by the weights.
    if len(word) > word_len:
        raise ValueError(
            'word %r has %d characters, more than the maximum of %d'
            % (word, len(word), word_len)
        )

    # Build the character -> index lookup once; setdefault keeps the first
    # index of a repeated character, matching list.index().
    position = {}
    for idx, symbol in enumerate(alphabet):
        position.setdefault(symbol, idx)

    width = len(alphabet)
    vector = [0] * (width * word_len)
    for slot, symbol in enumerate(word):
        if symbol not in position:
            raise ValueError('%r is not in the alphabet' % (symbol,))
        vector[slot * width + position[symbol]] = 1

    return vector


def VecToWord(vec):
    """Decode a flat vector back into a word.

    The vector is viewed as max_word_len rows of char_len scores and the rows
    are read in order:
      * a row whose sum is exactly 0 ends the decoding;
      * a row whose largest score is above 0.8 contributes the character of
        char_list at that position;
      * any other row contributes nothing.
    """
    rows = np.array(vec).reshape(max_word_len, char_len)
    letters = []
    for scores in rows:
        if np.sum(scores) == 0:
            break
        best = np.argmax(scores)
        if scores[best] > 0.8:
            letters.append(char_list[best])
    return ''.join(letters)


In [None]:
# Round-trip check: encode a short word, print the length of its vector,
# then decode the vector again (the decoded word is the cell's displayed value).
l=WordToVec('absd')

print(len(l))
VecToWord(l)

870


'absd'

In [None]:
def CheckChar(word):
    """Tell whether every character of ``word`` belongs to char_list.

    When at least one character is outside char_list the word is printed once
    and False is returned; otherwise nothing is printed and True is returned.
    """
    has_unknown = any(symbol not in char_list for symbol in word)
    if has_unknown:
        print(word)
    return not has_unknown

def CollectData():
    """Read the training file and build one encoded sample per usable line.

    Every line of ``path + datafile`` is normalised with word_normalizer() and
    is expected to contain a ``/`` that separates the stem from the affix
    (the affix may be empty). The full word is the line with all ``/`` removed.

    Lines are skipped when they are blank, contain no ``/``, contain a
    character rejected by CheckChar() (which prints the word), or are longer
    than max_word_len. Skipped non-blank lines are reported with print().

    Returns:
        A list of dicts with the keys 'line' (normalised line), 'word',
        'word_vec' (WordToVec of the word), 'stem' and 'stem_vec'
        (WordToVec of the stem).
    """
    data = []
    with open(path + datafile, 'r', encoding='utf8') as file:
        for raw_line in file:
            line = word_normalizer(raw_line.rstrip('\n'))
            if not line:
                # A blank line (typically at the end of the file) used to
                # raise IndexError when the affix part was accessed.
                continue

            stem, separator, _affix = line.partition('/')
            if not separator:
                print('skipped (no "/" separator):', line)
                continue

            word = line.replace('/', '')
            if not CheckChar(word):
                continue
            if len(word) > max_word_len:
                # Such a word cannot be encoded into max_word_len slots.
                print('skipped (longer than max_word_len):', word)
                continue

            data.append({
                'line': line,
                'word': word,
                'word_vec': WordToVec(word),
                'stem': stem,
                'stem_vec': WordToVec(stem),
            })

    return data

In [None]:

def Sigmoid(net):
    """Logistic function ``1 / (1 + e**-net)``, evaluated element-wise.

    The value is computed from ``e**-|net|``, which never overflows, so large
    negative inputs no longer trigger a RuntimeWarning. Plain Python lists are
    accepted as well as scalars and NumPy arrays.

    Args:
        net: scalar, list or array of pre-activation values.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like the input.
    """
    z = np.asarray(net)
    decay = np.exp(-np.abs(z))  # always within [0, 1]
    # For z >= 0 this is the textbook form; for z < 0 the algebraically equal
    # e**z / (1 + e**z) is used so that no large exponent is ever evaluated.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # 0-d result -> scalar; n-d result is returned as is

def MSE(target, output):
    """Return half of the sum of squared differences between target and output."""
    delta = np.array(target) - np.array(output)
    squared_total = np.sum(np.square(delta))
    return (1 / 2) * squared_total

In [None]:
class TrainWithBackpropagation:
    """Single-layer sigmoid network that maps an encoded word to its encoded stem.

    The layer is a square weight matrix plus a bias vector, both of size
    ``char_len * max_word_len``. Samples are presented one at a time and the
    parameters are updated on each sample until its error is small enough or
    the per-sample update limit is reached.

    Changes compared with the earlier version:
      * the training data is loaded when an instance is created, not while the
        class statement is being executed;
      * the start time written to the timing file is taken before the first
        epoch instead of being overwritten at every epoch;
      * a sample receives at most ``__max_epoch`` updates (previously one more).
    """

    __max_word_len = max_word_len
    __char_len = char_len
    # hyper-parameters:
    __l_rate = 0.5     # factor applied to every weight/bias update
    __max_epoch = 100  # maximum number of updates spent on one sample per visit
    __error = 0.01     # a sample is considered learned below this error

    def __init__(self, data=None):
        """Create a trainer.

        Args:
            data: samples in the format returned by CollectData(). When omitted,
                CollectData() is called to read them from the training file.
        """
        self.__data = CollectData() if data is None else data
        self.__weight = None
        self.__bias = None

    def __TrainData(self, input_vec, target_vec):
        """Update the weights and bias on one sample.

        Args:
            input_vec: encoded word.
            target_vec: encoded stem.

        Returns:
            (output_vec, updates): the layer output after the last update and
            the number of updates that were applied (0 when the sample already
            met the error threshold).
        """
        input_vec = np.array(input_vec)
        target_vec = np.array(target_vec)

        output_vec = Sigmoid(np.dot(input_vec, self.__weight) + self.__bias)
        mse_error = MSE(target_vec, output_vec)

        updates = 0
        # "not (a < b)" rather than "a >= b" keeps the previous behaviour of
        # continuing when the error is NaN.
        while (not (mse_error < self.__error)) and updates < self.__max_epoch:
            # Error signal of the sigmoid layer, computed once per update.
            delta = (target_vec - output_vec) * output_vec * (1 - output_vec)
            self.__weight += self.__l_rate * np.outer(input_vec, delta)
            self.__bias += self.__l_rate * delta

            output_vec = Sigmoid(np.dot(input_vec, self.__weight) + self.__bias)
            mse_error = MSE(target_vec, output_vec)
            updates += 1

        return output_vec, updates

    def Fit(self, epoches=10, seed=None, verbose=True):
        """Train on the loaded data and save the result under ``path``.

        Three files are written: the weight matrix, the bias vector and a text
        file with the start and finish times of the run.

        Args:
            epoches: number of passes over the data.
            seed: when given, the initial weights and bias are drawn from a
                generator seeded with this value so the run can be repeated;
                when None, NumPy's global random state is used as before.
            verbose: when True (default) one progress line is printed per sample.
        """
        size = self.__char_len * self.__max_word_len
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.__weight = rng.rand(size, size)
        self.__bias = rng.rand(size)
        print('weight bilan bias random bilan tanlab olindi...')

        # Taken once, before the first epoch, so that the timing file reports
        # the start of the whole run (and is defined even when epoches == 0).
        dt_start = dt.datetime.now()

        for epoch in range(epoches):
            print('epoch : ', epoch)
            print(dt.datetime.now())
            cnt = 0
            error_cnt = 0

            for raw in self.__data:
                cnt += 1
                output_vec, word_epoch = self.__TrainData(raw['word_vec'], raw['stem_vec'])

                predict = VecToWord(output_vec)
                result = predict == raw['stem']
                if not result:
                    error_cnt += 1

                if verbose:
                    print('epoch: ',epoch+1,'\tline :',cnt,'\tepoch of word:', word_epoch,'\traw : ',raw['line'], '\tpredict: ',predict,'\tnatija : ', result,'\terror(%):',round(error_cnt/cnt*100,2),'\n')

        np.savetxt(path+'Weight_with_epoch_'+str(epoches)+'_SigmoidAF.txt', self.__weight)
        np.savetxt(path+'Bias_with_epoch_'+str(epoches)+'_SigmoidAF.txt', self.__bias)

        print("Training muvafaqiyatli tugadi!!!")
        dt_finish = dt.datetime.now()  # taken after the weights have been saved

        with open(path+'Time_for_epoch_'+str(epoches)+'.txt', 'w', encoding='utf8') as tmfile:
            tmfile.write('start:'+str(dt_start)+'\nfinish:'+str(dt_finish))







gw1
hd+
ip53
lisa.ru
phys.org
pro+
s&p
sw3
türkiye


In [None]:
# Train for 10 passes over the data. Fit() also saves the weight matrix, the
# bias vector and a timing file under `path`.
obj=TrainWithBackpropagation()
obj.Fit(epoches=10)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

epoch:  10 	line : 2504 	epoch of word: 3 	raw :  meteorologik/ 	predict:  meteorologik 	natija :  True 	error(%): 0.56 

epoch:  10 	line : 2505 	epoch of word: 0 	raw :  metr/ 	predict:  metr 	natija :  True 	error(%): 0.56 

epoch:  10 	line : 2506 	epoch of word: 0 	raw :  metr/dan 	predict:  metr 	natija :  True 	error(%): 0.56 

epoch:  10 	line : 2507 	epoch of word: 0 	raw :  metr/ni 	predict:  metr 	natija :  True 	error(%): 0.56 

epoch:  10 	line : 2508 	epoch of word: 0 	raw :  metraj/li 	predict:  metraj 	natija :  True 	error(%): 0.56 

epoch:  10 	line : 2509 	epoch of word: 3 	raw :  metrologik/ 	predict:  metrologik 	natija :  True 	error(%): 0.56 

epoch:  10 	line : 2510 	epoch of word: 6 	raw :  mexatronika/ga 	predict:  mexatronika 	natija :  True 	error(%): 0.56 

epoch:  10 	line : 2511 	epoch of word: 0 	raw :  mfy/ 	predict:  mfy 	natija :  True 	error(%): 0.56 

epoch:  10 	line : 2512 	epoch of

In [None]:
def CheckWithData(epoches=10):
    """Evaluate saved weights on the data returned by CollectData().

    Loads the weight and bias files that were saved for ``epoches`` epochs,
    predicts the stem of every sample and writes one line per sample
    (normalised line, prediction, True/False) to
    ``error_list_<epoches>_epoch.txt`` under ``path``. The totals are appended
    to that file, each on its own line, and are also printed.

    Args:
        epoches: epoch count used in the names of the files to load and write.
    """
    weight = np.loadtxt(path + 'Weight_with_epoch_' + str(epoches) + '_SigmoidAF.txt')
    bias = np.loadtxt(path + 'Bias_with_epoch_' + str(epoches) + '_SigmoidAF.txt')

    data = CollectData()
    true_answer = 0

    with open(path + 'error_list_' + str(epoches) + '_epoch.txt', 'w', encoding='utf8') as efile:
        efile.write('Epochlar soni : ' + str(epoches) + '\n')

        for raw in data:
            output_vec = Sigmoid(np.dot(raw['word_vec'], weight) + bias)
            predict = VecToWord(output_vec)
            result = predict == raw['stem']
            if result:
                true_answer += 1
            efile.write(raw['line'] + '\t' + predict + '\t' + str(result) + '\n')

        # Each total gets its own line; without the newlines the two totals
        # were written back to back on a single line.
        efile.write('Jami satrlar soni: ' + str(len(data)) + '\n')
        efile.write('To`g`ri topdi: ' + str(true_answer) + '\n')

    print('Jami satrlar soni: ', str(len(data)))
    print('To`g`ri topdi: ',true_answer)





In [None]:
# Evaluate the weights saved by the 10-epoch run; the per-line results go to
# error_list_10_epoch.txt under `path`.
CheckWithData(10)

gw1
hd+
ip53
lisa.ru
phys.org
pro+
s&p
sw3
türkiye
Jami satrlar soni:  5002
To`g`ri topdi:  4262
