# Calculating Shifting Vocabulary for Debt

In [158]:
import re
import regex
import os
import nltk
from nltk.tokenize import RegexpTokenizer

After importing the appropriate libraries, let us create the tokenizers needed to extract the instances of words describing debts in three corpora of documents referring to Temessük. A close reading of the sources suggests that the two words employed to refer to "debt" in Ottoman Turkish documents are "Alacak" and "Deyn".

In [141]:
# Both tokenizers are used purely as pattern finders: with RegexpTokenizer's default
# gaps=False, tokenize() returns every match of the pattern, so each returned token
# is one occurrence of the stem, whatever ending follows it in the text.
future_tokenizer = RegexpTokenizer(pattern=r"alaca")  # finds every occurrence of the stem "alaca"
debt_tokenizer = RegexpTokenizer(pattern=r"deyn")     # finds every occurrence of the stem "deyn"

It is now possible to extract all tokens corresponding to "alaca" and to "deyn" in the three corpora under study. The three corpora will allow us to grasp the "lexical drifts" (to echo the words of Daniel Lord Smail).

# First Corpus: 15-17th centuries

In [142]:
# Directory holding the documents of the first corpus.
path = "/home/otzy/Documents/programs/Notebooks/Harvard Application/Temessuk/temessuke ilgili kaynaklar.6/"
filenames = os.listdir(path)
os.chdir(path)
value = 0                         # kept from the original cell; not read anywhere in this cell

# Per-document occurrence counts, keyed by file name.
dict_tokens16e_alacak = {}
dict_tokens16e_deyn = {}
for filename in filenames:
    # `path` ends with a separator, so plain concatenation yields the full file path.
    with open(path + filename, 'r') as f:
        text = f.read()
    # One token is returned per pattern match, so the list length is the
    # number of occurrences of the stem in this document.
    tokens = future_tokenizer.tokenize(text)
    tokens2 = debt_tokenizer.tokenize(text)
    len1 = len(tokens)
    len2 = len(tokens2)
    # setdefault only stores the count when the file name is not yet a key.
    dict_tokens16e_alacak.setdefault(filename, len1)
    dict_tokens16e_deyn.setdefault(filename, len2)

In [143]:
# Share of first-corpus documents that contain each term at least once:
# documents with a non-zero count divided by the number of documents counted.
# (This is a proportion of documents, not an average number of uses per text.)
freq_alacak = sum(1 for v in dict_tokens16e_alacak.values() if v != 0) / len(dict_tokens16e_alacak)
# The deyn share is normalised by the deyn dictionary's own size, so the ratio
# depends only on the deyn counts.
freq_deyn = sum(1 for v in dict_tokens16e_deyn.values() if v != 0) / len(dict_tokens16e_deyn)
print(freq_alacak)
print(freq_deyn)
# Number of documents counted for each term.
print(len(dict_tokens16e_alacak))
print(len(dict_tokens16e_deyn))

0.14618973561430793
0.2177293934681182
643
643


# Second Corpus: Mid-18th Century

In [144]:
# Directory holding the documents of the mid-18th-century corpus.
path = "/home/otzy/Documents/programs/Notebooks/Harvard Application/Temessuk/temessuke_ilgili kaynaklar_mi-18/"
filenames = os.listdir(path)
os.chdir(path)
value = 0                         # kept from the original cell; not read anywhere in this cell

# Per-document occurrence counts, keyed by file name.
dict_tokensmi18_alacak = {}
dict_tokensmi18_deyn = {}
for filename in filenames:
    # The bare file name resolves because the working directory was changed to `path` above.
    with open(filename, 'r') as f:
        text = f.read()
    # NOTE(review): the text is matched exactly as read; no lower-casing is applied,
    # so only lower-case occurrences of the stems are counted.
    tokens = future_tokenizer.tokenize(text)
    tokens2 = debt_tokenizer.tokenize(text)
    len1 = len(tokens)
    len2 = len(tokens2)
    # setdefault only stores the count when the file name is not yet a key.
    dict_tokensmi18_alacak.setdefault(filename, len1)
    dict_tokensmi18_deyn.setdefault(filename, len2)

In [154]:
# Share of mid-18th-century documents with at least one match for each term:
# each comparison contributes 1 when the document's count is non-zero.
freq_alacakmi18 = sum(count != 0 for count in dict_tokensmi18_alacak.values()) / len(dict_tokensmi18_alacak)
freq_deynmi18 = sum(count != 0 for count in dict_tokensmi18_deyn.values()) / len(dict_tokensmi18_deyn)
# NOTE(review): the labels below speak of "number of times ... per document", but the
# values printed are the shares of documents computed above.
print("This is the number of times alacak is used per document referring to temessük", freq_alacakmi18)
print("This is the number of times deyn is used per document referring to temessük:", freq_deynmi18)

This is the number of times alacak is used per document referring to temessük 0.3116883116883117
This is the number of times deyn is used per document referring to temessük: 0.2597402597402597


# Third Corpus: Late 18th Century

In [152]:
# Directory holding the documents of the late-18th-century corpus.
path = "/home/otzy/Documents/programs/Notebooks/Harvard Application/Temessuk/temessuke ilgili kaynaklarfin18e/"
filenames = os.listdir(path)
os.chdir(path)
value = 0                         # kept from the original cell; not read anywhere in this cell

# Per-document occurrence counts, keyed by file name.
dict_tokenslate18_alacak = {}
dict_tokenslate18_deyn = {}
for filename in filenames:
    # The bare file name resolves because the working directory was changed to `path` above.
    with open(filename, 'r') as f:
        text = f.read()
        # The text is matched exactly as read; no lower-casing is applied.
        tokens = future_tokenizer.tokenize(text)
        tokens2 = debt_tokenizer.tokenize(text)
        # The former `borc_tokenizer` pass is dropped: that tokenizer is not defined
        # anywhere in this notebook (NameError on a fresh kernel) and its result was
        # never stored or used.
        len1 = len(tokens)                              # occurrences of "alaca" in this document
        len2 = len(tokens2)                             # occurrences of "deyn" in this document
        if filename not in dict_tokenslate18_alacak:
            dict_tokenslate18_alacak[filename] = len1
        if filename not in dict_tokenslate18_deyn:
            dict_tokenslate18_deyn[filename] = len2

In [155]:
# Share of late-18th-century documents with at least one match for each term:
# each comparison contributes 1 when the document's count is non-zero.
freq_alacakfin18 = sum(count != 0 for count in dict_tokenslate18_alacak.values()) / len(dict_tokenslate18_alacak)
freq_deynfin18 = sum(count != 0 for count in dict_tokenslate18_deyn.values()) / len(dict_tokenslate18_deyn)
# NOTE(review): the labels below speak of "number of times ... per document", but the
# values printed are the shares of documents computed above.
print("This is the number of times alacak is used per document referring to temessük:", freq_alacakfin18)
print("This is the number of times deyn is used per document referring to temessük:", freq_deynfin18)

This is the number of times alacak is used per document referring to temessük: 0.8048780487804879
This is the number of times deyn is used per document referring to temessük: 0.0


# Plotting

Visualization enables one to quickly compare and grasp the difference between the possible drifts in meaning and use of words to refer to debt. To do so, it is necessary to import a few more libraries that transform numbers into visual representations.

In [157]:
import plotly
import chart_studio as py
import plotly.graph_objs as go

# Both bar series are plotted against the same three categories, one per corpus.
periods = ['16th & 17th Centuries', 'Mid-18th Century', 'Late 18th Century']

# First series: the deyn value computed for each corpus.
trace1 = go.Bar(
    x=periods,
    y=[freq_deyn, freq_deynmi18, freq_deynfin18],
    name='Use of Deyn',       # legend entry
)

# Second series: the alacak value computed for each corpus, in the same category order.
trace2 = go.Bar(
    x=periods,
    y=[freq_alacak, freq_alacakmi18, freq_alacakfin18],
    name='Use of Alacak',     # legend entry
)

# Assemble the figure from the two series and a layout that only sets the title.
data = [trace1, trace2]
layout = go.Layout(title="Frequency of Words to Describe Debt When Referring to Temessüks")
fig = go.Figure(data=data, layout=layout)

# Render the chart in the notebook output.
fig.show()