-
Notifications
You must be signed in to change notification settings - Fork 0
/
homophones.py
248 lines (240 loc) · 10.2 KB
/
homophones.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import requests
import json
from PIL import Image, ImageFont, ImageDraw
import arabic_reshaper
from bidi.algorithm import get_display
import tweepy
from time import sleep
# -------- libraries ----------
# requests is for pinging the Oxford Dictionaries API
# json is for parsing the response from Oxford Dictionaries
# PIL is for placing the words on to an image
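# arabic_reshaper joins the Urdu letters into their contextual forms so that
# the words display correctly in the Nastaliq script; it is used together with
# python-bidi (imported here as bidi.algorithm), which puts the reshaped text
# into right-to-left display order
# arabic_reshaper is from http://mpcabd.xyz/python-arabic-text-reshaper/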
# tweepy is for posting updates to Twitter
# sleep is for setting a time lag between tweets
#
# ------------ credentials (fill these in before running) -----------
# Oxford Dictionaries API: application id and key, sent as request headers.
app_id, app_key = '', ''
# Twitter: access token pair and consumer (application) key pair.
token, token_secret = '', ''
consumer_key, consumer_secret = '', ''
# ----------- Oxford Dictionaries endpoint and language codes ----------
# Entry lookups are built as <api_base_url><language>/<word>.
api_base_url = 'https://od-api.oxforddictionaries.com/api/v1/entries/'
# 'ur' entries are looked up with translations into 'en';
# 'hi' entries are looked up for their Hindi definitions.
source_language, target_language = 'ur', 'en'
hindi_language = 'hi'
# ------- word list for the day ---------
# Homophones: words that sound the same in Hindi and Urdu but have
# different meanings.
# Each key is the Urdu word written in the Nastaliq script. Each value is a
# two-item list:
#   [0] the same-sounding word written in devanagari
#   [1] a fun sentence in romanised Hindi that contains the word
# Example of the format: 'اردو': ['हिन्दी', 'udaharan vakya']
word_list = {'ثمر': ['समर', '']}
# ---------- Urdu letter -> devanagari lookup table -------------
# Keys are single Unicode code points of the Urdu alphabet; values are the
# devanagari text used as the phonetic rendering of that letter. An empty
# value means the letter gets no devanagari rendering of its own, and a
# value of the form "x/y" lists two alternative renderings.
# unicode mapping is from http://www.user.uni-hannover.de/nhtcapri/urdu-alphabet.html
# do-chashmi-hey is from https://en.wikipedia.org/wiki/Urdu_alphabet#The_2_he.27s
nast2dev = {
    # hamza forms
    "\u0621": "",  # HAMZA
    "\u0654": "",  # HAMZA ABOVE
    # alef
    "\u0627": "अ",  # ALEF
    "\u0622": "आ",  # ALEF WITH MADDA ABOVE
    # be .. khe
    "\u0628": "ब",  # BEH
    "\u067E": "प",  # PEH
    "\u062A": "त",  # TEH
    "\u0679": "ट",  # TTEH
    "\u067F": "ट",  # TEHEH
    "\u062B": "स",  # THEH
    "\u062C": "ज",  # JEEM
    "\u0686": "च",  # TCHEH
    "\u062D": "ह",  # HAH
    "\u062E": "ख़",  # KHAH
    # dal .. zhe
    "\u062F": "द",  # DAL
    "\u0688": "ड",  # DDAL
    "\u0690": "ड",  # DAL WITH FOUR DOTS ABOVE
    "\u0630": "ज़",  # THAL
    "\u0631": "र",  # REH
    "\u0691": "ड़",  # RREH
    "\u0699": "ड़",  # REH WITH FOUR DOTS ABOVE
    "\u0632": "ज़",  # ZAIN
    "\u0698": "झ़",  # JEH
    # seen .. qaf
    "\u0633": "स",  # SEEN
    "\u0634": "श",  # SHEEN
    "\u0635": "स",  # SAD
    "\u0636": "ज़",  # DAD
    "\u0637": "त",  # TAH
    "\u0638": "ज़",  # ZAH
    "\u0639": "अ",  # AIN
    "\u063A": "ग़",  # GHAIN
    "\u0641": "फ़",  # FEH
    "\u0642": "क़",  # QAF
    # kaf .. noon ghunna
    "\u06A9": "क",  # KEHEH
    "\u06AF": "ग",  # GAF
    "\u0644": "ल",  # LAM
    "\u0645": "म",  # MEEM
    "\u0646": "न",  # NOON
    "\u06BA": "ँ",  # NOON GHUNNA
    # wao, the he's and the ye's
    "\u0648": "व/ऊ",  # WAW
    "\u0624": "ओ",  # WAW WITH HAMZA ABOVE
    "\u06C1": "ह",  # HEH GOAL
    "\u0647": "ह",  # HEH
    "\u06CC": "य/ई",  # FARSI YEH
    "\u06D2": "ए",  # YEH BARREE
    "\u06BE": "ह",  # HEH DOACHASHMEE (do-chashmi he)
}
# --------- authenticate with Twitter ---------
# Builds the tweepy client used for every tweet in this script.
# `api` is bound to None up front so that the name always exists: if the setup
# below fails, the tweeting code later in the script reports its own failure
# instead of tripping over an undefined name.
api = None
try:
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.secure = True
    auth.set_access_token(token, token_secret)
    api = tweepy.API(auth)
except Exception as err:
    # `Exception` (not a bare except) so Ctrl-C / SystemExit still stop the script.
    print('Could not authenticate with Twitter:', err)
# ----------- pick a word, generate phonetic devanagari, get translations, put on canvas, tweet -----------
# For every entry in word_list this section:
#   1. draws the Urdu word and its letter-by-letter devanagari rendering on a
#      Pillow canvas,
#   2. adds the English meanings from the Oxford Urdu - English dictionary,
#   3. tweets that picture,
#   4. tweets the meanings of the Hindi homophone as plain text,
# and then waits before moving on to the next word (no wait after the last one).
# Every stage is best-effort: a failure is printed and the script carries on
# with the next stage.
REQUEST_TIMEOUT_SECONDS = 30  # give up on a dictionary request that hangs
TWEET_INTERVAL_SECONDS = 3600  # delay of 1 hour between two words


def _lookup_lexical_entries(language, word_id, translate_to=None):
    """Fetch the lexical entries of *word_id* from the Oxford Dictionaries API.

    language     -- language code of the dictionary to query ('ur', 'hi', ...)
    word_id      -- the word to look up
    translate_to -- when given, ask for translations into this language
                    instead of the plain entry

    Returns the ``lexicalEntries`` list of the first result.
    Raises requests.RequestException on network / HTTP errors, ValueError when
    the body is not JSON, and KeyError / IndexError when the response does not
    have the ``results[0]['lexicalEntries']`` layout this script relies on.
    """
    url = api_base_url + language + '/' + word_id
    if translate_to:
        url = url + '/translations=' + translate_to
    response = requests.get(
        url,
        headers={'app_id': app_id, 'app_key': app_key},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # turn "word not found" and similar HTTP errors into a clear exception
    # instead of failing later on an unexpected response body
    response.raise_for_status()
    return response.json()['results'][0]['lexicalEntries']


def _senses(lexical_entries, index=0):
    """Return the senses of the first entry of ``lexical_entries[index]``."""
    return lexical_entries[index]['entries'][0]['senses']


# the fonts are the same for every picture, so they are loaded only once
hindiFont = ImageFont.truetype('nakula.ttf', 24)
arialFont = ImageFont.truetype('C:/Windows/Fonts/arial.ttf', 32)
calibriFont = ImageFont.truetype('C:/Windows/Fonts/calibri.ttf', 24)
calibriSmall = ImageFont.truetype('C:/Windows/Fonts/calibri.ttf', 14)

for word_number, word in enumerate(word_list):
    print('picked this Urdu word: ', word)
    hi_word = word_list[word][0]
    song = word_list[word][1]
    print("picked these from the word's array: ", hi_word, song)
    word_id_ur = word

    # make the pillow canvas ready
    tweetpic = Image.new('RGBA', (800, 600), 'white')
    draw = ImageDraw.Draw(tweetpic, 'RGBA')
    # a coloured border, to demarcate the white background of the picture
    draw.rectangle((2, 2, 798, 598), fill='white', outline='#0072B2')
    # reshape the urdu word so that it can be put on the canvas
    bidi_text = get_display(arabic_reshaper.reshape(word_id_ur))
    draw.text((20, 20), bidi_text, '#0072B2', font=arialFont)

    # transliterate the word to phonetic devanagari, one entry per Urdu letter;
    # a letter missing from nast2dev is shown as '?' so that the two sequences
    # always stay aligned
    word_id_hi = [nast2dev.get(letter, '?') for letter in word_id_ur]
    try:
        # put "<urdu letter> = <devanagari>" pairs side by side on the canvas
        hi_pos = 85
        for ur_letter, hi_letter in zip(word_id_ur, word_id_hi):
            print(hi_letter, ur_letter)
            bidi_text = get_display(arabic_reshaper.reshape(ur_letter))
            draw.text((hi_pos, 60), bidi_text, '#D5001A', font=arialFont)
            draw.text((hi_pos + 15, 60), ' = ' + hi_letter, '#D5001A', font=hindiFont)
            hi_pos = hi_pos + 105
        draw.line((10, 100, 798, 100), fill='#0072B2')
    except Exception as err:
        print('Could not put the devanagari characters on the image:', err)

    # get the urdu meanings and put them on the canvas
    pos = 70
    print(word_id_ur)
    try:
        target_word_id = _senses(
            _lookup_lexical_entries(source_language, word_id_ur, target_language))
        if not target_word_id:
            not_found_lines = (
                (100, "The Oxford Urdu - English dictionary does not"),
                (125, "have a translation for this word yet."),
                (170, "See if the next tweet fares better? It's supposed to"),
                (195, "show an entry from John T. Platts's 'Dictionary of Urdu,"),
                (220, "Classical Hindi, and English'."),
            )
            for y, message in not_found_lines:
                draw.text((20, y), message, (0, 0, 0, 255), font=calibriFont)
        else:
            # put each translation on to the Pillow canvas, one per line;
            # a sense without translations is skipped rather than aborting
            for sense in target_word_id:
                for translation in sense.get('translations', []):
                    trans_word = '- ' + translation['text']
                    pos = pos + 40
                    print(trans_word)
                    draw.text((20, pos), trans_word, (0, 0, 0, 255), font=calibriFont)
            draw.text((460, 560), "The meanings are from Oxford Urdu - English Dictionary.",
                      '#D5001A', font=calibriSmall)
    except Exception as err:
        print('Could not put urdu meanings on canvas:', err)

    # save the image to the local drive and tweet the image
    tweetpic.save('tweetpic.png')
    tweet_text = 'Word: ' + word_id_ur + ' (' + hi_word + ')'
    print(tweet_text)
    try:
        print('tweeting Urdu meanings: ', tweet_text)
        api.update_with_media('tweetpic.png', status=tweet_text)
    except Exception as err:
        print('Could not tweet image:', err)

    # ----------- pick the Hindi word from the list, get meanings, tweet -----------
    # Hindi words are not put on to a PIL canvas because the
    # matras and the yukt-akshars are not handled correctly
    print(hi_word)
    hindi_meanings = []
    try:
        lexical_entries = _lookup_lexical_entries(hindi_language, hi_word)
        hindi_meanings = _senses(lexical_entries)
        if not hindi_meanings:
            # nothing under the first lexical entry: try the second one
            hindi_meanings = _senses(lexical_entries, 1)
        print('upto [senses]: ', hindi_meanings)
        print('length', len(hindi_meanings))
        # when no sense carries a definition, follow the first cross-reference
        # and use the senses of the word it points to
        if hindi_meanings and not any('definitions' in sense for sense in hindi_meanings):
            print('looking up cross-ref...')
            new_hi_word = hindi_meanings[0]['crossReferences'][0]['text']
            print('text :', new_hi_word)
            hindi_meanings = _senses(_lookup_lexical_entries(hindi_language, new_hi_word))
    except Exception as err:
        # whatever was retrieved before the failure is still tweeted below
        print('Could not retrieve Hindi meanings:', err)

    try:
        line1 = 'Meanings of ' + word_id_ur + ' (' + hi_word + ') in the Oxford Hindi Dictionary:'
        definitions = []
        for sense in hindi_meanings:
            definitions.extend(sense.get('definitions', []))
        if definitions:
            line2 = ', '.join(definitions)
        else:
            line2 = 'कुछ नहीं मिला शब्दकोष में'
        print(line1, line2)
        tweet_text = ' '.join((line1, line2))
        print('tweeting Hindi meanings: ', tweet_text)
        api.update_status(status=tweet_text)
    except Exception as err:
        print('Could not tweet Hindi words:', err)
    print('=====================')

    # wait till the next round of word lookup and tweeting,
    # but do not keep the script alive after the last word
    if word_number < len(word_list) - 1:
        sleep(TWEET_INTERVAL_SECONDS)