-
Notifications
You must be signed in to change notification settings - Fork 0
/
Lyrics.py
487 lines (406 loc) · 19.9 KB
/
Lyrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
import string
from collections import Counter
from operator import itemgetter
# NOTES:
# Keep each song as one long string, with no changes in formatting, so that
# songs can be appended on an album basis and searched. Flattened versions
# can then be created from the combined string.
class Lyrics:
    '''
    Text statistics for the lyrics of a song.

    Fields:
    - self.lyrics stores lyrics as a string

    Attribute names passed to get/count/mean/proportion/frequency are
    compared by value (==), so names built at runtime work the same as
    string literals. An unrecognised attribute name yields None.
    '''

    # Attributes for which count(attribute) is simply len(get(attribute)).
    _COUNT_VIA_GET = (
        'chars', 'chars_exnewlines', 'chars_exwhitespace', 'letters',
        'numbers', 'alpha_numeric', 'punctuation', 'lines_all', 'lines_full',
    )

    # Characters counted by count('vowels'); matched against lowercased chars.
    _VOWELS = frozenset('aeiou')

    # mean(attribute) -> (numerator count, denominator count)
    _MEAN_RATIOS = {
        'chars_per_word': ('alpha_numeric', 'words'),
        'words_per_line': ('words', 'lines_full'),
        'words_per_sentence': ('words', 'sentences'),
        'lines_per_block': ('lines_full', 'line_blocks'),
    }

    # proportion(attribute) -> (numerator count, denominator count)
    _PROPORTION_RATIOS = {
        'questions': ('questions', 'sentences'),
        'exclamations': ('exclamations', 'sentences'),
        'periods': ('periods', 'sentences'),
        'numeric': ('numbers', 'chars_exwhitespace'),
        'alphabetic': ('letters', 'chars_exwhitespace'),
        'punctuation': ('punctuation', 'chars_exwhitespace'),
    }

    # frequency(attribute) -> attribute handed to get() to obtain the items
    _FREQUENCY_SOURCES = {
        'words': 'words_lower',
        'chars': 'chars_lower',
        'letters': 'letters_lower',
        'numbers': 'numbers',
        'punctuation': 'punctuation',
    }

    # Lyrics(lyrics) initiates a new Lyrics object
    # Str --> Lyrics
    def __init__(self, lyrics):
        '''Store lyrics (the full text of the song) on the new object.'''
        self.lyrics = lyrics

    # str(self) produces self as a string
    def __str__(self):
        '''Return the stored lyrics unchanged.'''
        return self.lyrics

    # self.get(attribute) produces a list containing all instances of attribute
    # Lyrics Str --> (listof Str)
    def get(self, attribute):
        '''
        Return a list of every instance of attribute found in the lyrics,
        or None when attribute is not one of the names below.

        character attributes:
        - chars: contains all characters, including newlines and spaces
        - chars_lower: contains all characters, which have been made lowercase
        - chars_exnewlines: contains all characters, excluding newlines
        - chars_exwhitespace: contains all characters, excluding whitespace
        - letters: contains all letters
        - letters_lower: contains all letters, which have been made lowercase
        - numbers: contains all numbers
        - alpha_numeric: contains all letters and numbers
        - punctuation: contains all punctuation marks
        word attributes:
        - words: contains all words (and nums); keeps hyphens and apostrophes
        - words_lower: contains words, which have been made lowercase
        - words_nopunct: contains all words (and nums) without punctuation
        line attributes:
        - lines_all: contains all lines, including empty lines
        - lines_full: contains all non-empty lines
        '''
        text = self.lyrics
        if attribute == 'chars':
            return list(text)
        if attribute == 'chars_lower':
            # Lowercase one character at a time so the result stays aligned
            # with get('chars').
            return [char.lower() for char in text]
        if attribute == 'chars_exnewlines':
            return [char for char in text if char != '\n']
        if attribute == 'chars_exwhitespace':
            return [char for char in text if char not in string.whitespace]
        if attribute == 'letters':
            return [char for char in text if char.isalpha()]
        if attribute == 'letters_lower':
            return [char.lower() for char in self.get('letters')]
        if attribute == 'numbers':
            return [char for char in text if char.isnumeric()]
        if attribute == 'alpha_numeric':
            return [char for char in text if char.isalnum()]
        if attribute == 'punctuation':
            return [char for char in text if char in string.punctuation]
        if attribute == 'words':
            # Strip every punctuation mark except apostrophes and hyphens.
            irr_punct = string.punctuation.replace("'", '').replace('-', '')
            return self._split_without(irr_punct)
        if attribute == 'words_lower':
            return [word.lower() for word in self.get('words')]
        if attribute == 'words_nopunct':
            return self._split_without(string.punctuation)
        if attribute == 'lines_all':
            return text.splitlines()
        if attribute == 'lines_full':
            return [line for line in text.splitlines() if len(line) > 0]
        return None

    def _split_without(self, unwanted):
        '''
        Split the lyrics on whitespace and delete every character found in
        unwanted from each token.

        NOTE: a token made up only of unwanted characters becomes an empty
        string and is kept in the result.
        '''
        # Build the translation table once rather than once per word.
        table = str.maketrans('', '', unwanted)
        return [token.translate(table) for token in self.lyrics.split()]

    # self.count(attribute) produces the number of times attribute appears in
    # self.
    # Lyrics Str --> Int
    def count(self, attribute):
        '''
        Return how many times attribute occurs in the lyrics, or None when
        attribute is not one of the names below.

        character attributes:
        - chars: counts all characters, including newlines and spaces
        - chars_exnewlines: count all characters, excluding newlines
        - chars_exwhitespace: counts all characters, excluding whitespace
        - letters: counts all letters
        - numbers: counts all numbers
        - alpha_numeric: counts all letters and numbers
        - punctuation: counts all punctuation marks
        - vowels: counts vowels
        sentence attributes:
        - questions: counts all question marks
        - exclamations: counts all exclamation marks
        - periods: counts all periods
        - sentences: counts sentences
        word attributes:
        - words: counts all words (and nums)
        - words_alpha: counts all alphabetic words
        - words_numeric: counts all numbers
        line attributes:
        - lines_all: counts all lines, including empty lines
        - lines_full: counts non-empty lines
        - line_blocks: counts line blocks, represented by empty lines +1
        '''
        if attribute in self._COUNT_VIA_GET:
            return len(self.get(attribute))
        if attribute == 'vowels':
            return sum(1 for char in self.get('chars_lower')
                       if char in self._VOWELS)
        if attribute == 'questions':
            return self.lyrics.count('?')
        if attribute == 'exclamations':
            return self.lyrics.count('!')
        if attribute == 'periods':
            return self.lyrics.count('.')
        if attribute == 'sentences':
            # A sentence is taken to be any '?', '!' or '.' character.
            return (self.lyrics.count('?')
                    + self.lyrics.count('!')
                    + self.lyrics.count('.'))
        if attribute == 'words':
            return len(self.get('words_nopunct'))
        if attribute == 'words_alpha':
            return sum(1 for word in self.get('words_nopunct')
                       if word.isalpha())
        if attribute == 'words_numeric':
            return sum(1 for word in self.get('words_nopunct')
                       if word.isnumeric())
        if attribute == 'line_blocks':
            empty_lines = [line for line in self.get('lines_all')
                           if line == '']
            return len(empty_lines) + 1
        return None

    def _ratio(self, numerator, denominator, digits):
        '''
        Return count(numerator) / count(denominator) rounded to digits
        decimal places, or None when the denominator count is zero.
        '''
        try:
            return round(self.count(numerator) / self.count(denominator),
                         digits)
        except ZeroDivisionError:
            return None

    # self.mean(attribute) produces the mean described by attribute.
    def mean(self, attribute):
        '''
        Return the requested mean rounded to 2 decimal places. Returns None
        when the divisor is zero or attribute is not one of the names below.

        attributes
        - chars_per_word: mean alpha-numeric characters per word
        - words_per_line: mean words per non-empty line
        - words_per_sentence: mean words per sentence
        - lines_per_block: mean lines per line block
        '''
        for name, (numerator, denominator) in self._MEAN_RATIOS.items():
            if attribute == name:
                return self._ratio(numerator, denominator, 2)
        return None

    # self.proportion(attribute) produces the proportion of lyrics that is
    # attribute.
    def proportion(self, attribute):
        '''
        Return the requested proportion rounded to 4 decimal places. Returns
        None when the divisor is zero or attribute is not one of the names
        below.

        sentence attributes:
        - questions: proportion of sentences marked by a question mark
        - exclamations: proportion of sentences marked by an exclamation mark
        - periods: proportion of sentences marked by a period
        character attributes:
        - numeric: proportion of non-whitespace characters that are numeric
        - alphabetic: proportion of non-whitespace characters that are alphabetic
        - punctuation: proportion of non-whitespace characters that are punctuation marks
        # or, we could just make attribute any string - create lookup_proportion(self, type[char,word], value).
        '''
        for name, (numerator, denominator) in self._PROPORTION_RATIOS.items():
            if attribute == name:
                return self._ratio(numerator, denominator, 4)
        return None

    # _string_counter(items) counts items and converts the counts to a string
    # of the form:
    # "[val1, count1]\n[val2, count2]\n...[valn, countn]"
    # (listof Any) --> Str
    @staticmethod
    def _string_counter(items):
        '''
        Count items and return one "[value, count]" line per distinct value,
        highest count first.
        '''
        pairs = [[value, total] for value, total in Counter(items).items()]
        # Stable ascending sort followed by a reversal: among equal counts
        # the value seen last in items is listed first.
        ranked = sorted(pairs, key=itemgetter(1))
        ranked.reverse()
        return '\n'.join(str(pair) for pair in ranked)

    # use map reduce
    def frequency(self, attribute):
        '''
        Return the frequency table of attribute as a string (see
        _string_counter), or None when attribute is not one of the names
        below. Words, chars and letters are lowercased before counting.

        attributes
        - words
        - chars
        - letters
        - numbers
        - punctuation
        '''
        for name, source in self._FREQUENCY_SOURCES.items():
            if attribute == name:
                return self._string_counter(self.get(source))
        return None

    # produces a list of lines containing keyword
    def line_search(self, keyword):
        '''Return every line of the lyrics that contains keyword (case-sensitive).'''
        # need to create self.get('lines_lower') and use here
        return [line for line in self.lyrics.splitlines() if keyword in line]

    def summary(self):
        '''
        Build a dict of every get/count/mean/proportion/frequency result,
        print each "label: value" pair, and return the dict.
        '''
        summary = {
            # self.get
            'List Characters' : self.get('chars'),
            'List Characters in lowercase' : self.get('chars_lower'),
            'List Characters ex. newlines' : self.get('chars_exnewlines'),
            'List Characters ex. whitespace' : self.get('chars_exwhitespace'),
            'List Letters' : self.get('letters'),
            'List letters in lowercase' : self.get('letters_lower'),
            'List Numbers' : self.get('numbers'),
            'List Alpha-numeric characters' : self.get('alpha_numeric'),
            'List Punctuation marks' : self.get('punctuation'),
            'List Words' : self.get('words'),
            'List words in lowercase' : self.get('words_lower'),
            'List Words ex. punctuation' : self.get('words_nopunct'),
            'List Lines' : self.get('lines_all'),
            'List Non-empty lines' : self.get('lines_full'),
            # self.count
            'Count Characters' : self.count('chars'),
            'Count Characters ex. newlines' : self.count('chars_exnewlines'),
            'Count Characters ex. whitespace' : self.count('chars_exwhitespace'),
            'Count Letters' : self.count('letters'),
            'Count Numbers' : self.count('numbers'),
            'Count Alpha-numeric characters' : self.count('alpha_numeric'),
            'Count Punctuation marks' : self.count('punctuation'),
            'Count vowels' : self.count('vowels'),
            'Count "?"' : self.count('questions'),
            'Count "!"' : self.count('exclamations'),
            'Count "."' : self.count('periods'),
            'Count sentences' : self.count('sentences'),
            'Count words' : self.count('words'),
            'Count alphabetic words' : self.count('words_alpha'),
            'Count numeric words' : self.count('words_numeric'),
            'Count lines' : self.count('lines_all'),
            'Count non-empty lines' : self.count('lines_full'),
            'Count line blocks' : self.count('line_blocks'),
            # self.mean
            'Mean characters per word' : self.mean('chars_per_word'),
            'Mean words per line' : self.mean('words_per_line'),
            'Mean words per sentence' : self.mean('words_per_sentence'),
            'Mean lines per block' : self.mean('lines_per_block'),
            # self.proportion
            'Proportion of questions' : self.proportion('questions'),
            'Proportion of exclamations' : self.proportion('exclamations'),
            'Proportion of periods' : self.proportion('periods'),
            'Proportion of numeric characters' : self.proportion('numeric'),
            'Proportion of alphabetic characters' : self.proportion('alphabetic'),
            # Key spelling kept as-is: callers may look it up by this text.
            'Proportion of punction characters' : self.proportion('punctuation'),
            # self.frequency
            'Word frequencies' : self.frequency('words'),
            'Character frequencies' : self.frequency('chars'),
            'Letter frequencies' : self.frequency('letters'),
            'Number frequencies' : self.frequency('numbers'),
            'Punctuation frequencies' : self.frequency('punctuation')
        }
        for key in summary:
            print(key+':', summary[key])
        return summary

    def count_summary(self):
        '''Return the main counts as a tuple, in the order listed below.'''
        summary = {
            'Words' : self.count('words'),
            'Characters (with spaces)' : self.count('chars_exnewlines'),
            'Characters (no spaces)' : self.count('chars_exwhitespace'),
            'Letters' : self.count('letters'),
            'Numbers' : self.count('numbers'),
            'Vowels' : self.count('vowels'),
            'Punctuation Marks' : self.count('punctuation'),
            'Question Marks' : self.count('questions'),
            'Exclamation Marks' : self.count('exclamations'),
            'Periods' : self.count('periods'),
            'Sentences' : self.count('sentences'),
            'Lines (including empty)' : self.count('lines_all'),
            'Lines (non-empty)' : self.count('lines_full'),
            'Line Blocks' : self.count('line_blocks')
        }
        # Only the values are returned; the labels document their order.
        return tuple(summary.values())

    def mean_summary(self):
        '''Return the four means as a tuple, in the order listed below.'''
        summary = {
            'Characters/Word' : self.mean('chars_per_word'),
            'Words/Line' : self.mean('words_per_line'),
            'Words/Sentence' : self.mean('words_per_sentence'),
            'Lines/Block' : self.mean('lines_per_block')
        }
        return tuple(summary.values())

    def proportion_summary(self):
        '''Return the six proportions as a tuple, in the order listed below.'''
        summary = {
            'Questions' : self.proportion('questions'),
            'Exclamations' : self.proportion('exclamations'),
            'Periods' : self.proportion('periods'),
            'Numeric Characters' : self.proportion('numeric'),
            'Alphabetic Characters' : self.proportion('alphabetic'),
            'Punctuation Marks' : self.proportion('punctuation')
        }
        return tuple(summary.values())

    def freq_summary(self):
        '''Return the five frequency tables as a tuple, in the order listed below.'''
        summary = {
            'Words' : self.frequency('words'),
            'Characters' : self.frequency('chars'),
            'Letters' : self.frequency('letters'),
            'Numbers' : self.frequency('numbers'),
            'Punctuation' : self.frequency('punctuation')
        }
        return tuple(summary.values())
# Example usage: wrap a sample song in a Lyrics object, then print and keep
# its full summary. The text is written one lyric line per literal; adjacent
# literals are joined by the compiler into a single string.
x = (
    '(Rosetta.\n'
    'Who are you talking about?\n'
    'Sweet Loretta Fart. She thought she was a cleaner\n'
    'Sweet Rosetta Martin\n'
    'But she was a frying pan, yeah\n'
    'Rosetta\n'
    'The picker! The picker! Picture the fingers burning!\n'
    'Oo-wee!\n'
    'OK?\n'
    '1, 2, 1, 2, 3, 4)\n'
    '\n'
    'Jo Jo was a man who thought he was a loner\n'
    'But he knew it couldn\'t last\n'
    'Jo Jo left his home in Tucson, Arizona\n'
    'For some California grass\n'
    '\n'
    'Get back, get back\n'
    'Get back to where you once belonged\n'
    'Get back, get back\n'
    'Get back to where you once belonged\n'
    'Get back Jo Jo\n'
    'Go home\n'
    '\n'
    'Get back, get back\n'
    'Get back to where you once belonged\n'
    'Get back, get back\n'
    'Back to where you once belonged\n'
    'Get back, Jo\n'
    '\n'
    'Sweet Loretta Martin thought she was a woman\n'
    'But she was another man\n'
    'All the girls around her say she\'s got it coming\n'
    'But she gets it while she can\n'
    '\n'
    'Oh, get back, get back\n'
    'Get back to where you once belonged\n'
    'Get back, get back\n'
    'Get back to where you once belonged\n'
    'Get back, Loretta\n'
    '\n'
    'Go home\n'
    'Oh, get back, get back\n'
    'Get back to where you once belonged\n'
    'Get back, get back\n'
    'Get back to where you once belonged\n'
    '\n'
    'Get back\n'
    'Woo...\n'
    '\n'
    '(Thanks, Mo!\n'
    'I\'d like to say "thank you" on behalf of the group\n'
    'And ourselves and I hope we passed the audition!)'
)
l = Lyrics(x)
summary = l.summary()