-
Notifications
You must be signed in to change notification settings - Fork 1
/
target_ngrams.py
110 lines (100 loc) · 3.61 KB
/
target_ngrams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import index
import entropy
import csv
import by_date
import spectral_feats
import importlib

try:
    # ``imp`` is deprecated and no longer ships with Python 3.12+. Keep the
    # name bound where the module still exists, but do not let its absence
    # make this whole module unimportable.
    import imp
except ImportError:
    imp = None

# Reload ``entropy`` every time this module is imported (same effect as the
# previous ``imp.reload`` call). Presumably this is here so edits to
# entropy.py are picked up in a long-running interactive session -- TODO
# confirm it is still wanted.
importlib.reload(entropy)
def get_ngram_spectral_feats(filepath, ngram, bird_ID):
    """Average every spectral feature over the characters of ``ngram``.

    ``spectral_feats.make_dict(filepath)[bird_ID]`` is treated as a mapping
    from a character to a mapping of feature name -> value. The feature names
    are taken from the first entry of that mapping.

    Args:
        filepath: Passed straight to ``spectral_feats.make_dict``.
        ngram: Iterable of characters; characters that are not keys of the
            per-bird mapping are skipped.
        bird_ID: Key selecting one entry of ``make_dict``'s result.

    Returns:
        A dict with one entry per feature name. The value is the arithmetic
        mean of the ``float``-converted values collected for that feature,
        or the string ``'no_data'`` when nothing was collected.

    Side effects:
        Prints each feature name followed by its list of collected values.
    """
    bird_feats = spectral_feats.make_dict(filepath)[bird_ID]
    first_entry = list(bird_feats.keys())[0]
    feat_names = list(bird_feats[first_entry].keys())

    # One empty sample list per feature, in the order of the first entry.
    collected = {name: [] for name in feat_names}

    for symbol in ngram:
        if symbol not in bird_feats.keys():
            continue
        for name in feat_names:
            collected[name].append(float(bird_feats[symbol][name]))

    # Replace each sample list with its mean (or the 'no_data' marker).
    for name in feat_names:
        samples = collected[name]
        print(name)
        print(samples)
        if samples:
            collected[name] = sum(samples) / len(samples)
        else:
            collected[name] = 'no_data'
    return collected
def ngram_info(fp, n, target_syllable='all', min_count=5, probs=False, backwards=False, ExpectedProbability=False):
    """Build one row per qualifying n-gram and save the rows as CSV.

    The n-gram table is ``index.get_probs(fp, [n, n + 1], backwards)[n]``. It
    is used as a mapping from an n-gram tuple to a sequence whose item 0 is
    read as the probability and item 1 as the count.

    Args:
        fp: Passed to ``index.get_probs`` and ``entropy.get_ngram_entropy``.
        n: Selects which entry of the ``get_probs`` result is used.
        target_syllable: Keep only n-grams whose last element equals this
            value; the default ``'all'`` keeps every n-gram.
        min_count: Rows are kept only when the n-gram's count is at least
            this value.
        probs: When true, the n-gram's probability is appended to its row.
        backwards: Forwarded to ``index.get_probs`` and
            ``entropy.get_ngram_entropy``.
        ExpectedProbability: Forwarded to ``entropy.get_ngram_entropy``.

    Returns:
        A list of rows ``[prefix, e0, e1]`` (plus the probability when
        ``probs`` is true). ``prefix`` is the n-gram string without its last
        character, and ``e0``/``e1`` are items 0 and 1 of
        ``entropy.get_ngram_entropy`` for that prefix.

    Side effects:
        When at least one row is produced, ``./output/target_ngrams.csv`` is
        overwritten with the rows. With no rows, an existing file is left
        untouched.

    NOTE(review): n-grams are joined into a string and looked up again via
    ``tuple(ngram)``. That only round-trips when every element of the
    original key is a single character -- confirm this holds for all inputs.
    """
    ngrams = index.get_probs(fp, [n, n + 1], backwards)[n]

    # Distinct n-gram strings in first-seen order. dict.fromkeys gives O(1)
    # de-duplication while preserving insertion order.
    target_ngrams = list(dict.fromkeys(
        ''.join(key)
        for key in ngrams.keys()
        if target_syllable == 'all' or key[-1] == target_syllable
    ))

    out_list = []
    for ngram in target_ngrams:
        stats = ngrams[tuple(ngram)]
        prob, count = stats[0], stats[1]
        # Check the count first so the entropy computation is not spent on
        # rows that would be discarded anyway.
        if count < min_count:
            continue

        prefix = ngram[:-1]
        # One call supplies both entropy columns.
        ent = entropy.get_ngram_entropy(
            fp, prefix,
            backwards=backwards,
            ExpectedProbability=ExpectedProbability)

        minilist = [prefix, ent[0], ent[1]]
        if probs:
            minilist.append(prob)
        out_list.append(minilist)

    if out_list:
        # newline='' is what the csv module requires of the file object;
        # without it, rows are separated by blank lines on Windows.
        with open("./output/target_ngrams.csv", 'w', newline='') as output_file:
            csv.writer(output_file).writerows(out_list)
    return out_list
def info_and_feats(songs_fp, n, spectral_fp, bird_ID, backwards=False, ExpectedProbability=False):
    """Extend each ``ngram_info`` row with averaged spectral features.

    ``ngram_info`` is called with its default ``target_syllable``,
    ``min_count`` and ``probs``. For every returned row, item 0 (the n-gram
    prefix) is passed to ``get_ngram_spectral_feats`` and the values of the
    resulting dict are appended to the row in the dict's own order.

    Args:
        songs_fp: Passed to ``ngram_info`` as its file path.
        n: Passed to ``ngram_info``.
        spectral_fp: Passed to ``get_ngram_spectral_feats`` as its file path.
        bird_ID: Passed to ``get_ngram_spectral_feats``.
        backwards: Forwarded to ``ngram_info``.
        ExpectedProbability: Forwarded to ``ngram_info``.

    Returns:
        The list of extended rows.

    Side effects:
        When at least one row is produced,
        ``./output/ngram_info_and_feats.csv`` is overwritten with the rows.
        With no rows, an existing file is left untouched. The call to
        ``ngram_info`` may also write its own CSV file.
    """
    out_list = ngram_info(
        songs_fp, n,
        backwards=backwards,
        ExpectedProbability=ExpectedProbability)

    for minilist in out_list:
        feats = get_ngram_spectral_feats(spectral_fp, minilist[0], bird_ID)
        minilist.extend(feats.values())

    if out_list:
        # Open once (not once per row) and with newline='' as the csv module
        # requires; otherwise Windows output gets a blank line between rows.
        with open("./output/ngram_info_and_feats.csv", 'w', newline='') as output_file:
            csv.writer(output_file).writerows(out_list)
    return out_list
# NOTE(review): the string literal below is not a docstring. It holds disabled
# code that would default ``ent_fp`` to ``fp``; neither name is defined at
# module level in the visible code. Confirm whether it can simply be removed.
'''
if ent_fp=='none':
ent_fp=fp
'''
def info_and_dates(
        baseline_fp,
        dates_fp,
        target_syllable,
        n,
        start_date,
        end_date):
    """Extend each ``ngram_info`` row with per-date values for its n-gram.

    Rows come from ``ngram_info(baseline_fp, n, target_syllable)``. For each
    row the full n-gram is rebuilt as ``row[0] + target_syllable`` and passed
    to ``by_date.get_probs_by_date``. Item 1 of every element of that result
    is appended to the row (presumably the probability of a (date, prob)
    pair -- verify against ``by_date``).

    Args:
        baseline_fp: Passed to ``ngram_info`` as its file path.
        dates_fp: Passed to ``by_date.get_probs_by_date``.
        target_syllable: Forwarded to ``ngram_info`` and appended to each
            row's prefix to rebuild the n-gram. It must be a string that can
            be concatenated to the prefix.
        n: Passed to ``ngram_info``.
        start_date: Forwarded to ``by_date.get_probs_by_date``.
        end_date: Forwarded to ``by_date.get_probs_by_date``.

    Returns:
        The list of extended rows.

    Side effects:
        Prints each rebuilt n-gram. When at least one row is produced,
        ``./output/target_ngrams_with_dates.csv`` is overwritten with the
        rows. With no rows, an existing file is left untouched. The call to
        ``ngram_info`` may also write its own CSV file.
    """
    out_list = ngram_info(baseline_fp, n, target_syllable)

    for row in out_list:
        ngram = row[0] + target_syllable
        print(ngram)
        dates_and_probs_list = by_date.get_probs_by_date(
            dates_fp, ngram, start_date, end_date)
        row.extend(date[1] for date in dates_and_probs_list)

    if out_list:
        # Open once (not once per row) and with newline='' as the csv module
        # requires; otherwise Windows output gets a blank line between rows.
        with open("./output/target_ngrams_with_dates.csv", 'w', newline='') as output_file:
            csv.writer(output_file).writerows(out_list)

    # The original returned the undefined name ``out_lists`` and therefore
    # always raised NameError after writing the file.
    return out_list