/
helpers.py
126 lines (106 loc) · 4.26 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import cv2
import numpy as np
from scipy.ndimage.filters import gaussian_filter1d
import mechanize
import csv
from bs4 import BeautifulSoup
from pydub import AudioSegment
import os
def createVideo(chunks):
    """Render the frames of all chunks into ``video.mov`` at 30 fps.

    Every chunk is rewound with ``reset()`` and ``read()`` is called once per
    index in ``range(chunk.getStart(), chunk.getEnd())``. The first and last
    frame of each chunk are additionally saved as ``obama-<index>.jpg``.
    The frames on either side of every chunk boundary are then blended with a
    1-D Gaussian filter along the time axis before all frames are written.

    Args:
        chunks: non-empty sequence of objects exposing ``reset()``, ``read()``,
            ``getStart()`` and ``getEnd()``. ``read()`` must return a
            three-dimensional array (height, width, 3 channels).
            NOTE(review): frames are presumably uint8 images as produced by
            OpenCV - confirm against the chunk class.

    Raises:
        IndexError: if ``chunks`` is empty.
    """
    # the first frame is only read to learn the output dimensions
    chunks[0].reset()
    firstFrame = chunks[0].read()
    height, width, _ = firstFrame.shape

    # OpenCV 3+ removed the ``cv2.cv`` module; use the new factory when present
    if hasattr(cv2, 'VideoWriter_fourcc'):
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    else:
        fourcc = cv2.cv.CV_FOURCC(*'mp4v')

    video = cv2.VideoWriter()
    video.open('video.mov', fourcc, 30, (width, height), True)
    try:
        images = []
        # position in ``images`` at which each chunk starts
        boundaries = []
        for chunk in chunks:
            chunk.reset()
            boundaries.append(len(images))
            for i in range(chunk.getStart(), chunk.getEnd()):
                img = chunk.read()
                if i == chunk.getEnd() - 1 or i == chunk.getStart():
                    cv2.imwrite('obama-' + str(i) + '.jpg', img)
                images.append(img)

        # Gaussian smoothing across every cut between two chunks
        gaussianRange = 1
        for boundary in boundaries[1:]:
            lo = boundary - gaussianRange
            hi = boundary + gaussianRange
            # an empty chunk can put the window outside the frame list;
            # skip it instead of wrapping around or raising IndexError
            if lo < 0 or hi > len(images):
                continue
            window = np.zeros((height, width, 3, hi - lo))
            for c, j in enumerate(range(lo, hi)):
                window[:, :, :, c] = images[j]
            smoothed = gaussian_filter1d(window, sigma=1.5, axis=3)
            for c, j in enumerate(range(lo, hi)):
                images[j] = smoothed[:, :, :, c].astype("uint8")

        for img in images:
            video.write(img)
    finally:
        # release the writer even when reading or writing a frame fails
        cv2.destroyAllWindows()
        video.release()
def createAudio(audioChunks):
    """Stitch slices of the source speech recording into ``audio.wav``.

    Loads ``media/obama-speech-2.wav``, takes the slice
    ``[entry[0]:entry[1]]`` for every entry of ``audioChunks`` in order,
    concatenates the slices and exports the result as a WAV file.

    Args:
        audioChunks: iterable of indexable entries whose first two items are
            the slice bounds; any further items are ignored.
    """
    source = AudioSegment.from_wav("media/obama-speech-2.wav")
    # an empty slice of the source is the starting value for concatenation
    stitched = source[0:0]
    for bounds in audioChunks:
        begin, finish = bounds[0], bounds[1]
        stitched = stitched + source[begin:finish]
    stitched.export("audio.wav", format="wav")
def writeVideo(chunks, audioChunks):
    """Build the video and audio tracks and combine them into ``final.mov``.

    Calls ``createVideo(chunks)`` and ``createAudio(audioChunks)``, then runs
    ffmpeg through the shell on ``video.mov`` and ``audio.wav``. The exit
    status of ffmpeg is not checked.

    Args:
        chunks: passed unchanged to ``createVideo``.
        audioChunks: passed unchanged to ``createAudio``.
    """
    createVideo(chunks)
    createAudio(audioChunks)
    # both codecs are set to "copy", so neither stream is re-encoded
    muxCommand = "ffmpeg -i video.mov -i audio.wav -vcodec copy -acodec copy final.mov"
    os.system(muxCommand)
def phonemize(sentence):
    """Look up ``sentence`` with the CMU pronouncing dictionary web page.

    Spaces in ``sentence`` are replaced by ``+`` and the text is appended to
    the lookup URL. The response page is parsed and the first child of its
    second ``<tt>`` element is returned.

    Args:
        sentence: text to look up, words separated by single spaces.

    Returns:
        ``str`` of the first child of the second ``<tt>`` element.
        NOTE(review): presumably the phoneme transcription, with the words
        separated by "." - confirm against the live page.
    """
    query = sentence.replace(" ", "+")
    browser = mechanize.Browser()
    browser.open("http://www.speech.cs.cmu.edu/cgi-bin/cmudict?in=" + query)
    page = BeautifulSoup(browser.response().read())
    ttElements = page.findAll('tt')
    return str(ttElements[1].contents[0])
def tsvToTimeList(tsvFilename, paddingInMillisecond = 0):
    """Read a tab-separated timing file into a list of padded time spans.

    Every non-blank row needs at least three columns: two integers followed
    by a label. Blank lines are skipped.

    Args:
        tsvFilename: path of the TSV file to read.
        paddingInMillisecond: amount subtracted from the first column and
            added to the second column of every row.

    Returns:
        list of ``(int(row[0]) - padding, int(row[1]) + padding, row[2])``
        tuples in file order.

    Raises:
        ValueError: if one of the first two columns is not an integer.
        IndexError: if a non-blank row has fewer than three columns.
    """
    import sys

    # the csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3
    if sys.version_info[0] >= 3:
        tsvFile = open(tsvFilename, 'r', newline='')
    else:
        tsvFile = open(tsvFilename, 'rb')

    timeList = []
    with tsvFile:
        for row in csv.reader(tsvFile, delimiter="\t"):
            if not row:
                # csv yields an empty list for a blank line
                continue
            timeList.append((int(row[0]) - paddingInMillisecond,
                             int(row[1]) + paddingInMillisecond,
                             row[2]))
    return timeList
def wordsToPhonemes(tsvFilename = "words.tsv", outputTsvFile = "phonemes.tsv", batchSize = 5):
    """Convert a word-timing TSV file into a phoneme-timing TSV file.

    The rows of ``tsvFilename`` are read with ``tsvToTimeList`` and handed to
    ``_batchSendAndSplit`` in groups of ``batchSize`` (the last group may be
    smaller). A progress line is printed after every 100 words. All returned
    phoneme rows are written tab-separated to ``outputTsvFile``.

    Args:
        tsvFilename: input TSV whose rows hold two integers and a word.
        outputTsvFile: path of the TSV file to create or overwrite.
        batchSize: number of words sent per lookup; must be at least 1.

    Raises:
        ValueError: if ``batchSize`` is smaller than 1.
    """
    import sys

    if batchSize < 1:
        raise ValueError("batchSize must be at least 1")

    phonemeList = []
    batch = []
    for wordCount, word in enumerate(tsvToTimeList(tsvFilename), 1):
        batch.append(word)
        if len(batch) == batchSize:
            phonemeList.extend(_batchSendAndSplit(batch))
            batch = []
        if wordCount % 100 == 0:
            # single-argument form prints the same text on Python 2 and 3
            print("done %d words" % wordCount)
    # send whatever is left over after the last full batch
    if batch:
        phonemeList.extend(_batchSendAndSplit(batch))

    # csv.writer needs a binary file on Python 2 and newline='' on Python 3,
    # otherwise extra carriage returns or blank rows can appear
    if sys.version_info[0] >= 3:
        out = open(outputTsvFile, 'w', newline='')
    else:
        out = open(outputTsvFile, 'wb')
    with out:
        csv.writer(out, delimiter="\t").writerows(phonemeList)
def _batchSendAndSplit(wordsKepper):
    """Phonemize a batch of timed words and split each word's span by phoneme.

    The words are joined with spaces and sent to ``phonemize`` in a single
    call; the reply is split on "." to get one phoneme group per word. The
    duration of a word (``t[1] - t[0]``) is divided evenly between its
    phonemes, and consecutive phonemes are laid out from the word start
    ``t[0]``. When a phoneme's share is 120 or less, its span is widened
    by 60 on both sides.

    Args:
        wordsKepper: list of ``(start, end, word)`` tuples with integer times.

    Returns:
        list of ``(start, end, phoneme)`` tuples, in word order.

    Raises:
        IndexError: if the reply has fewer phoneme groups than words.
    """
    sentence = " ".join([t[2] for t in wordsKepper])
    wordPhonemes = phonemize(sentence).split(".")

    phonemeList = []
    for i, t in enumerate(wordsKepper):
        wordStart, wordEnd = t[0], t[1]
        phonemes = wordPhonemes[i].strip().split(" ")
        # floor division keeps integer timings on both Python 2 and 3
        eachPhonemeTime = (wordEnd - wordStart) // len(phonemes)
        # short phonemes are padded on both sides, long ones are not
        padding = 60 if eachPhonemeTime <= 120 else 0
        for j, phoneme in enumerate(phonemes):
            # offset from the word start so the phonemes stay inside the word
            phonemeStart = wordStart + j * eachPhonemeTime
            phonemeList.append((phonemeStart - padding,
                                phonemeStart + eachPhonemeTime + padding,
                                phoneme))
    return phonemeList