-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
331 lines (269 loc) · 13.1 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import cv2
import tkinter as tk
from PIL import ImageTk
import time
import PIL
import tkinter
import hand_tracking
import midi
import mediapipe_utils
from camera import Camera
import oscillator
from smoothing import DataSmoother
from gui import ThereminGUI
from numpy import ndarray
from typing import Tuple, Optional, Dict
# Shutdown signal: on_closing() sets this to True and update_loop() polls it.
quit_flag: bool = False

# Independent smoothers, one for the pitch signal and one for the loudness signal.
frequency_smoother = DataSmoother()
volume_smoother = DataSmoother()
def on_closing() -> None:
    """
    Handle the close event of the theremin GUI window.

    The module-level ``quit_flag`` is raised first so that ``update_loop``
    sees the shutdown request, and only then is the GUI's root window
    destroyed.
    """
    global quit_flag

    quit_flag = True

    window = theremin_gui.root
    window.destroy()
def read_camera() -> ndarray:
    """
    Grab one frame from the module-level camera and mirror it.

    The frame is flipped horizontally through ``camera.flip_horizontal`` so the
    picture behaves like a mirror for the person in front of the camera.

    Note:
        The success flag returned by ``camera.read()`` is not inspected here.

    Returns:
        numpy.ndarray: The horizontally flipped frame.
    """
    _success, captured = camera.read()
    return camera.flip_horizontal(captured)
def process_frame(frame: ndarray) -> Tuple[ndarray, dict]:
    """
    Run hand detection on a frame.

    Thin wrapper around ``hand_tracking.process_image`` that supplies the
    module-level ``hand_detector``, ``drawing_utils`` and
    ``connections_draw_spec`` objects.

    Args:
        frame (numpy.ndarray): The frame to analyse.

    Returns:
        Tuple[numpy.ndarray, dict]: Whatever ``hand_tracking.process_image``
        returns; callers unpack it as the processed frame and a dictionary of
        hand coordinates.
    """
    result = hand_tracking.process_image(
        frame,
        hand_detector,
        drawing_utils,
        connections_draw_spec,
    )
    return result
def update_smoothing_factors(smoothing_factor: Optional[float]) -> None:
    """
    Push a new smoothing factor to both smoothers.

    Sets the ``smoothing_factor`` attribute of ``frequency_smoother`` and
    ``volume_smoother`` to the given value. Nothing happens when the value is
    ``None``.

    Args:
        smoothing_factor (Optional[float]): The value to assign, or ``None`` to
                                            leave both smoothers untouched.
    """
    if smoothing_factor is None:
        return

    for smoother in (frequency_smoother, volume_smoother):
        smoother.smoothing_factor = smoothing_factor
def update_change_limits(change_limit: Optional[float]) -> None:
    """
    Push a new change limit to both smoothers.

    Sets the ``change_limit`` attribute of ``frequency_smoother`` and
    ``volume_smoother`` to the given value. Nothing happens when the value is
    ``None``.

    Args:
        change_limit (Optional[float]): The value to assign, or ``None`` to
                                        leave both smoothers untouched.
    """
    if change_limit is None:
        return

    for smoother in (frequency_smoother, volume_smoother):
        smoother.change_limit = change_limit
def update_hands_display(frame: ndarray, hands_coord: Dict[str, Dict[str, float]]) -> None:
    """
    Write the X and Y coordinates of every detected hand onto the frame.

    Each hand gets two lines of text drawn with ``display_text``. The 'Right'
    hand uses the rows at y=110 and y=140; any other key uses y=170 and y=200.

    Args:
        frame (numpy.ndarray): The frame to draw on.
        hands_coord (Dict[str, Dict[str, float]]): Hand coordinates keyed by hand
                                                   name ('Right' and/or 'Left');
                                                   each value holds 'x' and 'y'.
    """
    for hand, coords in hands_coord.items():
        if hand == 'Right':
            x_row, y_row = 110, 140
        else:
            x_row, y_row = 170, 200

        display_text(frame, f"{hand} X: {coords['x']:.2f}", (7, x_row), font_scale=1.5)
        display_text(frame, f"{hand} Y: {coords['y']:.2f}", (7, y_row), font_scale=1.5)
def update_frequency_and_volume_labels(
    hands_coord: Dict[str, Dict[str, float]],
    previous_volume: float
) -> Tuple[float, float]:
    """
    Derive smoothed frequency and volume from the hand positions and refresh the labels.

    The right hand's 'y' value is converted with ``midi.y_to_frequency`` and the
    left hand's 'y' value with ``midi.y_to_volume``. Each result is passed through
    its smoother and written to the matching label. When no left hand is present,
    ``previous_volume`` is fed to the volume smoother instead and the volume label
    is left as it is.

    Args:
        hands_coord (Dict[str, Dict[str, float]]): Hand coordinates keyed by hand
                                                   name ('Right' and/or 'Left');
                                                   each value holds 'x' and 'y'.
        previous_volume (float): Volume used as smoother input when the left hand
                                 is missing.

    Returns:
        Tuple[float, float]: The smoothed frequency and the smoothed volume. The
        frequency is None when no right hand is present.
    """
    smooth_frequency = None
    if "Right" in hands_coord:
        right_y = hands_coord["Right"]["y"]
        smooth_frequency = frequency_smoother.smooth(midi.y_to_frequency(right_y))
        frequency_label.config(text=f"Freq : {smooth_frequency:.2f}")

    if "Left" not in hands_coord:
        # No left hand: keep feeding the last known volume through the smoother.
        smooth_volume = volume_smoother.smooth(previous_volume)
    else:
        left_y = hands_coord["Left"]["y"]
        smooth_volume = volume_smoother.smooth(midi.y_to_volume(left_y))
        volume_label.config(text=f"Vol : {smooth_volume:.0f}")

    return smooth_frequency, smooth_volume
def play_sound(
    smooth_frequency: float,
    smooth_volume: float,
    previous_frequency: float,
    previous_volume: float
) -> Tuple[Optional[object], Optional[float], Optional[float]]:
    """
    Start a new sine wave when frequency or volume moved by more than 1.

    A tone is started through ``oscillator.play_sine_wave`` only when the smoothed
    frequency or the smoothed volume differs from its previous value by more than
    1. In that case the smoothed values become the new reference values;
    otherwise the reference values are returned unchanged and no tone is started.

    Args:
        smooth_frequency (float): The current smoothed frequency value.
        smooth_volume (float): The current smoothed volume value.
        previous_frequency (float): The reference frequency to compare against.
        previous_volume (float): The reference volume to compare against.

    Returns:
        Tuple[Optional[object], Optional[float], Optional[float]]: The object
        returned by ``oscillator.play_sine_wave`` (None when no tone was started),
        followed by the reference frequency and reference volume to use on the
        next call. All three are None when any argument is None.
    """
    inputs = (smooth_frequency, smooth_volume, previous_frequency, previous_volume)
    if any(value is None for value in inputs):
        return None, None, None

    frequency_moved = abs(smooth_frequency - previous_frequency) > 1
    volume_moved = abs(smooth_volume - previous_volume) > 1
    if not (frequency_moved or volume_moved):
        return None, previous_frequency, previous_volume

    # NOTE(review): the earlier version guarded a call to oscillator.stop_sound()
    # with a local that was always None at that point, so the previous tone was
    # never stopped. That dead branch is dropped here; stopping the previous
    # tone would need the handle to be kept between calls.
    sound = oscillator.play_sine_wave(smooth_frequency, 1, smooth_volume)
    return sound, smooth_frequency, smooth_volume
def update_canvas(frame: ndarray) -> None:
    """
    Draw the given frame on the module-level canvas.

    The frame is converted from BGR to RGB, wrapped in a ``PhotoImage`` and
    placed at the top-left corner of the canvas.

    Args:
        frame (numpy.ndarray): The input frame to display on the canvas.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    photo = PIL.ImageTk.PhotoImage(image=PIL.Image.fromarray(rgb_frame))
    canvas.create_image(0, 0, image=photo, anchor=tkinter.NW)
    # Tk does not keep the Python PhotoImage alive on its own. Without a
    # lasting reference the image is garbage-collected when this function
    # returns and the canvas shows nothing.
    canvas.image = photo
def update_framerate(previous_time: float) -> float:
    """
    Refresh the FPS label and return the timestamp for the next call.

    The framerate is the reciprocal of the time elapsed since ``previous_time``.
    When no time has elapsed (two calls within one clock tick) or the clock went
    backwards, the label is left unchanged instead of dividing by zero.

    Args:
        previous_time (float): Timestamp returned by the previous call, as given
                               by ``time.time()``.

    Returns:
        float: The current timestamp, to be passed in on the next call.
    """
    current_time = time.time()
    elapsed = current_time - previous_time

    # Guard against ZeroDivisionError on coarse clocks and against negative rates.
    if elapsed > 0:
        framerate = 1 / elapsed
        framerate_label.config(text=f"FPS: {framerate:.0f}")

    return current_time
def update_loop(
theremin_gui: "ThereminGUI",
smoothing_factor: Optional[float] = None,
change_limit: Optional[float] = None
) -> None:
"""
Main loop for updating the theremin GUI, processing frames, and playing sounds.
This function reads frames from the camera, processes them to detect hands,
updates the theremin GUI, sets smoothing factors and change limits, updates
the hands display, calculates smoothed frequency and volume, plays sounds
based on the calculated values, updates the canvas, and updates the framerate.
The loop runs until the module-level quit_flag is set.
Args:
theremin_gui (ThereminGUI): The theremin GUI instance.
smoothing_factor (Optional[float]): The smoothing factor for frequency and volume, if any.
change_limit (Optional[float]): The change limit for frequency and volume, if any.
"""
# NOTE(review): leading indentation is missing in this copy of the file, so the
# nesting of the statements below (in particular what sits under each `if`)
# cannot be determined from here - confirm against the original before editing.
# Of the names declared global, only hands_coord is assigned in this function.
global hand_detector, drawing_utils, connections_draw_spec, canvas, hands_coord
# State carried from one iteration to the next; all three start at 0.
previous_time = 0
previous_frequency = 0
previous_volume = 0
while True:
# Grab a mirrored frame and run hand detection on it.
frame = read_camera()
frame, hands_coord = process_frame(frame)
# quit_flag is set by on_closing().
if quit_flag:
break
# Presumably lets the GUI toolkit process pending events - confirm in ThereminGUI.
theremin_gui.root.update()
# Both helpers are no-ops when their argument is None.
update_smoothing_factors(smoothing_factor)
update_change_limits(change_limit)
update_hands_display(frame, hands_coord)
smooth_frequency, smooth_volume = update_frequency_and_volume_labels(hands_coord, previous_volume)
# smooth_frequency is None when no 'Right' hand was detected.
if smooth_frequency is not None and smooth_volume is not None:
sound, previous_frequency, previous_volume = play_sound(smooth_frequency, smooth_volume, previous_frequency, previous_volume)
# play_sound returns None as its first element when no new tone was started.
if sound is not None and previous_frequency is not None and previous_volume is not None:
# update_canvas(frame)
theremin_gui.update_canvas(frame)
previous_time = update_framerate(previous_time)
# theremin_gui.update_tuner_label(smooth_frequency)
theremin_gui.update_tuner_canvas(smooth_frequency)
# Second check of the same flag at the end of the iteration.
if quit_flag:
break
def display_text(
    image: ndarray,
    text: str,
    position: tuple[int, int],
    font_scale: float = 1.5,
    font: int = cv2.FONT_HERSHEY_PLAIN,
    color: tuple[int, int, int] = (217, 0, 234),
    thickness: int = 3
) -> None:
    """
    Draw a line of text onto an image.

    Forwards its arguments to ``cv2.putText``, which draws directly on ``image``.

    Args:
        image (np.ndarray): The image to draw on.
        text (str): The text to draw.
        position (tuple[int, int]): The x and y coordinates passed to ``cv2.putText``.
        font_scale (float, optional): The font scale factor. Default is 1.5.
        font (int, optional): An OpenCV font constant. Default is cv2.FONT_HERSHEY_PLAIN.
        color (tuple[int, int, int], optional): The text colour. Default is (217, 0, 234).
        thickness (int, optional): The stroke thickness. Default is 3.
    """
    cv2.putText(
        image,
        text,
        position,
        font,
        font_scale,
        color,
        thickness,
    )
# Open the camera first; everything after this point runs under try/finally so
# that camera.release() is reached even when the loop or the GUI raises.
camera = Camera()
try:
    hand_detector, drawing_utils, connections_draw_spec = mediapipe_utils.init_mediapipe()

    previous_time = 0
    current_time = 0
    hands_coord = {}

    # Create the GUI and expose its widgets as module globals, because the
    # helper functions above look them up by these names.
    theremin_gui = ThereminGUI(update_loop, on_closing)
    canvas = theremin_gui.canvas
    frequency_label = theremin_gui.frequency_label
    volume_label = theremin_gui.volume_label
    framerate_label = theremin_gui.framerate_label

    # Run the processing loop; it returns once quit_flag has been set.
    update_loop(theremin_gui)

    # NOTE(review): start() is called twice in a row, exactly as before. This
    # looks like a leftover duplicate - confirm against ThereminGUI.start()
    # before removing one of the calls.
    theremin_gui.start()
    theremin_gui.start()
finally:
    camera.release()