In [60]:
import parselmouth
import time
import numpy as np
import speech_recognition as sr
import subprocess
from pythonosc.udp_client import SimpleUDPClient
import sounddevice as sd



In [61]:
# ADD:
# MESSAGE FOR CHANGING PITCH (done)
# AUDIO FILE PROCESSING SUPPORT + saving to audio file (done)
# INPUT/OUTPUT TIMESTAMP
# look at sampling size, make sure it is calculating correctly (yes, it's getting f1)
# have the hapticizer the same volume as speaker : NEED TO MAKE TIMEFRAME OF SOUND A LITTLE LONGER!!! (done)
# multiple options for how to process speech
# ideas: 
# - quiet mode / crowded room mode with different intensity thresholds
# - 

# Create UDP client to send pitch to chuck code
# (OSC messages are sent to localhost on port 6449).
client = SimpleUDPClient("127.0.0.1", 6449)
# Haptic output range in Hz that the detected voice pitch is mapped onto.
hmin = 100
hmax = 300
# Voice pitch range in Hz used as the input side of that mapping.
vmin = 80
vmax = 400
# Sample rate in Hz used when opening the microphone input stream.
fs = 44100


# Ask which mode to run. Only the exact answer "1" selects file mode;
# any other answer falls through to real-time mode.
print("Welcome to Hapticizer 3000. Enter a number:")
choice = input("(1) Hapticize an audio file \n(2) Hapticize real-time audio\n")

if choice == "1":
    # File mode: analyse the whole audio file up front, then send one
    # /pitch + /loudness pair per chunk to ChucK at roughly real-time pace.
    filename = input("What file would you like to hapticize? ")
    sound = parselmouth.Sound(filename)
    pitch = sound.to_pitch()
    intensity = sound.to_intensity()
    timestep = pitch.dt
    frequencies = pitch.selected_array["frequency"]
    frame_times = pitch.xs()
    # Intensity frames are not guaranteed to line up one-to-one with pitch
    # frames, so intensity is looked up by time (linear interpolation)
    # instead of by reusing the pitch frame index.
    intensity_times = intensity.xs()
    intensity_values = intensity.values[0]
    print("FRAME LENGTH: ", pitch.dt)
    print("length", len(frequencies))
    input("Start the hapticizer and then press enter to start.")

    # Number of consecutive pitch frames averaged into one message
    # (20 frames of 0.01 s each is 0.2 s per message).
    frames_per_chunk = 20
    for start in range(0, len(frequencies), frames_per_chunk):
        stop = start + frames_per_chunk
        chunk = frequencies[start:stop]

        # Unvoiced frames are reported as 0 Hz; averaging them in would pull
        # the result below the pitch that was actually spoken, so the mean
        # is taken over voiced frames only and divided by their real count.
        voiced = chunk[chunk > 0]
        curpitch = float(voiced.mean()) if voiced.size else 0.0

        curintensity = 0.0
        if len(intensity_values):
            chunk_time = float(frame_times[start:stop].mean())
            curintensity = float(
                np.interp(chunk_time, intensity_times, intensity_values)
            )

        if curintensity > 20 and vmin < curpitch < 1000:
            # Map [vmin, vmax] linearly onto [hmin, hmax]. The span of the
            # target range is (hmax - hmin), and anything above vmax is
            # clamped so the haptic frequency never leaves that range.
            ratio = min((curpitch - vmin) / (vmax - vmin), 1.0)
            normalized_pitch = hmin + ratio * (hmax - hmin)
            print("Sending message")
            client.send_message("/pitch", float(normalized_pitch))
            # Loudness is quantised to three gain levels for ChucK.
            if curintensity < 50:
                client.send_message("/loudness", 0.1)
            elif curintensity > 70:
                client.send_message("/loudness", 1.0)
            else:
                client.send_message("/loudness", 0.5)

        # Wait for as long as this chunk lasts in the recording so messages
        # go out at the pace of the original audio.
        time.sleep(timestep * len(chunk))

    print("Done!")

else:        

    def detect_and_send_pitch(audio, sample_rate):
        """Estimate the pitch and loudness of one audio chunk and send them to ChucK.

        Args:
            audio: Mono samples for one chunk, passed to ``parselmouth.Sound``.
            sample_rate: Sampling frequency of ``audio`` in Hz.

        Returns:
            None. When the chunk is loud enough and its mean voiced pitch lies
            between 80 and 400 Hz, one ``/pitch`` and one ``/loudness`` OSC
            message are sent through the module-level ``client``.
        """
        # Convert audio to a Parselmouth Sound object
        sound = parselmouth.Sound(audio, sampling_frequency=sample_rate)
        intensity = sound.get_intensity()
        print("Intensity", intensity)

        # Sound intensity threshold: if sound is less than 30 dB, ignore it
        # Probably needs to be higher than 30 in practice especially in a noisy environment
        if intensity < 30:
            return
        print("detect and send pitch")
        # Extract pitch using Parselmouth
        pitch = sound.to_pitch(pitch_floor=120)
        print("FRAME LENGTH", pitch.dt)
        print("whole thing length", pitch.get_total_duration())
        pitch_values = pitch.selected_array['frequency']

        # Unvoiced frames are reported as 0 Hz. Average voiced frames only so
        # pauses inside the chunk do not lower the reported pitch; this also
        # avoids dividing by zero when the chunk produced no frames at all.
        voiced = pitch_values[pitch_values > 0]
        value = float(voiced.mean()) if voiced.size else 0.0
        print("VALUE", value)

        vmin = 80
        vmax = 400
        if vmin < value < vmax:
            print(value)

            # Normalize to haptic range: 100-300 Hz (SUBJECT TO CHANGE)
            # The span of the target range is (hmax - hmin); scaling by hmax
            # alone would let the output run past the top of the range.
            hmin = 100
            hmax = 300
            normalized_pitch = hmin + ((value - vmin) / (vmax - vmin)) * (hmax - hmin)

            client.send_message("/pitch", float(normalized_pitch))
            # Loudness is quantised to three gain levels for ChucK.
            if intensity < 50:
                client.send_message("/loudness", 0.1)
            elif intensity > 70:
                client.send_message("/loudness", 1.0)
            else:
                client.send_message("/loudness", 0.5)

        
        

    def audio_callback(indata, frames, time, status):
        """Input-stream callback: report stream status, then analyse the block.

        The captured block is averaged across its channel axis to get a mono
        signal, which is handed to ``detect_and_send_pitch`` together with the
        module-level sample rate ``fs``. ``frames`` and ``time`` are accepted
        because the stream passes them, but they are not used here.
        """
        print("callback")
        if status:
            print(status)
        channel_average = indata.mean(axis=1)
        detect_and_send_pitch(channel_average, fs)

    #Duration will be the length of the chunk used to get the pitch and intensity
    #Currently 0.5 seconds
    duration = int(fs * 0.5)

    with sd.InputStream(channels=1, samplerate=fs, blocksize=duration, callback=audio_callback):
        print("Listening...")
        # All the work happens in audio_callback. This loop only keeps the
        # stream open, so it sleeps instead of spinning: an empty
        # `while True: pass` would keep one CPU core fully busy.
        while True:
            time.sleep(0.1)



    # General function for analyzing sound
    def analyze_sound(sound):
        """Collect pitch and intensity tracks plus their averages for a sound.

        Args:
            sound: Object providing ``to_pitch()`` and ``to_intensity()``
                (a ``parselmouth.Sound`` elsewhere in this file).

        Returns:
            dict with keys:
                "pitch": per-frame pitch in Hz (0 for unvoiced frames),
                "intensity": per-frame intensity values,
                "avgPitch": mean pitch over voiced frames, 0.0 if none,
                "avgIntensity": result of ``intensity.get_average()``,
                "times": time of each intensity frame.
        """
        pitch = sound.to_pitch()
        # Per-frame pitch is read through selected_array, as in the rest of
        # this file.
        frequencies = pitch.selected_array['frequency']
        # Unvoiced frames are reported as 0 Hz and are left out of the mean.
        voiced = frequencies[frequencies > 0]

        intensity = sound.to_intensity()
        analysis = {
            "pitch": frequencies,
            "intensity": intensity.values[0],
            "avgPitch": float(voiced.mean()) if voiced.size else 0.0,
            "avgIntensity": intensity.get_average(),
            "times": intensity.xs(),
        }
        print(analysis["avgIntensity"])
        return analysis

    # Previous attempt at sending sound (slow and finicky)

    def send_intensity_to_chuck(chuck_instance, times, intensity_values, output_wav):
        """Earlier, abandoned approach: record an utterance, then drive ChucK gain.

        Listens on the microphone through speech_recognition, analyses the
        recording with analyze_sound, launches ChucK, and then calls
        "updateGain" on chuck_instance once per (time, intensity) pair taken
        from the ``times`` / ``intensity_values`` arguments.

        Args:
            chuck_instance: Object exposing ``run(code)`` and
                ``call(name, args)`` -- NOTE(review): its type is not visible
                in this file; confirm what API it is expected to provide.
            times: Values used as sleep durations between gain updates.
            intensity_values: Values converted to float and sent as gains.
            output_wav: Not used anywhere in the body.

        NOTE(review): this function looks unfinished -- see the notes below
        before calling it.
        """
        
        # NOTE(review): new_intensity is never read or appended to.
        new_intensity = []

        #python library that detects voice; when voice off, play the vibration for the previous utterance (maximum time frame?)
        
        #approach that records some and then processes it with delay
        r = sr.Recognizer()
        r.pause_threshold = 0.8 # this is the default; can be changed
        with sr.Microphone() as source:
            audio = r.listen(source)
            wav_data = audio.get_wav_data()
            # NOTE(review): wav_data is WAV-encoded bytes; verify that
            # parselmouth.Sound accepts that directly rather than a sample array.
            cur_sound = parselmouth.Sound(wav_data, audio.sample_rate)
            analysis = analyze_sound(cur_sound)


        # can it be even more real time.... ponder 
        for time_step, intensity in zip(analysis["times"], analysis["intensity"]):
            #normalize intensity and pitch
            # NOTE(review): `s` is not defined anywhere in the visible code, so
            # this line would raise NameError when reached.
            s.setGain(intensity)

        # subprocess.run waits for the chuck process to exit before continuing.
        subprocess.run(["chuck", "hapticize.ck"])
        # NOTE(review): the "{output}" placeholder in the ChucK source below is
        # never substituted (no format call, and output_wav is unused), so it
        # is passed to ChucK literally.
        chuck_instance.run("""
        SinOsc s => Gain g => dac; // Sine oscillator
        s => WvOut w => blackhole;

        "{output}" => w.wavFilename;

        200 => s.freq;   // Base frequency
        0.1::second => dur d; // Time step

        fun void updateGain(float newGain) {
            newGain => s.gain; // Update the gain based on intensity
        }

        while (true) {
            1::second => now; // Keep the ChucK VM running
        }
        """)

        # Send the intensity data to ChucK
        # NOTE(review): each entry of `times` is used as a sleep duration; if
        # the caller passes absolute timestamps the waits grow with every step.
        for time_step, intensity in zip(times, intensity_values):
            chuck_instance.call("updateGain", [float(intensity)])
            time.sleep(time_step)

    # output = open("output.wav", "wb")
    # send_intensity_to_chuck(chuck_instance, times, values, output)

    #if [acoustic property] > number, add a haptic vibration for it

Welcome to Hapticizer 3000. Enter a number:
FRAME LENGTH:  0.01
length 929
Sending message
Sending message
Sending message
Sending message
Done!
