# Audio Preprocessing for Anonymization
This notebook demonstrates how to help "anonymize" a spoken audio clip by changing its speed and pitch.


In [1]:
import os

from gtts import gTTS
from pydub import AudioSegment



Use text-to-speech (TTS) to generate an audio clip of spoken words that we can use as test input.

In [2]:
# Generate a short spoken-word test clip with gTTS and store it as an MP3.
# Note: gTTS calls an online service, so this cell needs network access.

# Text to convert to speech
text = "Hello AI, I hope you are not adversarial. Please take a sit"

# Destination of the generated clip; later cells read `audio_file_path`.
audio_file_path = "../audio/hello_ai.mp3"

# Saving fails if the target folder is missing, so create it up front.
# The guard skips makedirs when the path has no directory component.
audio_dir = os.path.dirname(audio_file_path)
if audio_dir:
    os.makedirs(audio_dir, exist_ok=True)

# Create a gTTS object and save the synthesized speech
tts = gTTS(text=text, lang='en')
tts.save(audio_file_path)

# Round-trip the file through pydub: decode it and re-encode it as MP3 in place.
# This does not play the clip; it confirms pydub can decode the file before the
# next cell relies on it.
audio = AudioSegment.from_file(audio_file_path)
audio.export(audio_file_path, format="mp3")

print("Audio file 'hello_ai.mp3' created successfully.")


Audio file 'hello_ai.mp3' created successfully.


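Use pydub to load the file and speed up the audio clip. Note that pydub's `speedup` shortens the clip by dropping small chunks of audio rather than resampling it, so the pitch stays largely unchanged; raising the pitch as well requires changing the frame rate.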

In [3]:
from pydub import AudioSegment
from pydub.playback import play
import os


# Load the generated clip, speed it up, save the result, and try to play it.
# Failures while loading, transforming, or saving are re-raised as RuntimeError
# (chained to the original exception); a playback failure is only reported.

# Fail early with a clear message if the input clip is missing
if not os.path.exists(audio_file_path):
    raise FileNotFoundError(f"The file {audio_file_path} does not exist.")

# Load audio file
try:
    audio = AudioSegment.from_file(audio_file_path)
except Exception as e:
    # `from e` keeps the original traceback attached as the explicit cause
    raise RuntimeError(f"Could not load audio file: {e}") from e

# Speed up the audio to 1.5 times the original speed.
# NOTE(review): pydub's speedup() shortens the clip by discarding small chunks
# and crossfading the rest; it does not resample, so the pitch is expected to
# stay roughly the same. Raising the pitch would need a frame-rate change
# instead - confirm against the pydub documentation.
try:
    speed_up = audio.speedup(playback_speed=1.5)
except Exception as e:
    raise RuntimeError(f"Error in speeding up the audio: {e}") from e

# Save the modified audio (written relative to the current working directory)
output_file_path = "modified_hello_ai.mp3"
try:
    speed_up.export(output_file_path, format="mp3")
    print(f"Modified audio saved to {output_file_path}")
except Exception as e:
    raise RuntimeError(f"Could not save the modified audio file: {e}") from e

# Play the modified audio (optional): playback is best-effort, so a failure
# here is printed instead of aborting the notebook.
try:
    play(speed_up)
except Exception as e:
    print(f"Could not play the audio: {e}")


Modified audio saved to modified_hello_ai.mp3


Input #0, wav, from '/tmp/tmpjgz03ctq.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.68, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   3.62 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


