# Speech Recognition by Data Camp
## Part 3 : PyDub  

##### git: https://github.com/jiaaro/pydub

### Create an AudioSegment Instance   
#### Composition of AudioSegment : 
 - channels – the number of channels in the wave file (1 for mono, 2 for stereo, etc.)
 - width – the number of bytes per sample
 - framerate – the number of frames per second (hertz)
 - nframes – the number of frames in the data stream
 - bytes – a string object containing the bytes of the data stream


In [1]:
# Import AudioSegment, the pydub class every audio object in this notebook is built from
from pydub import AudioSegment

# Load the WAV file from disk into an AudioSegment instance.
# The format is passed explicitly here; later cells call from_file without it.
wav_file = AudioSegment.from_file(file="./data_part_3/wav_file.wav", 
                                  format="wav")

# Confirm that from_file returned an AudioSegment
print(type(wav_file))

<class 'pydub.audio_segment.AudioSegment'>


### Play audio file

In [2]:
# Import the playback helper from pydub
from pydub.playback import play

# Play the audio loaded into wav_file by the first cell.
# NOTE(review): presumably needs a working audio output device/backend — confirm before running headless.
play(wav_file)

### Retrieve Frame Rate

In [3]:
# Print the frame rate of the loaded file, i.e. frames per second (Hz)
print(wav_file.frame_rate)

48000


### Retrieve number of channels

In [4]:
# Print the number of channels in the loaded file (1 = mono, 2 = stereo)
print(wav_file.channels)

2


### Retrieve the max amplitude

In [5]:
# Print the maximum amplitude of the loaded file
print(wav_file.max)

8484


### Retrieve the length

In [6]:
# Print the length of the loaded audio.
# NOTE(review): presumably in milliseconds, matching the millisecond slicing used
# later in this notebook ([4000:] to drop four seconds) — confirm against the pydub docs.
print(len(wav_file))

3284


### Set frame rate

In [7]:
# Resample to 16000 frames per second; the result is bound to a new name
# so wav_file keeps pointing at the audio as loaded
wav_file_16k = wav_file.set_frame_rate(16000)

# Check the frame rate of the resampled audio (expected: 16000)
print(wav_file_16k.frame_rate)

16000


### Set Number of channels


In [8]:
# Convert to a single channel (mono); the result is bound to a new name
wav_file_1_ch = wav_file.set_channels(1)

# Check the number of channels of the converted audio (expected: 1)
print(wav_file_1_ch.channels)

1


### Retrieve & set sample width

In [9]:
# Print the current sample width (bytes per sample) of the loaded file
print(f"Old sample width: {wav_file.sample_width}")

# Reduce the sample width to 1 byte per sample; the result is bound to a new name
wav_file_sw_1 = wav_file.set_sample_width(1)

# Check the sample width of the converted audio (expected: 1)
print(f"New sample width: {wav_file_sw_1.sample_width}")

Old sample width: 2
New sample width: 1


### Turn up & Turn down the volume

In [10]:
# Load the audio file whose volume will be adjusted
volume_adjusted = AudioSegment.from_file("./data_part_3/volume_adjusted.wav")

# Lower the volume by 60 dB: subtracting a number from an AudioSegment applies a gain reduction
quiet_volume_adjusted = volume_adjusted - 60


# Increase the volume by 15 dB: adding a number to an AudioSegment applies a gain boost
louder_volume_adjusted = volume_adjusted + 15

### Normalize amplitude

In [11]:
# Import the normalize effect from pydub
from pydub.effects import normalize

# Load the target audio file (loud at first, then quiet, going by its name)
loud_then_quiet = AudioSegment.from_file("./data_part_3/loud_then_quiet.wav")

# Normalize the amplitude of the target audio; the result is bound to a new name
normalized_loud_then_quiet = normalize(loud_then_quiet)

### Chopping and changing audio files

In [12]:
# AudioSegment was already imported in the first cell; this repeat import is redundant but harmless
from pydub import AudioSegment

# Load part 1 and part 2 audio files.
# corrected_part_3 is loaded as well but is not used anywhere else in this cell.
# NOTE(review): presumably a reference to compare part_3 against — confirm or remove.
part_1 = AudioSegment.from_file("./data_part_3/ex3_slicing_part_1.wav")
part_2 = AudioSegment.from_file("./data_part_3/ex3_slicing_part_2.wav")
corrected_part_3 = AudioSegment.from_file("./data_part_3/ex3_slicing_part_3.wav")

# Remove the first four seconds of part 1 (slice indices are milliseconds: 4000 -> 4 s)
part_1_removed = part_1[4000:]

# Concatenate the remainder of part 1 with part 2 (+ between two segments joins them)
part_3 = part_1_removed + part_2

### Splitting stereo audio to mono with PyDub

In [13]:
# AudioSegment was already imported in the first cell; this repeat import is redundant but harmless
from pydub import AudioSegment

# Load the stereo phone call and print how many channels it has
stereo_phone_call = AudioSegment.from_file("./data_part_3/ex3_stereo_call.wav")
print(f"Stereo number channels: {stereo_phone_call.channels}")

# Split the stereo call into separate single-channel segments
channels = stereo_phone_call.split_to_mono()

# Each split segment should report exactly one channel
print(f"Split number channels: {channels[0].channels}, {channels[1].channels}")

# Keep each channel under its own name for later use
phone_call_channel_1 = channels[0]
phone_call_channel_2 = channels[1]

Stereo number channels: 2
Split number channels: 1, 1


### Exporting and reformatting audio files

In [14]:
# AudioSegment was already imported in the first cell; this repeat import is redundant but harmless
from pydub import AudioSegment

# Load the .mp3 file
mp3_file = AudioSegment.from_file("./data_part_3/mp3_file.mp3")

# Export the audio as a .wav file next to the source file.
# export() is the last expression of the cell, so its return value — a file
# object for the new .wav — is displayed as the cell output.
# NOTE(review): that file object is never closed here; consider assigning it and calling close().
mp3_file.export(out_f="./data_part_3/mp3_file.wav",
                format="wav")

<_io.BufferedRandom name='./data_part_3/mp3_file.wav'>

### Manipulating multiple audio files with PyDub

In [15]:
import os

In [16]:
def convertToWav(folder):
    """Convert each given audio file to WAV.

    Args:
        folder: Iterable of audio file paths. Despite the name, it is
            iterated directly and is NOT listed as a directory, so pass a
            list of paths rather than a folder path.

    Each output is named after the input's base name with a ".wav" extension.
    Because that name carries no directory, the file is written relative to
    the current working directory. One progress line is printed per file.
    """
    # Loop through the files in the folder
    for audio_file in folder:

        # Create the new .wav filename (directory and extension of the input are dropped)
        wav_filename = os.path.splitext(os.path.basename(audio_file))[0] + ".wav"

        # Read audio_file and export it in wav format. export() hands back the
        # output file object; close it so one handle is not leaked per file.
        exported = AudioSegment.from_file(audio_file).export(out_f=wav_filename,
                                                             format="wav")
        exported.close()

        print(f"Creating {wav_filename}...")

### An audio processing workflow

In [17]:
# Load the recording that starts with static
file_with_static = AudioSegment.from_file("./data_part_3/preprocessing_data/ex3-static-help-with-account.mp3")

# Cut the 3 seconds of static off the start (slice indices are milliseconds: 3000 -> 3 s)
file_without_static = file_with_static[3000:]

# Increase the volume by 10 dB
louder_file_without_static = file_without_static + 10

In [18]:
def preprocessingAudio(folder, static_ms=3000, gain_db=10,
                       output_dir="./data_part_3/preprocessing_data/"):
    """Trim leading static, boost the volume and export each file as WAV.

    Args:
        folder: Iterable of audio file paths. Despite the name, it is
            iterated directly and is NOT listed as a directory.
        static_ms: Amount cut from the start of each file, in milliseconds
            (default 3000, i.e. the 3 seconds of static).
        gain_db: Volume increase applied after trimming, in dB (default 10).
        output_dir: Directory the .wav files are written to. It is not
            created here. The default is the notebook's preprocessing folder.

    One progress line with the output path is printed per file.
    """
    for audio_file in folder:
        file_with_static = AudioSegment.from_file(audio_file)

        # Cut the leading static off
        file_without_static = file_with_static[static_ms:]

        # Increase the volume
        louder_file_without_static = file_without_static + gain_db

        # Create the .wav filename for export. os.path.join adds a separator
        # only when output_dir does not already end with one, so the default
        # produces the same path string as plain concatenation.
        base_name = os.path.splitext(os.path.basename(audio_file))[0]
        wav_filename = os.path.join(output_dir, base_name + ".wav")

        # Export the louder file without static as .wav. export() hands back
        # the output file object; close it so one handle is not leaked per file.
        exported = louder_file_without_static.export(wav_filename, format="wav")
        exported.close()
        print(f"Creating {wav_filename}...")

In [19]:
# Run the preprocessing on one file; the argument is a list of paths because
# preprocessingAudio iterates over it directly rather than listing a directory
preprocessingAudio(["./data_part_3/preprocessing_data/ex3-static-help-with-account.mp3"])

Creating ./data_part_3/preprocessing_data/ex3-static-help-with-account.wav...
