<a href="https://colab.research.google.com/github/GermanMilla/python/blob/main/(Copy)_Compress_Audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Original file: https://colab.research.google.com/github/hfwittmann/sound/blob/master/Compress_audio_via_images.ipynb#scrollTo=vK34nC8tAZz9

# Settings

In [None]:
%matplotlib inline
import matplotlib
matplotlib.rcParams['figure.figsize'] = (15.0, 5.0)
import numpy as np
import soundfile as sf
from google.colab import files

In [None]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual, Button, HBox, VBox, Layout
import ipywidgets as widgets
import matplotlib.pyplot as plt

# Define compression helper functions

In [None]:
# Seed value used below to initialise NumPy's global random number generator.
random_seed = 0
from sklearn.decomposition import TruncatedSVD
# Seed the global NumPy RNG once, when this cell is executed.
np.random.seed(random_seed)

def compress(imageIn, n_components=100, random_seed=0):
    '''
    Approximate every layer of a 3-D image with a truncated SVD.

    Each 2-D layer ``imageIn[:, :, k]`` is fitted with ``TruncatedSVD``,
    projected onto ``n_components`` components and mapped back with
    ``inverse_transform``. The reconstructed layers are stacked again along
    the third axis.

    Parameters
    ----------
    imageIn : array with exactly three dimensions
        The third dimension must have size 2 (treated as a real/imaginary
        pair) or 3 (treated as RGB).
    n_components : int
        Number of SVD components kept per layer.
    random_seed : int
        Passed to ``TruncatedSVD`` as ``random_state`` so that repeated calls
        give the same result.

    Returns
    -------
    array with the same shape as ``imageIn``. For RGB input the values are
    clipped to [0, 255] and cast back to the dtype of ``imageIn``.

    Raises
    ------
    ValueError
        If ``imageIn`` is not 3-D, or its third dimension is neither 2 nor 3.
    '''
    if len(imageIn.shape) != 3:
        # The original raised a plain string, which itself fails with TypeError.
        raise ValueError('not sure what image type this')

    layer_types = {2: 'real_imaginary', 3: 'RGB'}
    n_of_layers = imageIn.shape[2]
    if n_of_layers not in layer_types:
        # Fail before doing any SVD work instead of hitting an undefined
        # image_type further down.
        raise ValueError(
            f'unsupported number of layers: {n_of_layers} (expected 2 or 3)')
    image_type = layer_types[n_of_layers]

    compressed_list = []
    for layer in range(n_of_layers):
        image_layer = imageIn[:, :, layer]  # e.g. r, g or b

        # random_state makes the decomposition reproducible per call.
        clf = TruncatedSVD(n_components=n_components, random_state=random_seed)
        clf.fit(image_layer)
        compressed_list.append(
            clf.inverse_transform(clf.transform(image_layer)))

    compressed = np.stack(compressed_list, axis=2)

    if image_type == 'RGB':
        # Clip to the expected range, then cast to the dtype of the input.
        compressed = np.clip(compressed, a_min=0, a_max=255)
        compressed = compressed.astype(imageIn.dtype)

    # Rough size estimate for one layer: rows * cols values originally versus
    # (1 + rows + cols) * n_components values after the decomposition.
    SHAPE = compressed.shape[:2]
    Original_memory = np.prod(SHAPE)
    Compressed_memory = (1 + np.sum(SHAPE)) * n_components
    print(f'The compressed memory is roughly {100 * Compressed_memory / Original_memory:0.0f}% of the original.')

    return compressed

In [None]:
def compress_complex(complex_image, n_components=100,random_seed=0, doplot=False):
    '''
    Compress a 2-D array of complex numbers
    (https://en.wikipedia.org/wiki/Complex_number).

    The real and imaginary parts are stacked as two layers of one array
    (similar to the three layers of an RGB image), passed to ``compress``,
    and recombined into a complex array afterwards.

    If ``doplot`` is true, the original and compressed real and imaginary
    parts are each handed to ``plot_spectrum``.
    '''
    real_part = np.real(complex_image)
    imag_part = np.imag(complex_image)

    # "ri" = real / imaginary: a two-layer image, one layer per part.
    layers_ri = np.stack([real_part, imag_part], axis=2)
    compressed_ri = compress(layers_ri, n_components, random_seed)

    compressed_real = compressed_ri[..., 0]
    compressed_imag = compressed_ri[..., 1]

    if doplot:
        panels = (
            (real_part, 'Real Part'),
            (compressed_real, 'Real Part Compressed'),
            (imag_part, 'Imaginary Part'),
            (compressed_imag, 'Imaginary Part Compressed'),
        )
        for panel_data, panel_title in panels:
            plot_spectrum(panel_data, panel_title)

    return compressed_real + 1j * compressed_imag
    

# Define plotting helper functions

In [None]:
from librosa import display

def plot_spectrum (data, name):
    '''
    Draw ``data`` with librosa's ``specshow`` (linear y axis, time x axis),
    titled 'Power spectrogram of <name>', with a colorbar labelled in dB.

    The figure is not shown explicitly here; display is left to the caller
    or to the notebook backend.
    '''
    display.specshow(data, x_axis='time', y_axis='linear')
    title_text = f'Power spectrogram of {name}'
    plt.title(title_text)
    plt.colorbar(format='%+2.0f dB')
    plt.tight_layout()

# Load Audio Helper Function

In [None]:
def load_audio_file(filepath):
    '''
    Load an audio file with librosa (``mono=False``) and return one signal.

    The shape of the loaded array is printed. If the array has more than one
    dimension, only the channels listed in ``channels`` (channel 0) are kept
    and averaged; otherwise a copy of the loaded array is returned.

    Returns
    -------
    (y, sr) : the 1-D signal and the sample rate reported by ``lr.load``.
    '''
    samples, sr = lr.load(filepath, mono=False)
    print(samples.shape)

    # Already one-dimensional: nothing to select, just hand back a copy.
    if len(samples.shape) <= 1:
        return np.array(samples), sr

    channels = [0]
    selected = samples[tuple([channels])]
    return np.mean(selected, axis=0), sr

# Download sound files


## Helper function to download a compressed file from Colab

In [None]:
def download_from_colab(y_inverted_sftf, name, sr=22050):
    '''
    Write a signal to '<name>_compressed.wav' and trigger a Colab download.

    Parameters
    ----------
    y_inverted_sftf : audio samples handed to ``sf.write``.
    name : base name of the output file (without the '_compressed.wav' suffix).
    sr : sample rate written to the file. Defaults to 22050, the value that
        was previously hard-coded, so existing two-argument calls behave the
        same.
    '''
    # Build the filename once so the written and the downloaded path cannot drift apart.
    out_path = f'{name}_compressed.wav'
    sf.write(out_path, y_inverted_sftf, sr, subtype='PCM_24')
    files.download(out_path)


## Helper function to download a sound file from a URL

In [None]:
import requests
def download_soundfile(url, name, timeout=60):
    '''
    Download ``url`` and save the response body to the file ``name``.

    Parameters
    ----------
    url : address to fetch; redirects are followed.
    name : path of the file to write (opened in binary mode).
    timeout : seconds passed to ``requests.get`` as its ``timeout`` so a
        stalled connection does not block the notebook indefinitely.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. Without this check an
        error page would be silently saved as if it were the sound file.
    '''
    print(f'downloading {url} to file {name}')

    # Fetch the file contents in binary form.
    response = requests.get(url, allow_redirects=True, timeout=timeout)
    response.raise_for_status()

    # Write the payload only after the request is known to have succeeded.
    with open(str(name), "wb") as out_file:
        out_file.write(response.content)
    return None

# Define which Sounds to use

In [None]:
# All files are stored in this Drive folder: https://drive.google.com/drive/folders/14u9mNvxXODx8Qtp3YveDsJP0bADAOOCW
base_url = 'https://drive.google.com/uc?export=download&id='

# Maps each sound name ('26094674_001' ... '26094674_010') to its download URL.
# The Drive file ids below are listed in order, one per sound.
sounds_dict = {
    f'26094674_{index:03d}': base_url + file_id
    for index, file_id in enumerate(
        (
            '1ijblRheMf3Pc03YRorRHOV4QdHwoWkGO',
            '1ifF-Q5keJFWyI-mAb0OEjMn6LPDCPPOj',
            '1C5iowU8C5D7pRQc9p9I0dzPf6-VvkWIm',
            '1nSGhbcME6R3FQdgKR2oXieds_HFjx4KS',
            '1EY-GC-55QMDy2gVfB2HEsrxVeAi4KWNN',
            '1W1EpOl12L7jTg3GoWoyRO798y9Vq5kCh',
            '1QgGeQGJ81E9F3pR25v3Wt5K8-8ukqSAD',
            '1yiUWT6dueo-bH1K5e6uF8ch1qCg6a7d_',
            '1hH_iBAOmxEqtb_0WgKO5F7fN6dmSP-_K',
            '1JODpRbp9tb-D9ibhtsdmCWf8buDHsLaZ',
        ),
        start=1,
    )
}

# Do the actual download

In [None]:
# Download every entry of sounds_dict, saving each one under the relative path '<name>.wav'.
for name, url in sounds_dict.items():
    # download_soundfile prints its own progress line for each file.
    download_soundfile(url, f'{name}.wav')

downloading https://drive.google.com/uc?export=download&id=1ijblRheMf3Pc03YRorRHOV4QdHwoWkGO to file 26094674_001.wav
downloading https://drive.google.com/uc?export=download&id=1ifF-Q5keJFWyI-mAb0OEjMn6LPDCPPOj to file 26094674_002.wav
downloading https://drive.google.com/uc?export=download&id=1C5iowU8C5D7pRQc9p9I0dzPf6-VvkWIm to file 26094674_003.wav
downloading https://drive.google.com/uc?export=download&id=1nSGhbcME6R3FQdgKR2oXieds_HFjx4KS to file 26094674_004.wav
downloading https://drive.google.com/uc?export=download&id=1EY-GC-55QMDy2gVfB2HEsrxVeAi4KWNN to file 26094674_005.wav
downloading https://drive.google.com/uc?export=download&id=1W1EpOl12L7jTg3GoWoyRO798y9Vq5kCh to file 26094674_006.wav
downloading https://drive.google.com/uc?export=download&id=1QgGeQGJ81E9F3pR25v3Wt5K8-8ukqSAD to file 26094674_007.wav
downloading https://drive.google.com/uc?export=download&id=1yiUWT6dueo-bH1K5e6uF8ch1qCg6a7d_ to file 26094674_008.wav
downloading https://drive.google.com/uc?export=download&

# Analyse Sound files

In [None]:
import pathlib
import IPython
import librosa as lr
from glob import glob
import numpy as np
import matplotlib.pyplot as plt


@interact(name=sounds_dict.keys())
def myprint(name):
    '''
    Interactive viewer for one of the downloaded sounds.

    Prints the file name, loads '<name>.wav', shows an audio player for it
    and draws the dB-scaled magnitude spectrogram of its STFT
    (n_fft=1024, hop_length=512).

    NOTE: a later cell defines another function that is also called
    ``myprint``; once that cell runs, this name refers to the later one.
    '''
    print(f'{name}.wav')
    y, sr = load_audio_file(f'{name}.wav')
    IPython.display.display(IPython.display.Audio(y, rate=sr))

    mysftf = lr.stft(y, n_fft=1024, hop_length=512)

    # Create the figure inside the callback. Previously it was created once,
    # while the decorator arguments were evaluated, and passed as a `plt`
    # keyword that this function does not accept.
    plt.figure(figsize=(15, 7))
    lr.display.specshow(lr.amplitude_to_db(np.abs(mysftf), ref=np.max),
                        sr=sr, hop_length=512, y_axis='linear', x_axis='time')
    plt.title('Spectrogram')
    plt.colorbar(format='%+2.0f dB')
    plt.tight_layout()


<Figure size 1080x504 with 0 Axes>

interactive(children=(Dropdown(description='name', options=('26094674_001', '26094674_002', '26094674_003', '2…

# Compress sound files

In [None]:
@interact(name=sounds_dict.keys(),
          n_components=[25, 50, 75, 100, 120, 150, 175])
def myprint(name, n_components=100):
    '''
    Interactive compression of one of the downloaded sounds.

    Loads '<name>.wav', computes its STFT (n_fft=1024, hop_length=512),
    compresses it with ``compress_complex`` using ``n_components``
    components, draws the dB-scaled magnitude spectrogram of the compressed
    STFT, inverts it back to a signal, shows an audio player for the result
    and writes it to '<name>_compressed.wav' (24-bit PCM).

    NOTE: this reuses the name ``myprint`` of the viewer defined in an
    earlier cell and replaces it in the notebook namespace.
    '''
    y, sr = load_audio_file(f'{name}.wav')
    mysftf = lr.stft(y, n_fft=1024, hop_length=512)

    mysftf_compressed = compress_complex(mysftf, n_components=n_components, doplot=False)

    lr.display.specshow(lr.amplitude_to_db(np.abs(mysftf_compressed), ref=np.max),
                        sr=sr, hop_length=512, y_axis='linear', x_axis='time')
    plt.title('Spectrogram')
    plt.colorbar(format='%+2.0f dB')

    y_inverted_sftf = lr.istft(mysftf_compressed, hop_length=512)

    # Remove a previously written result, if there is one. Done in Python
    # rather than through a shell escape, so a missing file produces no
    # error message and the file name is never interpreted by a shell.
    fname = pathlib.Path(f"{name}_compressed.wav")
    if fname.exists():
        fname.unlink()

    print('\n')
    # Use the sample rate reported by the loader instead of a hard-coded 22050.
    IPython.display.display(IPython.display.Audio(y_inverted_sftf, rate=sr))
    sf.write(str(fname), y_inverted_sftf, sr, subtype='PCM_24')

interactive(children=(Dropdown(description='name', options=('26094674_001', '26094674_002', '26094674_003', '2…

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>