In [12]:
!pip install gradio


import librosa
import numpy as np
import soundfile as sf
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from IPython.display import Audio
import gradio as gr
import warnings
import os
import time

# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning, module='librosa')

# Function for chaotic mapping
def chaotic_map(c, map_type="logistic", L=0, U=1, r=4.0):
    """Apply one step of a chaotic map to ``c`` and rescale it into ``[L, U]``.

    Args:
        c: Current chaotic variable, expected to lie in ``[0, 1]``.
        map_type: Name of the map to apply. Only ``"logistic"`` is implemented.
        L: Lower end of the output range.
        U: Upper end of the output range.
        r: Growth parameter of the logistic map. The default of 4 makes
            ``r * c * (1 - c)`` span the whole unit interval; without this
            factor the value never exceeds 0.25 and repeated application
            simply decays towards 0.

    Returns:
        ``L + (U - L) * r * c * (1 - c)``.

    Raises:
        ValueError: If ``map_type`` is not a supported map name.
    """
    if map_type != "logistic":
        # Fail loudly instead of implicitly returning None to the caller.
        raise ValueError(f"Unsupported map_type: {map_type!r}")
    return L + (U - L) * (r * c * (1 - c))

# HCPSA search loop: evaluate candidates produced by chaotic_map and keep the best one
def hcpsa_optimization(objective_function, L, U, map_type="logistic", max_iter=100):
    """Minimise ``objective_function`` over ``[L, U]`` using a chaotic sequence.

    A state variable is drawn once from ``np.random.rand()`` and then advanced
    with ``chaotic_map`` on every iteration. Each state is scaled into
    ``[L, U]``, clipped to that range, and scored with the objective.

    Args:
        objective_function: Callable taking one candidate value and returning
            a comparable score (lower is better).
        L: Lower bound of the search range.
        U: Upper bound of the search range.
        map_type: Map name forwarded to ``chaotic_map``.
        max_iter: Number of candidates to evaluate.

    Returns:
        The candidate with the smallest score seen, or ``None`` when no
        candidate scored below ``float('inf')`` (e.g. ``max_iter`` is 0).
    """
    best_score = float('inf')
    best_candidate = None
    state = np.random.rand()  # starting point of the chaotic sequence

    for _ in range(max_iter):
        # Scale the current state into the search range and clamp it there.
        scaled = chaotic_map(state, map_type=map_type, L=L, U=U)
        candidate = np.clip(scaled, L, U)

        score = objective_function(candidate)
        if score < best_score:
            best_score, best_candidate = score, candidate

        # Advance the sequence itself in the unit interval.
        state = chaotic_map(state, map_type=map_type, L=0, U=1)

    return best_candidate

# Function to process audio
def process_audio(file_path):
    """Compress an audio file through a PCA-reduced log-Mel spectrogram.

    The audio is loaded at its native sample rate, converted to a 512-band
    Mel spectrogram in dB, and reduced with PCA. The number of components is
    chosen by ``hcpsa_optimization`` between 100 and the count that explains
    90% of the variance. The PCA scores are saved as ``.npy`` and the
    reconstructed spectrogram is inverted back to audio and written as WAV.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.

    Returns:
        A tuple ``(optimized_spectrogram_path, reconstructed_audio_path,
        elapsed_seconds)``.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()

    y, sr = librosa.load(file_path, sr=None)

    # Generate Mel-spectrogram
    n_mels = 512
    fmax = sr // 2
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
    log_S = librosa.power_to_db(S, ref=np.max)

    # Apply PCA with frames as samples and Mel bands as features
    S_flattened = log_S.T
    pca = PCA()
    pca.fit(S_flattened)

    # Smallest number of components whose cumulative explained variance
    # reaches the target
    explained_variance_cumsum = np.cumsum(pca.explained_variance_ratio_)
    target_variance = 0.9
    n_components_dynamic = np.argmax(explained_variance_cumsum >= target_variance) + 1

    # Prefer at least 100 components, but never let the lower bound exceed
    # the upper bound: np.clip does not validate min <= max.
    min_components = min(100, n_components_dynamic)

    # The optimizer proposes real-valued candidates that are truncated to
    # integers, so the same component count can recur; fit each count once.
    objective_cache = {}

    # HCPSA Objective Function
    def reconstruction_error_with_mse_penalty(components):
        components = int(np.clip(components, min_components, n_components_dynamic))
        if components not in objective_cache:
            pca_dynamic = PCA(n_components=components)
            S_pca_dynamic = pca_dynamic.fit_transform(S_flattened)
            S_reconstructed_dynamic_flat = pca_dynamic.inverse_transform(S_pca_dynamic)
            reconstruction_error = np.linalg.norm(S_flattened - S_reconstructed_dynamic_flat)
            # Mean squared error is the squared Frobenius norm divided by
            # the number of elements (not the norm itself).
            mse = reconstruction_error ** 2 / S_flattened.size
            penalty_mse = (mse - 0.05) * 1e6 if mse > 0.05 else 0
            objective_cache[components] = reconstruction_error + penalty_mse
        return objective_cache[components]

    # HCPSA Optimization
    optimal_components = hcpsa_optimization(
        objective_function=reconstruction_error_with_mse_penalty,
        L=min_components,
        U=n_components_dynamic,
        map_type="logistic",
        max_iter=100
    )

    # The optimizer returns None when no candidate produced a comparable
    # score; fall back to the variance-based count in that case.
    if optimal_components is None:
        optimal_components = n_components_dynamic

    optimal_components = int(optimal_components)
    pca_hcpsa = PCA(n_components=optimal_components)
    S_pca_hcpsa = pca_hcpsa.fit_transform(S_flattened)
    S_reconstructed_hcpsa_flat = pca_hcpsa.inverse_transform(S_pca_hcpsa)
    S_reconstructed_hcpsa = S_reconstructed_hcpsa_flat.T

    # Save outputs
    # NOTE: fixed file name in the working directory, so every call
    # overwrites the previous result.
    optimized_spectrogram_path = "optimized_spectrogram.npy"
    np.save(optimized_spectrogram_path, S_pca_hcpsa)

    # Convert dB back to power before inverting the Mel spectrogram to audio
    S_reconstructed_hcpsa_power = librosa.db_to_power(S_reconstructed_hcpsa)
    y_reconstructed = librosa.feature.inverse.mel_to_audio(S_reconstructed_hcpsa_power, sr=sr, n_iter=64, fmax=fmax, power=2.0)
    reconstructed_audio_path = file_path+"_compressed.wav"
    sf.write(reconstructed_audio_path, y_reconstructed, sr)

    end_time = time.perf_counter()

    return optimized_spectrogram_path, reconstructed_audio_path, end_time - start_time

# Gradio interface function
def gradio_interface(file):
    """Run ``process_audio`` on an uploaded file and format the results.

    Args:
        file: The uploaded file. Either an object exposing the file path as
            a ``name`` attribute, or the path itself as a string.

    Returns:
        A tuple ``(optimized_spectrogram_path, reconstructed_audio_path,
        message)`` where ``message`` reports the processing time in seconds.

    Raises:
        ValueError: If no file was provided.
    """
    if file is None:
        # Give a clear message instead of an AttributeError on ``None.name``.
        raise ValueError("No file was uploaded.")

    # Accept both file-like wrappers (path in ``.name``) and plain path strings.
    file_path = getattr(file, "name", file)

    optimized_spectrogram, reconstructed_audio, time_taken = process_audio(file_path)
    return optimized_spectrogram, reconstructed_audio, f"Time taken: {time_taken:.2f} seconds"

# Gradio UI
# One output component per value returned by gradio_interface:
# spectrogram file, compressed audio file, and the timing message.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload .wav file"),
    outputs=[
        gr.File(label="Download Optimized Spectrogram"),
        gr.File(label="Download Compressed Audio"),
        # Third return value of gradio_interface ("Time taken: ... seconds");
        # without a component here the message is never displayed.
        gr.Textbox(label="Processing Time"),
    ],
    title="Audio Compression with HCPSA + PCA",
    description="Upload a .wav file to optimize its spectrogram and reconstruct the audio using HCPSA. You can save the audio as a spectrogram and reconstruct it later, essentially making it a more compressed size of ur audio or save the compressed audio directly but it wont be as compressed as the spectrogram.\nContaact me at maddi.rishi2468@gmail.com or message me at linkedin : https://www.linkedin.com/in/rishi-maddi/ for any queries.\n🫡🫡"
)

# Start the web server for the interface.
interface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://751a02176de89cabde.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


