In [None]:
import librosa
import soundfile as sf

def extract_first_channel(audio_path, output_path):
    """Save the first channel of an audio file as a mono file.

    Parameters
    ----------
    audio_path : str
        Path of the input file. It is loaded with ``librosa.load`` at its
        native sample rate and without down-mixing.
    output_path : str
        Path of the mono file written with ``soundfile.write``.
    """
    # sr=None keeps the native sample rate; mono=False keeps channels separate.
    y, sr = librosa.load(audio_path, sr=None, mono=False)

    # A mono file comes back as a 1-D array of samples, a multi-channel file
    # as (channels, samples). Indexing a 1-D array with [0] would return a
    # single sample, so only index when a channel axis is present.
    is_mono = y.ndim == 1
    channel_1 = y if is_mono else y[0]

    # Report how many channels the input had.
    print(1 if is_mono else len(y))

    # Write the selected channel at the original sample rate.
    sf.write(output_path, channel_1, sr)

# Source recording: the 32-channel audio file to read from
input_audio_path = 'audio.wav'

# Destination of the mono file that will hold only the first channel
output_audio_path = 'single.wav'

# Run the extraction, passing both paths by keyword
extract_first_channel(audio_path=input_audio_path, output_path=output_audio_path)

In [None]:

import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Reload the extracted channel at its native sample rate.
single_channel, sr = librosa.load(output_audio_path, sr=None)

n_samples = len(single_channel)
print(n_samples)

# Amplitude-over-time plot: sample index divided by the sample rate gives seconds.
time_axis = np.arange(n_samples) / sr

plt.figure(figsize=(14, 5))
plt.plot(time_axis, single_channel)
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.show()

In [None]:
# Plot the spectrogram of the extracted channel.

# librosa.display is a submodule that the notebook never imports. On librosa
# releases without lazy submodule loading, `librosa.display` raises
# AttributeError unless it is imported explicitly.
import librosa.display

# STFT magnitude converted to dB, referenced to the largest magnitude.
D = librosa.amplitude_to_db(np.abs(librosa.stft(single_channel)), ref=np.max)

plt.figure(figsize=(14, 5))

# Time on the x-axis, logarithmic frequency on the y-axis.
librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')

plt.colorbar(format='%+2.0f dB')

plt.title('Spectrogram')

plt.show()

In [None]:
# Plot amplitude against frequency (x-axis is frequency).

# Fourier transform of the whole signal
fft = np.fft.fft(single_channel)
# Absolute value of each complex bin gives its magnitude
spectrum = np.abs(fft)
# Bin k of an N-point FFT sits at k * sr / N, so the frequency axis must stop
# one step short of sr. Including the endpoint would scale every bin
# frequency by N / (N - 1).
f = np.linspace(0, sr, len(spectrum), endpoint=False)
# Keep the first half of the spectrum and of the frequency axis
# (the bins below sr / 2).
half = len(spectrum) // 2
left_spectrum = spectrum[:half]
left_f = f[:half]
# Plot the spectrum
plt.figure(figsize=(15,10))
plt.plot(left_f, left_spectrum, alpha=0.4)
plt.xlabel("Frequency")
plt.ylabel("Magnitude")
# The plotted values are magnitudes (|FFT|), not power (|FFT|^2).
plt.title("Magnitude spectrum")
plt.show()


In [None]:
# plot spectrogram


In [None]:
# Load every channel of the recording at its native sample rate.
# mono=False is the explicit way to disable down-mixing; mono=None only
# worked because None is falsy. The path reuses input_audio_path so the file
# name is defined in one place.
full_audio, sr = librosa.load(input_audio_path, sr=None, mono=False)

# A mono file is returned as a 1-D array; promote it to (1, samples) so the
# channel-wise code below works for any channel count.
full_audio = np.atleast_2d(full_audio)

num_channels, num_samples = full_audio.shape

print(f"number of channels: {num_channels}")

print(f"len in seconds: {num_samples / sr}")

# Draw each channel's waveform on its own subplot, one below the other.
plt.figure(figsize=(15,25))

for i in range(num_channels):
    plt.subplot(num_channels, 1, i+1)
    plt.plot(full_audio[i])
    plt.title(f"Channel {i+1}")

# Keep the stacked subplot titles from overlapping the axes above them.
plt.tight_layout()

plt.show()






In [None]:
# Determine which channel hears the sound first.

# Onset-strength envelope of each channel
onset_strengths = [librosa.onset.onset_strength(y=channel, sr=sr) for channel in full_audio]

# First detected onset of each channel.
# NOTE(review): onset_detect is called without `units`, so these values are
# presumably frame indices rather than seconds; confirm against the librosa docs.
# A channel with no detected onset yields an empty result, and indexing it
# with [0] would raise IndexError, so NaN is recorded for such channels.
onset_times = []
for onset_strength in onset_strengths:
    onsets = librosa.onset.onset_detect(onset_envelope=onset_strength, sr=sr)
    onset_times.append(onsets[0] if len(onsets) else np.nan)

if np.all(np.isnan(onset_times)):
    raise ValueError("no onsets detected in any channel")

# NaN-aware arg-min/max ignore channels without an onset; +1 converts the
# 0-based index to a 1-based channel number.
closest_mic_channel = np.nanargmin(onset_times) + 1
furthest_mic_channel = np.nanargmax(onset_times) + 1

print(onset_times)

print(closest_mic_channel, furthest_mic_channel)




In [None]:
# Keep only the first 3 * sr samples (three seconds) of every channel.
clip_samples = 3 * sr
clean_audio = full_audio[:, :clip_samples]

clean_audio.shape

In [None]:
# Mean absolute amplitude of each channel over the clipped fragment.
# NOTE: these are raw means; they are not rescaled to the range 0..1.
mean_am = np.mean(np.abs(clean_audio), axis=1)

print(mean_am)

plt.figure(figsize=(15,5))

# One bar per channel, with tick labels showing 1-based channel numbers.
channel_positions = np.arange(mean_am.shape[0])
plt.bar(channel_positions, mean_am)
plt.xticks(channel_positions, channel_positions + 1)
plt.xlabel("Channel")
plt.ylabel("Mean amplitude")

plt.show()

# Maybe channels 1 and 3 are closest to the speaker, because they have the
# highest mean amplitude during the fragment when the speaker is talking.


In [None]:
from sklearn.decomposition import FastICA, PCA

## Decompose the first 8 channels with FastICA and then with PCA, and save
## every component as its own audio file.


def _write_components(prefix, components, sample_rate):
    """Write each column of ``components`` to ``{prefix}_{k}.wav`` (k from 1).

    Each component is scaled so its largest absolute sample is 1 before
    writing. This replaces a fixed ``* 100`` gain, whose output level depended
    on the decomposition's arbitrary scale and could leave the file nearly
    silent or heavily clipped. An all-zero component is written unchanged.
    """
    for i in range(components.shape[1]):
        component = components[:, i]
        peak = np.max(np.abs(component))
        if peak > 0:
            component = component / peak
        sf.write(f'{prefix}_{i+1}.wav', component, sample_rate)


# NOTE: despite its name, `four_channels` holds the first 8 channels.
four_channels = full_audio[:8]
print(four_channels.shape)

# Ask for as many components as there are channels actually present, so a
# recording with fewer than 8 channels still works.
n_components = four_channels.shape[0]

# A fixed random_state makes the FastICA result reproducible between runs.
ica = FastICA(n_components=n_components, whiten='arbitrary-variance', random_state=0)

# The estimators are fitted on the transpose of the (channels, samples) array.
components = ica.fit_transform(four_channels.T)

print(components.shape)

# Save the ICA components as audio files.
_write_components('component', components, sr)


# Now do PCA on the same channels.
pca = PCA(n_components=n_components)

components = pca.fit_transform(four_channels.T)

print(components.shape)

# Save the PCA components as audio files.
_write_components('pca_component', components, sr)






In [None]:
# Run ICA on all channels of the recording.
# NOTE(review): the original comment said "ICA on frequency domain", but the
# waveforms are handed to the separator untransformed. Confirm whether a
# frequency-domain method was intended.
# `four_channels` is a leftover name: the slice selects every channel. The
# second self-slice that followed was a no-op and has been dropped.
four_channels = full_audio[:]

from ssspy.bss.ica import FastICA


def contrast_fn(x):
    """Return log(1 + exp(x)) element-wise.

    Computed as ``logaddexp(0, x)`` so that large positive ``x`` gives ``x``
    instead of overflowing to ``inf`` in ``exp``.
    """
    return np.logaddexp(0, x)

def score_fn(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    Written as ``exp(-log(1 + exp(-x)))`` using ``logaddexp`` so that large
    negative ``x`` does not overflow in ``exp``.
    """
    return np.exp(-np.logaddexp(0, -x))

def d_score_fn(x):
    """Return sigma(x) * (1 - sigma(x)), with sigma the logistic sigmoid.

    The sigmoid is evaluated with ``logaddexp`` so that large negative ``x``
    does not overflow in ``exp``.
    """
    sigma = np.exp(-np.logaddexp(0, -x))
    return sigma * (1 - sigma)
     

# Build the separator from the three functions defined above.
ica = FastICA(contrast_fn=contrast_fn, score_fn=score_fn, d_score_fn=d_score_fn)
print(ica)

import IPython.display as ipd

# Apply the separator to the selected channels with n_iter=10.
waveform_est = ica(four_channels, n_iter=10)

for source_no, waveform in enumerate(waveform_est, start=1):
    print("Estimated source: {}".format(source_no))
    # TODO: uncomment the next line to display the new audios; it was commented out because of the output size
    # display(ipd.Audio(waveform, rate=sr))
    print()
