Finalized flat version

In [None]:
import random
from pydub import AudioSegment
import numpy as np
import time

def time_domain(audio):
  """Return a copy of ``audio`` with a small random pitch/speed shift.

  The raw samples are re-labelled with a frame rate scaled by a random
  factor drawn from [0.95, 1.05], so they play back slightly faster and
  higher (or slower and lower). The result is then resampled back to the
  original frame rate so the output keeps the input's nominal format.

  Args:
    audio: A ``pydub.AudioSegment``.

  Returns:
    A new ``AudioSegment`` whose frame rate equals ``audio.frame_rate``.
  """
  pitch_shift = random.uniform(0.95, 1.05)
  shifted_rate = int(audio.frame_rate * pitch_shift)
  # Calling set_frame_rate() directly only resamples, which leaves pitch and
  # duration untouched. Overriding the frame rate on the raw data is what
  # actually changes how fast the existing samples are played.
  shifted = audio._spawn(audio.raw_data, overrides={"frame_rate": shifted_rate})
  return shifted.set_frame_rate(audio.frame_rate)

def freq_domain(bee_sound):
  """Return ``bee_sound`` mixed with low-level Gaussian noise.

  One noise sample is generated per audio frame, with a standard deviation
  of 0.1 of full scale, and quantised to the same integer sample width as
  ``bee_sound`` before being overlaid as a mono segment.

  Note: the noise is white (flat spectrum), not pink.

  Args:
    bee_sound: A ``pydub.AudioSegment`` with a sample width of 1, 2 or 4
      bytes.

  Returns:
    A new ``AudioSegment`` containing the original audio plus noise.

  Raises:
    ValueError: If the sample width has no matching integer dtype.
  """
  sample_width = bee_sound.sample_width
  pcm_dtypes = {1: np.int8, 2: np.int16, 4: np.int32}
  if sample_width not in pcm_dtypes:
    raise ValueError(f"Unsupported sample width: {sample_width} bytes")
  dtype = pcm_dtypes[sample_width]

  # len(AudioSegment) is a duration in milliseconds; the noise needs one
  # value per frame instead.
  num_frames = int(bee_sound.frame_count())

  # Draw noise in the normalised range [-1, 1] and scale it to the integer
  # range of the target width. Dumping the float64 buffer directly would be
  # reinterpreted as unrelated integer samples.
  noise = np.clip(np.random.normal(0, 0.1, num_frames), -1.0, 1.0)
  noise_pcm = (noise * np.iinfo(dtype).max).astype(dtype)

  noise_segment = AudioSegment(
    noise_pcm.tobytes(),
    frame_rate=bee_sound.frame_rate,
    sample_width=sample_width,
    channels=1,
  )
  return bee_sound.overlay(noise_segment)

def randomized(bee_sound):
  """Return ``bee_sound`` with a short silent gap inserted at a random point.

  The gap lasts 100, 200 or 300 ms and is spliced in at a uniformly chosen
  millisecond offset, so the result is longer than the input by the gap
  length.

  Args:
    bee_sound: A ``pydub.AudioSegment``.

  Returns:
    A new ``AudioSegment`` made of the audio before the offset, the silent
    gap, and the audio after the offset.
  """
  silence_duration = 100  # in milliseconds
  gap_length = random.randint(1, 3) * silence_duration
  # Match the clip's frame rate so concatenation does not change it.
  gap = AudioSegment.silent(duration=gap_length, frame_rate=bee_sound.frame_rate)

  # Overlaying silence only mixes in zeros and leaves the audio unchanged,
  # so the gap is spliced in instead. len() of a segment is in milliseconds.
  position = random.randint(0, len(bee_sound))
  return bee_sound[:position] + gap + bee_sound[position:]

# Load the source recording and keep only its first 4000 ms.
audio = AudioSegment.from_file("/content/drive/MyDrive/bee sound/data/CF003 - Active - Day - (214).wav")[:4000]

# Identifier -> augmentation function; the identifier also appears in the
# exported file name.
num_list = [1, 2, 3]
augmentations = {1: time_domain, 2: freq_domain, 3: randomized}

start_time = time.time()

for i in range(1):
  # Pick one augmentation at random, apply it and export the result as MP3.
  random_num = random.choice(num_list)
  manipulated = augmentations[random_num](audio)
  manipulated.export(f"output_sound_{random_num}_{i}.mp3", format="mp3")

# Report the wall-clock time spent on augmentation and export.
print("--- %s seconds ---" % (time.time() - start_time))

Spectrogram plots and cross-correlation calculations

In [None]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import correlate2d

# Load the two audio files
audio_file1 = "/content/audio.mp3"  # Replace with your audio file paths
audio_file2 = "/content/output_sound_1_0.mp3"

# Load audio data and sample rate
y1, sr1 = librosa.load(audio_file1)
y2, sr2 = librosa.load(audio_file2)

# Compute spectrograms (STFT magnitude in dB, relative to each file's peak)
D1 = librosa.amplitude_to_db(np.abs(librosa.stft(y1)), ref=np.max)
D2 = librosa.amplitude_to_db(np.abs(librosa.stft(y2)), ref=np.max)


def _save_spectrogram(spec_db, sr, title, out_path):
  """Plot ``spec_db`` on a log-frequency axis, save it to ``out_path`` and show it."""
  plt.figure(figsize=(12, 8))
  librosa.display.specshow(spec_db, sr=sr, x_axis='time', y_axis='log')
  plt.colorbar(format='%+2.0f dB')
  plt.title(title)
  plt.tight_layout()
  plt.savefig(out_path)
  plt.show()


# Visualize the spectrograms
_save_spectrogram(D1, sr1, 'Log-frequency Spectrogram of Original Bee Sound', "1.png")
_save_spectrogram(D2, sr2, 'Log-frequency Spectrogram of Modified Bee Sound', "2.png")

# Perform cross-correlation between the two spectrograms
correlation = correlate2d(D1, D2, boundary='symm', mode='same')

max_corr = np.max(correlation)
# np.argmax on a 2-D array returns an index into the flattened array, so it
# has to be unravelled into (frequency bin, frame) coordinates first.
peak_bin, peak_frame = np.unravel_index(np.argmax(correlation), correlation.shape)
# NOTE(review): the lag is measured from the centre column of the 'same'
# output, which is assumed to correspond to zero shift; this may be off by
# one frame depending on the width of D2 - confirm against scipy's centring.
lag = peak_frame - correlation.shape[1] // 2

print(f"Maximum Correlation: {max_corr}")
print(f"Time Lag (in frames): {lag}")
