In [None]:
import numpy as np
import matplotlib.pyplot as plt
import librosa

# Read the audio file; sr=None asks librosa to keep the file's own sampling rate
y, fs = librosa.load('/content/drive/MyDrive/harvard.wav', sr=None)

# Define frame size and overlap (in samples)
frame_size = 256
overlap = 128
hop_size = frame_size - overlap  # distance between the starts of consecutive frames

# Number of complete frames that fit in the signal.
# Clamped at zero so a signal shorter than one frame yields no frames
# instead of a negative count.
num_frames = max(0, (len(y) - frame_size) // hop_size + 1)

# ---------------------------------------------------------------------------
# Pass 1: measure short-time energy and zero-crossing count for every frame.
# The thresholds below are relative to the loudest frame of the whole
# recording, so all frame energies must be known before any frame is labelled.
# ---------------------------------------------------------------------------
frame_energy = np.zeros(num_frames)
frame_zcr = np.zeros(num_frames, dtype=int)

for i in range(num_frames):
    # Extract current frame
    start_idx = i * hop_size
    end_idx = start_idx + frame_size
    frame = y[start_idx:end_idx]

    # Short-time energy: sum of squared sample magnitudes
    frame_energy[i] = np.sum(np.abs(frame) ** 2)

    # Zero-crossing count: number of adjacent sample pairs whose sign differs
    frame_zcr[i] = np.sum(np.diff(np.sign(frame)) != 0)

# ---------------------------------------------------------------------------
# Thresholds, expressed as fractions of the maximum frame energy.
# (Previously these were computed from the *current* frame's energy, which
# made every non-zero frame pass the energy test.)
# ---------------------------------------------------------------------------
max_energy = frame_energy.max() if num_frames > 0 else 0.0
voiced_threshold = 0.01 * max_energy  # adjust threshold based on your audio
unvoiced_threshold = 0.001 * max_energy  # adjust threshold based on your audio
# Not used by the decision rule below (silence is the fall-through case);
# kept so the value remains available for inspection and tuning.
silence_threshold = 0.0001 * max_energy  # adjust threshold based on your audio
zcr_threshold = 10  # zero crossings per frame; adjust for your sampling rate

# ---------------------------------------------------------------------------
# Pass 2: label each frame.
# Voiced speech is periodic: comparatively high energy, few zero crossings.
# Unvoiced speech is noise-like: many zero crossings, usually lower energy.
# Everything that satisfies neither rule is treated as silence.
# ---------------------------------------------------------------------------
voiced_frames = []
unvoiced_frames = []
silence_frames = []

for i, (energy, zcr) in enumerate(zip(frame_energy, frame_zcr)):
    if energy > voiced_threshold and zcr < zcr_threshold:
        voiced_frames.append(i)
    elif energy > unvoiced_threshold and zcr >= zcr_threshold:
        unvoiced_frames.append(i)
    else:
        silence_frames.append(i)

# Time stamp (in seconds) of every sample in the signal
time_axis = np.arange(len(y)) / fs

# One figure with four stacked panels: the full waveform on top, followed by
# one panel per frame class.
plt.figure(figsize=(10, 6))

# Panel 1: the complete waveform
ax = plt.subplot(4, 1, 1)
ax.plot(time_axis, y, 'k')
ax.set_ylabel('Amplitude')
ax.set_title('Original Signal')

# Panels 2-4: (row, frame indices, line colour, panel title)
step = frame_size - overlap
panels = (
    (2, voiced_frames, 'g', 'Voiced Regions'),
    (3, unvoiced_frames, 'r', 'Unvoiced Regions'),
    (4, silence_frames, 'b', 'Silence Regions'),
)

for row, frame_ids, colour, heading in panels:
    ax = plt.subplot(4, 1, row)
    # Draw each frame of this class as its own short line segment
    for k in frame_ids:
        lo = k * step
        hi = lo + frame_size
        ax.plot(time_axis[lo:hi], y[lo:hi], colour)
    ax.set_ylabel('Amplitude')
    ax.set_title(heading)

# Only the bottom panel (the last one created above) carries the time label
ax.set_xlabel('Time (s)')

# Adjust layout
plt.tight_layout()
plt.show()
