<a href="https://colab.research.google.com/github/KaifAhmad1/Audio-Emotion-Analysis/blob/main/Audio_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Audio Data Preprocessing and Audio Sentiment and Emotion Analysis:

**Installing Necessary Dependencies:**

In [76]:
!pip install -qU \
     librosa \
     plotly

In [77]:
import numpy as np
import pandas as pd

In [78]:
trainset_data = '/content/drive/MyDrive/audio-sentiment-data/TEST'
testset_data = '/content/drive/MyDrive/audio-sentiment-data/TRAIN'
file_path = '/content/drive/MyDrive/audio-sentiment-data/speech-emotion-data.csv'
speech_data = pd.read_csv(file_path)

In [79]:
trainset_data

'/content/drive/MyDrive/audio-sentiment-data/TEST'

In [80]:
testset_data

'/content/drive/MyDrive/audio-sentiment-data/TRAIN'

**Data Exploration:**

In [81]:
speech_data

Unnamed: 0,Filename,Class
0,346.wav,Negative
1,163.wav,Neutral
2,288.wav,Negative
3,279.wav,Negative
4,244.wav,Negative
...,...,...
245,204.wav,Neutral
246,46.wav,Positive
247,318.wav,Negative
248,49.wav,Positive


In [82]:
speech_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Filename  250 non-null    object
 1   Class     250 non-null    object
dtypes: object(2)
memory usage: 4.0+ KB


In [83]:
speech_data.describe()

Unnamed: 0,Filename,Class
count,250,250
unique,250,3
top,346.wav,Negative
freq,1,87


In [84]:
num_samples = speech_data.shape[0]
print(f"Dataset has {num_samples} samples")

print("Count of Positive and Negative samples:")
class_counts = speech_data['Class'].value_counts().reset_index()
print(class_counts)

Dataset has 250 samples
Count of Positive and Negative samples:
      Class  count
0  Negative     87
1  Positive     82
2   Neutral     81


**Audio Preprocessing:**

In [85]:
import librosa
# Set the sample rate for audio processing
sample_rate = 44100
def load_audio(file_path):
    # Load audio using librosa with specified parameters
    return librosa.load(file_path, sr=sample_rate, duration=2.5, offset=0.5)

In [86]:
# Audio Feature Extraction using Mel Spectrogram
import os
import sys
import numpy as np

def loadAudio(fp):
    return librosa.load(fp, res_type='kaiser_fast', duration=2.5, offset=0.5, sr=sample_rate)

def scanFeatures(path, avgFeat=0):
    # Initialize variables
    features, minFeat, maxFeat = [], sys.maxsize, 0
    files = sorted(os.listdir(path))

    # Load and process each audio file
    for i, fp in enumerate(files):
        X, sr = loadAudio(os.path.join(path, fp))
        f = librosa.feature.melspectrogram(y=X, sr=sample_rate)
        f = librosa.amplitude_to_db(f, ref=np.max)

        # Track min and max feature lengths
        shapeY = f.shape[1]
        minFeat, maxFeat = min(shapeY, minFeat), max(shapeY, maxFeat)
        features.append(f)

    # Calculate average feature length if not specified
    avgFeat = avgFeat if avgFeat != 0 else int((minFeat + maxFeat) / 2)

    # Initialize and fill the feature matrix
    feat_mat = np.zeros((len(files), f.shape[0], avgFeat))
    for i, x in enumerate(features):
        xWidth = min(x.shape[1], avgFeat)
        feat_mat[i, :, :xWidth] = x[:, :xWidth]

    return feat_mat, files

In [87]:
# Set feature dimension
f_dim = 128

# Load and process training data
train_data, train_files = scanFeatures(trainset_data, f_dim)

# Load and process test data with the same feature dimension
test_data, test_files = scanFeatures(testset_data, train_data.shape[1])

# Print shapes
print("Train Shape:", train_data.shape, "| Test Shape:", test_data.shape)

Train Shape: (110, 128, 128) | Test Shape: (250, 128, 128)


In [88]:
import plotly.graph_objects as go
import plotly.io as pio

# MelSpec to Images using Plotly
def saveImg(f, fp):
    # Create a 2D heatmap using Plotly
    fig = go.Figure(data=go.Heatmap(z=f[::-1], colorscale='Viridis'))

    # Hide the axes and save the image as HTML
    fig.update_layout(xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                      yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))

    # Save the Plotly figure as an HTML file
    pio.write_html(fig, fp)

In [89]:
def saveFeatureToImage(path, saveDir, avgFeat=0):
    global sample_rate
    files = sorted(os.listdir(path))
    print("Scanning", path)

    for i, fp in enumerate(files):
        # Load audio and compute Mel spectrogram
        X, sr = loadAudio(os.path.join(path, fp))
        f = librosa.feature.melspectrogram(y=X, sr=sample_rate)
        f = librosa.amplitude_to_db(f, ref=np.max)

        # Create an image matrix and copy Mel spectrogram data
        img = np.zeros((f.shape[0], avgFeat))
        xWidth = min(f.shape[1], avgFeat)
        img[:, :xWidth] = f[:, :xWidth]

        # Save the image with the same filename and .html extension
        fname = os.path.join(saveDir, fp.split('.')[0] + '.html')
        saveImg(img, fname)

In [90]:
# Set feature dimension
f_dim = 128
# Set directories for training and test images
train_img_dir = '/content/train_images'
test_img_dir = '/content/test_images'

# Create training image directory if not exists and save Mel spectrogram images
if not os.path.exists(train_img_dir):
    os.mkdir(train_img_dir)
    saveFeatureToImage(trainset_data, train_img_dir, f_dim)

# Create test image directory if not exists and save Mel spectrogram images using the same feature dimension as training data
if not os.path.exists(test_img_dir):
    os.mkdir(test_img_dir)
    saveFeatureToImage(testset_data, test_img_dir, train_data.shape[1])

In [91]:
# Scan image features from a directory
def scanImgFeatures(path):
    # Initialize list for image features
    features = []

    # Get sorted list of files
    files = sorted(os.listdir(path))

    # Iterate over files
    for x in files:
        # Read and normalize image
        img = imread(os.path.join(path, x))[:,:,:3] / 255.0

        # Append image feature
        features.append(img)

    # Return image features and file names
    return np.array(features), files

In [92]:
import os

def create_directory(directory_path):
    if not os.path.exists(directory_path):
        os.mkdir(directory_path)

def process_image_directory(data_path, img_dir, feature_dimension):
    create_directory(img_dir)
    saveFeatureToImage(data_path, img_dir, feature_dimension)

f_dim = 128
train_img_dir = '/content/train_images'
test_img_dir = '/content/test_images'

process_image_directory(trainset_data, train_img_dir, f_dim)
process_image_directory(testset_data, test_img_dir, train_data.shape[1])

Scanning /content/drive/MyDrive/audio-sentiment-data/TEST
Scanning /content/drive/MyDrive/audio-sentiment-data/TRAIN


In [93]:
def scan_img_features(path):
    features = []
    files = sorted(os.listdir(path))

    for file_name in files:
        file_path = os.path.join(path, file_name)
        img = imread(file_path)[:,:,:3] / 255.0
        features.append(img)

    return np.array(features), files

In [96]:
def scan_img_features(path):
    features = []
    files = sorted(os.listdir(path))

    for file_name in files:
        file_path = os.path.join(path, file_name)

        try:
            img = imread(file_path)[:,:,:3] / 255.0
            features.append(img)
        except Exception as e:
            print(f"Skipping {file_name} due to error: {e}")

    return np.array(features), files

In [95]:
if os.path.exists(train_img_dir):
    train_data_img, train_files_img = scanImgFeatures(train_img_dir)
if os.path.exists(test_img_dir):
    test_data_img, test_files_img = scanImgFeatures(test_img_dir)
    plt.imshow(test_data_img[0])
    plt.show()

UnidentifiedImageError: cannot identify image file '/content/train_images/112.html'

In [99]:
def getPathLabels(p):
    return [speech_data[speech_data['Filename'] == x].iloc[0,1] for x in p]
train_labels = getPathLabels(train_files)

IndexError: index 0 is out of bounds for axis 0 with size 0