## **1. Importing the Required Libraries**

In [None]:
import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

# librosa is a Python library for analyzing audio and music.
# It can be used to extract data from audio files; we will see this later.
import librosa
import torch
import librosa.display
import warnings
# NOTE: this silences *every* warning for the rest of the session, including
# deprecation notices from librosa / torchaudio / transformers.
warnings.filterwarnings("ignore")
# to play the audio files
from IPython.display import Audio

# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in matplotlib 3.6
# and the old aliases were removed in 3.8, so asking for 'seaborn-white'
# unconditionally raises on current installs. Use whichever name exists.
_preferred_style = 'seaborn-v0_8-white'
plt.style.use(_preferred_style if _preferred_style in plt.style.available else 'seaborn-white')

In [None]:
# Colab-only: mount the user's Google Drive so the dataset and output paths
# under /content/drive/MyDrive/... used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **2. Feature Extraction using WavLM**

In [None]:
import torchaudio


In [None]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m66.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m85.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m86.2 MB/s[0m eta [36m0:00:0

In [None]:
# NOTE(review): AutoProcessor is imported but not used in the cells visible here.
from transformers import AutoProcessor, WavLMModel, Wav2Vec2Processor
# The processor (input preparation) and the WavLM encoder are both loaded from
# the same Hugging Face Hub checkpoint id; the first call downloads the files.
processor = Wav2Vec2Processor.from_pretrained("patrickvonplaten/wavlm-libri-clean-100h-base-plus")
wavlm = WavLMModel.from_pretrained("patrickvonplaten/wavlm-libri-clean-100h-base-plus")  # encoder whose last_hidden_state is pooled in extract_features

def extract_features(path):
    """Compute one fixed-size WavLM embedding for an audio file.

    The waveform is loaded with torchaudio, reduced to a single channel,
    resampled to 16 kHz when the file uses a different rate, passed through
    the module-level ``processor`` and ``wavlm`` objects, and the encoder's
    last hidden state is averaged over time.

    Parameters
    ----------
    path : str
        Location of the audio file to embed.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``hidden_size`` (768 for the checkpoint loaded in
        this notebook).
    """
    target_sample_rate = 16000
    waveform, source_sample_rate = torchaudio.load(path)

    # torchaudio returns (channels, frames). Average the channels so that a
    # stereo file is treated as one utterance instead of a batch of two, which
    # would otherwise make the final mean run over channels rather than time.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    # The processor is told the audio is 16 kHz, so make that actually true
    # instead of silently mislabelling files recorded at another rate.
    if source_sample_rate != target_sample_rate:
        waveform = torchaudio.functional.resample(
            waveform, source_sample_rate, target_sample_rate
        )

    inputs = processor(waveform, sampling_rate=target_sample_rate, return_tensors="pt")

    # Inference only: no gradients needed for feature extraction.
    with torch.no_grad():
        outputs = wavlm(**inputs)

    # Drop only the batch axis (a bare squeeze() would also drop a length-1
    # time axis for very short clips), then mean-pool over time.
    return outputs.last_hidden_state.squeeze(0).mean(dim=0).numpy()

Downloading (…)rocessor_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/378M [00:00<?, ?B/s]

## **2.1 Training Dataset**

## **2.1.1 Dataset Loading**

In [None]:
# Annotation file for the Haryanvi training split on the mounted Drive.
csv_file_path = '/content/drive/MyDrive/Audio_Data/annotations/annotations/Haryanvi_train.csv'
# Read the CSV file into a DataFrame (columns: filename, label)
har_train = pd.read_csv(csv_file_path)
# Preview the first rows as the cell output.
har_train.head()

Unnamed: 0,filename,label
0,Abuse_1.wav,No
1,Abuse_2.wav,Yes
2,Abuse_4.wav,No
3,Abuse_5.wav,Yes
4,Abuse_6.wav,Yes


In [None]:
# Quick size check: (number of annotated clips, number of columns).
har_train.shape

(818, 2)

## **2.1.2 Feature Extraction**

In [None]:
# Directory on Drive holding the Haryanvi audio clips (the variable says
# "image" but the files are .wav audio; the name is kept because later cells use it).
image_directory_har = '/content/drive/MyDrive/Audio_Data/Indian_Language_Audio_Dataset/SC_audio_Haryanvi'

# Turn bare file names into full paths. Entries that already carry the prefix
# are left untouched, so re-running this cell does not prepend the directory a
# second time. Missing values are passed through unchanged.
_har_prefix = image_directory_har + "/"
har_train['filename'] = har_train['filename'].map(
    lambda name: name if name.startswith(_har_prefix) else _har_prefix + name,
    na_action='ignore',
)
har_train.head()

Unnamed: 0,filename,label
0,/content/drive/MyDrive/Audio_Data/Indian_Langu...,No
1,/content/drive/MyDrive/Audio_Data/Indian_Langu...,Yes
2,/content/drive/MyDrive/Audio_Data/Indian_Langu...,No
3,/content/drive/MyDrive/Audio_Data/Indian_Langu...,Yes
4,/content/drive/MyDrive/Audio_Data/Indian_Langu...,Yes


In [None]:
# Extract an embedding for every training clip, then save them as a NumPy array


In [None]:
# Embed every training clip. Iterating over the column values directly (rather
# than looking rows up by integer label) keeps this correct even if the frame
# no longer has a default 0..n-1 index, e.g. after filtering or shuffling.
har_train_embeddings = np.array(
    [extract_features(audio_path) for audio_path in har_train['filename']]
)
# One row per clip, one column per hidden dimension.
print(har_train_embeddings.shape)

(818, 768)


In [None]:
# Persist the training embeddings to Drive so later notebooks can load them
# without re-running the model.
train_embeddings_path = '/content/drive/MyDrive/Audio_Data/Output/haryanvi_train_embeddings.npy'
# np.save does not create missing parent folders; make sure the target exists.
os.makedirs(os.path.dirname(train_embeddings_path), exist_ok=True)
np.save(train_embeddings_path, har_train_embeddings)

# Derive the message from the path so it always names the file actually written.
print(f"{os.path.basename(train_embeddings_path)} file is saved")


haryanvi_train_embeddings.npy file is saved


## **2.2 Testing Dataset**

## **2.2.1 Dataset Loading**

In [None]:
# Annotation file for the Haryanvi test split on the mounted Drive.
csv_file_path_test = '/content/drive/MyDrive/Audio_Data/annotations/annotations/Haryanvi_test.csv'
# Read the CSV file into a DataFrame (columns: filename, label)
har_test = pd.read_csv(csv_file_path_test)
# Preview the first rows as the cell output.
har_test.head()

Unnamed: 0,filename,label
0,Abuse_0.wav,Yes
1,Abuse_3.wav,Yes
2,Abuse_7.wav,No
3,Abuse_11.wav,No
4,Abuse_14.wav,Yes


In [None]:
# Turn bare test file names into full paths under the Haryanvi clip directory
# (image_directory_har is defined in the training section). Entries that are
# already prefixed are left alone, so re-running this cell does not prepend the
# directory twice. Missing values are passed through unchanged.
_har_test_prefix = image_directory_har + "/"
har_test['filename'] = har_test['filename'].map(
    lambda name: name if name.startswith(_har_test_prefix) else _har_test_prefix + name,
    na_action='ignore',
)
har_test.head()

Unnamed: 0,filename,label
0,/content/drive/MyDrive/Audio_Data/Indian_Langu...,Yes
1,/content/drive/MyDrive/Audio_Data/Indian_Langu...,Yes
2,/content/drive/MyDrive/Audio_Data/Indian_Langu...,No
3,/content/drive/MyDrive/Audio_Data/Indian_Langu...,No
4,/content/drive/MyDrive/Audio_Data/Indian_Langu...,Yes


In [None]:
# Smoke test: embed only the first test clip before committing to the full
# pass. .iloc[0] selects by position, so it still picks the first row if the
# frame's index is not the default 0..n-1 range.
feats_test = np.array(extract_features(har_test['filename'].iloc[0]))
# Expect a single vector of hidden-size length.
feats_test.shape

(768,)

## **2.2.2 Feature Extraction**

In [None]:
# Embed every test clip. Iterating over the column values directly (rather
# than looking rows up by integer label) keeps this correct even if the frame
# no longer has a default 0..n-1 index, e.g. after filtering or shuffling.
har_test_embeddings = np.array(
    [extract_features(audio_path) for audio_path in har_test['filename']]
)
# One row per clip, one column per hidden dimension.
print(har_test_embeddings.shape)

(366, 768)


In [None]:
# Persist the test embeddings to Drive so later notebooks can load them
# without re-running the model.
test_embeddings_path = '/content/drive/MyDrive/Audio_Data/Output/haryanvi_test_embeddings.npy'
# np.save does not create missing parent folders; make sure the target exists.
os.makedirs(os.path.dirname(test_embeddings_path), exist_ok=True)
np.save(test_embeddings_path, har_test_embeddings)

# The previous message named "har_test_embeddings.npy", which is not the file
# written above; derive the name from the path so the two cannot drift apart.
print(f"{os.path.basename(test_embeddings_path)} file is saved")

har_test_embeddings.npy file is saved
