In [1]:
# !pip install librosa

In [2]:
# !pip show librosa
import librosa
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt

In [18]:
def extract_features(audio_data, sr):
    """Compute a fixed set of librosa descriptors for one audio signal.

    Args:
        audio_data: The audio samples, handed to every librosa call as ``y``.
        sr: The sample rate, forwarded to the descriptors that accept it.

    Returns:
        A dict mapping each descriptor name to whatever the matching
        ``librosa.feature`` function returned. The key order is fixed and is
        relied on by the cells that turn these values into table columns.
    """
    feat = librosa.feature  # short alias for the feature namespace

    # rms, spectral_flatness and zero_crossing_rate are called without ``sr``.
    return {
        "chroma_stft": feat.chroma_stft(y=audio_data, sr=sr),
        "chroma_cqt": feat.chroma_cqt(y=audio_data, sr=sr),
        "chroma_cens": feat.chroma_cens(y=audio_data, sr=sr),
        "melspectrogram": feat.melspectrogram(y=audio_data, sr=sr),
        "mfccs": feat.mfcc(y=audio_data, sr=sr),
        "rms": feat.rms(y=audio_data),
        "spectral_centroid": feat.spectral_centroid(y=audio_data, sr=sr),
        "spectral_bandwidth": feat.spectral_bandwidth(y=audio_data, sr=sr),
        "spectral_contrast": feat.spectral_contrast(y=audio_data, sr=sr),
        "spectral_flatness": feat.spectral_flatness(y=audio_data),
        "spectral_rolloff": feat.spectral_rolloff(y=audio_data, sr=sr),
        "poly_features": feat.poly_features(y=audio_data, sr=sr),
        "zero_crossing_rate": feat.zero_crossing_rate(y=audio_data),
    }

In [21]:
# audio_data, sr = librosa.load('Track 4.wav')
# features = extract_features(audio_data, sr)  
# features

In [5]:
def plot_boxplot(features):
    """Draw one box plot per feature, titled with its per-row statistics.

    Args:
        features: Mapping of feature name to a NumPy array. The mean and
            median are taken along axis 1 and the transposed array is plotted.

    A new 10x5 figure is opened for every feature; nothing is returned and
    the figures are not shown here.
    """
    for name, matrix in features.items():
        plt.figure(figsize=(10, 5))

        # Per-row summary statistics that go into the title.
        row_mean = np.mean(matrix, axis=1)
        row_median = np.median(matrix, axis=1)

        # Transposed so that each row of the feature becomes one box.
        sns.boxplot(data=matrix.T)

        title_lines = [
            f"{name}",
            f"Feature Shape: {matrix.shape}",
            f"Mean: {row_mean}",
            f"Median: {row_median}",
            f"Mean/Median Shape: {row_mean.shape}",
        ]
        plt.title("\n".join(title_lines))
        sns.despine()

In [5]:
# Plot every entry of `features`, then render the figures.
# NOTE(review): `features` must already exist in the kernel; the cell above
# that would create it is commented out, so this fails on a fresh run.
plot_boxplot(features)
plt.show()

In [7]:
def plot_boxplot(features):
    """Draw one box plot per feature, titled with per-row and overall statistics.

    Args:
        features: Mapping of feature name to a NumPy array. Statistics are
            first taken along axis 1 and then collapsed along axis 0; the
            transposed array is what gets plotted.

    A new 10x5 figure is opened for every feature; nothing is returned and
    the figures are not shown here.
    """
    for name, matrix in features.items():
        plt.figure(figsize=(10, 5))

        # Per-row statistics, then one overall value derived from each.
        row_mean = np.mean(matrix, axis=1)
        overall_mean = np.mean(row_mean, axis=0)
        row_median = np.median(matrix, axis=1)
        overall_median = np.median(row_median, axis=0)

        # Transposed so that each row of the feature becomes one box.
        sns.boxplot(data=matrix.T)

        title_lines = [
            f"{name}",
            f"Feature Shape: {matrix.shape}",
            f"Mean: {row_mean}",
            f"Median: {row_median}",
            f"Mean/Median Shape: {row_mean.shape}",
            f"Final Mean: {overall_mean}",
            f"Final Median: {overall_median}",
            f"Mean/Final Median Shape: {overall_median.shape}",
        ]
        plt.title("\n".join(title_lines))
        sns.despine()

In [6]:
# audio_data, sr = librosa.load('Track 4.wav')
# features = extract_features(audio_data, sr)
# features

In [7]:
# Plot every entry of `features` with the extended titles, then render.
# NOTE(review): `features` must already exist in the kernel; the cell above
# that would create it is commented out, so this fails on a fresh run.
plot_boxplot(features)
plt.show()

In [10]:
def calculate_mean(features):
    """Collapse every feature array to a single mean value.

    Args:
        features: Mapping of feature name to a NumPy array. Each array is
            averaged along axis 1, and the result is averaged along axis 0.

    Returns:
        A list holding one value per feature, in the mapping's iteration
        order (the feature names themselves are not kept).
    """
    return [
        np.mean(np.mean(matrix, axis=1), axis=0)
        for matrix in features.values()
    ]

In [11]:
def calculate_median(features):
    """Collapse every feature array to a single median-of-medians value.

    Args:
        features: Mapping of feature name to a NumPy array. The median is
            taken along axis 1, then the median of those values along axis 0.

    Returns:
        A list holding one value per feature, in the mapping's iteration
        order (the feature names themselves are not kept).
    """
    return [
        np.median(np.median(matrix, axis=1), axis=0)
        for matrix in features.values()
    ]

In [12]:
# Display the kernel's current working directory.
os.getcwd()

'C:\\Users\\anasa\\Desktop\\JARVIS-Urdu-Voice-Assistant-'

In [16]:
# Build the path of the 'Next Movie' recordings folder under the current
# working directory and list that folder explicitly. Listing by path instead
# of calling os.chdir() keeps the kernel's working directory unchanged, so the
# result does not depend on which cells happened to run before this one.
path=os.getcwd()
filename='Next Movie'
new_path=os.path.join(path,filename)
os.listdir(new_path)

['Track 1.wav', 'Track 2.wav', 'Track 4.wav']

In [18]:
# Build one row of per-feature means and one row of per-feature medians for
# every file in the recordings folder referenced by `new_path`.
X_mean=[]
X_median=[]
# Sorted so the row order is reproducible; files are loaded by full path so
# the cell works regardless of the kernel's current working directory.
for audio in sorted(os.listdir(new_path)):
    audio_data, sr = librosa.load(os.path.join(new_path, audio))
    features = extract_features(audio_data, sr)
    mean=calculate_mean(features)
    X_mean.append(mean)
    median=calculate_median(features)
    X_median.append(median)

In [19]:
# Print each file's row of per-feature means, one row per line.
for values in X_mean:
    print(values)

[0.4185079, 0.39693323, 0.23954631, 0.19433042, -22.467918, 0.018554302, 1889.0375295628332, 1912.963124232756, 21.942481504783977, 0.050268173, 3457.3287963867188, 0.11361375708604168, 0.1258544921875]
[0.40897265, 0.35484242, 0.22148794, 0.49207664, -19.337624, 0.027462002, 1771.4463360625566, 1981.2662233498577, 22.183138775280252, 0.051552437, 3387.945416865458, 0.16293315975194123, 0.11075038764312978]
[0.4137082, 0.38288176, 0.23340423, 0.7389196, -20.194191, 0.033283643, 1619.092845215319, 1884.3597287264981, 22.964114785219493, 0.05616525, 3054.5890561995966, 0.2189587493853099, 0.10535061743951613]


In [20]:
# Print each file's row of per-feature medians, one row per line.
for values in X_median:
    print(values)

[0.3542201, 0.27387106, 0.18277133, 1.7029836e-06, -0.15099403, 0.008660844, 1460.52199743669, 2153.9320497837716, 17.512247225919594, 0.007133857, 2928.515625, 0.038039045467602006, 0.059326171875]
[0.3364193, 0.21644142, 0.15765598, 4.0824716e-06, 0.0, 0.013682889, 1586.5669386709183, 2242.3904146904856, 17.334976751333947, 0.005695775, 3133.0810546875, 0.04961909700796929, 0.0546875]
[0.31010282, 0.25298047, 0.19011202, 2.9452872e-06, 0.0, 0.008580539, 1390.0395080619387, 2117.4708168062034, 18.582634212504797, 0.0027136619, 2627.05078125, 0.03314655914592519, 0.05078125]


In [21]:
# Tabulate the per-file feature means. The column order mirrors the key order
# of the dict built by extract_features.
X1 = pd.DataFrame(
    X_mean,
    columns=[
        'chroma_stft',
        'chroma_cqt',
        'chroma_cens',
        'melspectrogram',
        'mfccs',
        'rms',
        'spectral_centroid',
        'spectral_bandwidth',
        'spectral_contrast',
        'spectral_flatness',
        'spectral_rolloff',
        'poly_features',
        'zero_crossing_rate',
    ],
)
X1

Unnamed: 0,chroma_stft,chroma_cqt,chroma_cens,melspectrogram,mfccs,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,spectral_rolloff,poly_features,zero_crossing_rate
0,0.418508,0.396933,0.239546,0.19433,-22.467918,0.018554,1889.03753,1912.963124,21.942482,0.050268,3457.328796,0.113614,0.125854
1,0.408973,0.354842,0.221488,0.492077,-19.337624,0.027462,1771.446336,1981.266223,22.183139,0.051552,3387.945417,0.162933,0.11075
2,0.413708,0.382882,0.233404,0.73892,-20.194191,0.033284,1619.092845,1884.359729,22.964115,0.056165,3054.589056,0.218959,0.105351


In [22]:
# (rows, columns) of the table of means.
X1.shape

(3, 13)

In [23]:
# Tabulate the per-file feature medians. The column order mirrors the key
# order of the dict built by extract_features.
X2 = pd.DataFrame(
    X_median,
    columns=[
        'chroma_stft',
        'chroma_cqt',
        'chroma_cens',
        'melspectrogram',
        'mfccs',
        'rms',
        'spectral_centroid',
        'spectral_bandwidth',
        'spectral_contrast',
        'spectral_flatness',
        'spectral_rolloff',
        'poly_features',
        'zero_crossing_rate',
    ],
)
X2

Unnamed: 0,chroma_stft,chroma_cqt,chroma_cens,melspectrogram,mfccs,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,spectral_rolloff,poly_features,zero_crossing_rate
0,0.35422,0.273871,0.182771,2e-06,-0.150994,0.008661,1460.521997,2153.93205,17.512247,0.007134,2928.515625,0.038039,0.059326
1,0.336419,0.216441,0.157656,4e-06,0.0,0.013683,1586.566939,2242.390415,17.334977,0.005696,3133.081055,0.049619,0.054688
2,0.310103,0.25298,0.190112,3e-06,0.0,0.008581,1390.039508,2117.470817,18.582634,0.002714,2627.050781,0.033147,0.050781


In [9]:
class Audiofile:
    """Registry mapping each voice-command label to its recordings folder.

    Args:
        base_dir: Directory that contains one sub-folder per command. When
            omitted, the original hard-coded project location is used and the
            resulting paths are identical to the ones this class always
            returned.
    """

    # Project root used when no base_dir is supplied.
    DEFAULT_BASE_DIR = r"C:\Users\anasa\Desktop\JARVIS-Urdu-Voice-Assistant-"

    # Command label -> sub-folder name under the base directory.
    # Labels from the earlier commented-out name list that have no folder
    # mapped here: "don't listen while you speak", 'create a new folder.',
    # 'Turn on Wi-Fi.', 'Turn on Bluetooth.', 'Show notifications.',
    # 'search for a specific file', 'Open Google.com'.
    # NOTE(review): 'Turn of bluetooth' looks like a typo, but it presumably
    # has to match the folder name on disk - confirm before renaming.
    _SUBFOLDERS = {
        'assistance off': "Assistance off",
        'assistance on': "Assistance on",
        'Turn off Wi-Fi.': "Turn off wifi",
        'Turn off Bluetooth.': "Turn of bluetooth",
        'Open control panel': "open control panel",
        'Stop Movie': "Stop movie",
        'Play Movie': "Play movie",
        'Next Movie': "Next Movie",
        'Unmute Volume': "Unmute Volume",
        'Volume Down': "Volume Down",
        'Volume Up': "Volume up",
        'Open Start Menu': "Open start menu",
        'zoom in': "Zoom in",
        'zoom out': "Zoom out",
    }

    def __init__(self, base_dir=None):
        if base_dir is None:
            # No override: reproduce the original Windows paths byte-for-byte,
            # whatever platform the notebook runs on.
            import ntpath
            join_path = ntpath.join
            base_dir = self.DEFAULT_BASE_DIR
        else:
            join_path = os.path.join
        # Private dict of folder paths, keyed by command label.
        self._folder_paths = {
            label: join_path(base_dir, sub_dir)
            for label, sub_dir in self._SUBFOLDERS.items()
        }

    def get_folder_names(self):
        """Return the command labels as a list, in definition order."""
        return list(self._folder_paths)

    def get_folderpaths(self, folder_name=None):
        """Return folder paths.

        Args:
            folder_name: Optional command label. When given, only that
                label's path is returned.

        Returns:
            A dict of label -> path when ``folder_name`` is None (a copy, so
            callers cannot alter the registry), otherwise the single path
            string for ``folder_name``.

        Raises:
            KeyError: If ``folder_name`` is not a known label.
        """
        if folder_name is None:
            return dict(self._folder_paths)
        return self._folder_paths[folder_name]

In [11]:
# Instantiate the folder registry and print its command labels.
# Note: this rebinds `audio`, which an earlier cell uses as its loop variable.
audio = Audiofile()
folders = audio.get_folder_names()
print(folders)

['assistance off', 'assistance on', 'Turn off Wi-Fi.', 'Turn off Bluetooth.', 'Open control panel', 'Stop Movie', 'Play Movie', 'Next Movie', 'Unmute Volume', 'Volume Down', 'Volume Up', 'Open Start Menu', 'zoom in', 'zoom out']


In [46]:
# path=audio.get_folderpaths('assistance on')
# path

'C:\\Users\\anasa\\Desktop\\JARVIS-Urdu-Voice-Assistant-\\Assistance on'

In [22]:
class Feature_Extraction:
    """Groups the librosa feature computation used by the pipeline."""

    # Declared static because the function takes no ``self``; without the
    # decorator, calling it on an instance raises TypeError (the instance
    # would be passed as an extra first argument).
    @staticmethod
    def extract_features(audio_data, sr):
        """Compute a fixed set of librosa descriptors for one audio signal.

        Args:
            audio_data: The audio samples, handed to every librosa call as ``y``.
            sr: The sample rate, forwarded to the descriptors that accept it.

        Returns:
            A dict mapping each descriptor name to whatever the matching
            ``librosa.feature`` function returned, in a fixed key order.
        """
        features = {}
        features["chroma_stft"] = librosa.feature.chroma_stft(y=audio_data, sr=sr)
        features["chroma_cqt"] = librosa.feature.chroma_cqt(y=audio_data, sr=sr)
        features["chroma_cens"] = librosa.feature.chroma_cens(y=audio_data, sr=sr)
        features["melspectrogram"] = librosa.feature.melspectrogram(y=audio_data, sr=sr)
        features["mfccs"] = librosa.feature.mfcc(y=audio_data, sr=sr)
        features["rms"] = librosa.feature.rms(y=audio_data)
        features["spectral_centroid"] = librosa.feature.spectral_centroid(y=audio_data, sr=sr)
        features["spectral_bandwidth"] = librosa.feature.spectral_bandwidth(y=audio_data, sr=sr)
        features["spectral_contrast"] = librosa.feature.spectral_contrast(y=audio_data, sr=sr)
        features["spectral_flatness"] = librosa.feature.spectral_flatness(y=audio_data)
        features["spectral_rolloff"] = librosa.feature.spectral_rolloff(y=audio_data, sr=sr)
        features["poly_features"] = librosa.feature.poly_features(y=audio_data, sr=sr)
        features["zero_crossing_rate"] = librosa.feature.zero_crossing_rate(y=audio_data)

        return features
        

In [38]:
class Data_Preprocessing:
    """Builds the labelled feature rows for every recording folder."""

    def __init__(self):
        # One row per audio file: one mean per extracted feature, followed by
        # the folder label.
        self.X=[]

    def calculate_mean(self,features):
        """Collapse every feature array to a single mean value.

        Args:
            features: Mapping of feature name to a NumPy array. Each array is
                averaged along axis 1, and the result along axis 0.

        Returns:
            A list holding one value per feature, in the mapping's iteration
            order.
        """
        return [
            np.mean(np.mean(feature_values, axis=1), axis=0)
            for feature_values in features.values()
        ]

    def preprocessing(self):
        """Load every recording, extract its features and fill ``self.X``.

        Folder labels and paths come from ``Audiofile().get_folderpaths()``.
        Each file found in a folder contributes one row made of its feature
        means plus the folder label.

        Returns:
            The populated ``self.X`` list (also kept on the instance).
        """
        audios=Audiofile()
        # Start from an empty list so calling this twice does not duplicate rows.
        self.X=[]
        for folder,path in audios.get_folderpaths().items():
            # Sorted for a reproducible row order. Files are opened by full
            # path instead of os.chdir(), so the working directory of the
            # kernel is left untouched.
            for one in sorted(os.listdir(path)):
                audio_path=os.path.join(path,one)
                if not os.path.isfile(audio_path):
                    continue  # sub-directories cannot be loaded as audio
                audio_data,sr=librosa.load(audio_path)
                # Called on the class (not an instance) because the method
                # takes no ``self``.
                features = Feature_Extraction.extract_features(audio_data, sr)
                mean=self.calculate_mean(features)
                mean.append(folder)
                self.X.append(mean)
        return self.X

In [39]:
# Run the full pipeline; the resulting rows are collected on data.X.
data=Data_Preprocessing()
data.preprocessing()



In [40]:
# Tabulate the labelled rows collected on data.X: the feature means in the key
# order of the extracted-feature dict, followed by the folder label as 'class'.
XX = pd.DataFrame(
    data.X,
    columns=[
        'chroma_stft',
        'chroma_cqt',
        'chroma_cens',
        'melspectrogram',
        'mfccs',
        'rms',
        'spectral_centroid',
        'spectral_bandwidth',
        'spectral_contrast',
        'spectral_flatness',
        'spectral_rolloff',
        'poly_features',
        'zero_crossing_rate',
        'class',
    ],
)
XX

Unnamed: 0,chroma_stft,chroma_cqt,chroma_cens,melspectrogram,mfccs,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,spectral_rolloff,poly_features,zero_crossing_rate,class
0,0.392101,0.433277,0.242936,1.451152,-13.043401,0.057793,1488.516951,1655.304808,18.637944,0.035536,2789.129545,0.576072,0.069899,assistance off
1,0.302495,0.372940,0.245439,2.227921,-11.281276,0.076170,1261.275967,1564.335617,19.578484,0.028953,2503.291530,0.717842,0.050817,assistance off
2,0.376695,0.419409,0.242149,1.495821,-11.762228,0.060381,1408.103133,1656.593533,19.012299,0.029437,2677.223145,0.620585,0.062710,assistance off
3,0.385027,0.421750,0.243437,1.527858,-12.482666,0.061625,1450.312847,1648.888122,18.969604,0.040412,2808.116033,0.645264,0.065782,assistance off
4,0.369200,0.406338,0.237796,1.543871,-12.507836,0.063178,1421.990520,1616.881403,19.152994,0.037937,2694.227013,0.662252,0.068034,assistance off
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,0.377664,0.393746,0.241956,1.781207,-9.676538,0.069871,1449.696298,1749.786653,19.336373,0.011490,2816.353666,0.659362,0.061518,zoom out
70,0.357969,0.421060,0.248864,2.034776,-11.997432,0.071861,1326.088085,1629.781331,19.214311,0.038968,2580.643016,0.670801,0.047341,zoom out
71,0.351812,0.386604,0.231970,1.937666,-11.332945,0.073789,1524.450088,1713.967061,19.131933,0.031180,2901.553110,0.687043,0.071389,zoom out
72,0.358146,0.386234,0.238043,2.152818,-11.213063,0.076320,1479.549037,1685.846194,19.321173,0.014657,2887.565730,0.728412,0.065304,zoom out
