In [1]:
import librosa
import numpy as np
from keras.models import model_from_json
from keras.utils import np_utils
from tensorflow.keras.models import load_model

  "class": algorithms.Blowfish,


In [25]:
# Load the saved model architecture from JSON file
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)

# Load the saved model weights from H5 file
model.load_weights('model.h5')
# Load the model
model = load_model('model.h5')

In [2]:
# Function to add noise to data
def add_noise(data):
    # Calculate noise amplitude using uniform random value between 0 and 0.04 times the maximum value of data
    noise_amplitude = 0.04*np.random.uniform()*np.amax(data)
    # Add the calculated noise to the data
    data = data + noise_amplitude*np.random.normal(size=data.shape[0])
    return data

In [3]:
# Function to shift the data
def shift(data):
    # Calculate the shift range using uniform random value between -5 and 5 multiplied by 1000
    shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
    # Shift the data using numpy's roll function
    return np.roll(data, shift_range)

In [4]:
# Function to increase the speed of the data
def increase_speed(data, speed_factor = 1.25):
    # Increase the speed of the data using librosa's time_stretch function
    return librosa.effects.time_stretch(data, speed_factor)

# Function to decrease the speed of the data
def decrease_speed(data, speed_factor = 0.75):
    # Decrease the speed of the data using librosa's time_stretch function
    return librosa.effects.time_stretch(data, speed_factor)

In [5]:
# Function to stretch the data
def stretch(data, rate=0.70):
    # Stretch the data using librosa's time_stretch function
    return librosa.effects.time_stretch(data, rate)

In [6]:
# Function to change the pitch of the data
def change_pitch(data, sampling_rate, pitch_factor=0.8):
    # Change the pitch of the data using librosa's pitch_shift function
    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)

In [7]:
# Define the function to extract features from the audio file
def extract_features(data):
    result = np.array([])
    
    # Compute the Mel-frequency cepstral coefficients (MFCCs)
    # Use 58 MFCCs to get ~60 ms frames
    mfccs = librosa.feature.mfcc(y=data, sr=22050, n_mfcc=58)
    
    # Compute the average MFCCs for each frame
    mfccs_processed = np.mean(mfccs.T,axis=0)
    
    # Add the processed MFCCs to the result array
    result = np.array(mfccs_processed)
    
    return result

In [8]:
# Define the function to get features for a given audio file
def get_features(path):
    # Load the audio file, taking care of the no audio at the start and end of the file
    data, sample_rate = librosa.load(path, duration=3, offset=0.5, res_type='kaiser_fast') 
    
    # Extract features without augmentation
    result_1 = extract_features(data)
    result = np.array(result_1)
    
    # Extract features with added noise
    noise_data = add_noise(data)
    result_2 = extract_features(noise_data)
    result = np.vstack((result, result_2)) # Stack the results vertically
    
    # Extract features with time stretching
    stretch_data = stretch(data)
    result_3 = extract_features(stretch_data)
    result = np.vstack((result, result_3))
    
    # Extract features with time shifting
    shift_data = shift(data)
    result_4 = extract_features(shift_data)
    result = np.vstack((result, result_4))
    
    # Extract features with pitch shifting
    pitch_data = change_pitch(data, sample_rate)
    result_5 = extract_features(pitch_data)
    result = np.vstack((result, result_5)) 
    
    # Extract features with increased speed
    higher_speed_data = increase_speed(data)
    result_6 = extract_features(higher_speed_data)
    result = np.vstack((result, result_6))
    
    # Extract features with decreased speed
    lower_speed_data = decrease_speed(data)
    result_7 = extract_features(lower_speed_data)
    result = np.vstack((result, result_7))
    
    return result

In [33]:
# Function to make a prediction for a given audio file
def predict_emotion(file_path):
    # Get the features for the audio file
    features = get_features(file_path)

    # Normalize the features
    features = (features - np.mean(features)) / np.std(features)

    # Add an extra dimension to match the input shape of the model
    features = np.expand_dims(features, axis=0)

    # Make the prediction using the trained model
    prediction = model.predict(features)

    # Get the predicted label
    label = np.argmax(prediction)

    # Return the predicted emotion label
    if label == 0:
        return 'neutral'
    elif label == 1:
        return 'calm'
    elif label == 2:
        return 'happy'
    elif label == 3:
        return 'sad'
    elif label == 4:
        return 'angry'
    elif label == 5:
        return 'fear'
    elif label == 6:
        return 'disgust'
    else:
        return 'surprise'


In [34]:
print(predict_emotion("D:\\final_voice_model_3\\RAVDESS\\Actor_01\\03-01-01-01-01-01-01.wav"))

  return librosa.effects.time_stretch(data, rate)
  return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)
  return librosa.effects.time_stretch(data, speed_factor)
  return librosa.effects.time_stretch(data, speed_factor)


ValueError: in user code:

    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 2041, in predict_function  *
        return step_function(self, iterator)
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 2027, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 2015, in run_step  **
        outputs = model.predict_step(data)
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
        return self(x, training=False)
    File "D:\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "D:\Anaconda\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 58, 1), found shape=(None, 7, 58)


In [7]:
# Define the function to predict the mood of an audio file
def predict_mood(file_path):
    # Load the audio file, taking care of the no audio at the start and end of the file
    data, sample_rate = librosa.load(file_path, duration=3, offset=0.5, res_type='kaiser_fast') 
    # Get the features of the audio file
    features = extract_features(data)
    
    # Reshape the features to match the input shape of the model
    features = features.reshape(features.shape[0], features.shape[1], 1)
    
    # Normalize the features
    features = features / np.max(features)
    
    # Use the model to predict the mood of the audio file
    predictions = model.predict(features)
    
    # Get the predicted emotion
    emotions = np.argmax(predictions)

    return emotions

In [8]:
print(emotions[predict_mood("D:\\final_voice_model_3\\RAVDESS\\Actor_01\\03-01-01-01-01-01-01.wav")])

IndexError: tuple index out of range

In [18]:
# to import .ipynb files
import import_ipynb

import pandas as pd
import numpy as np

In [19]:
def extract_audio_features(audio_path,sampling_rate):
    # Load audio from the given path and set the sampling rate
    X, sample_rate = librosa.load(audio_path ,res_type='kaiser_fast',duration=2.5,sr=sampling_rate*2,offset=0.5)
    sample_rate = np.array(sample_rate)

    # Separate harmonic and percussive components of the audio
    y_harmonic, y_percussive = librosa.effects.hpss(X)
    # Extract pitch and magnitudes of the audio
    pitches, magnitudes = librosa.core.pitch.piptrack(y=X, sr=sample_rate)

    # Extract the mean of the Mel-Frequency Cepstral Coefficients (MFCCs) of the audio
    mfccs = np.mean(librosa.feature.mfcc(y=X,sr=sample_rate,n_mfcc=13),axis=1)

    # Extract the mean of the pitches and remove trailing zeroes
    pitches = np.trim_zeros(np.mean(pitches,axis=1))[:20]

    # Extract the mean of the magnitudes and remove trailing zeroes
    magnitudes = np.trim_zeros(np.mean(magnitudes,axis=1))[:20]

    # Extract the mean of the chroma feature of the audio
    chromas = np.mean(librosa.feature.chroma_cqt(y=y_harmonic, sr=sampling_rate),axis=1)
    
    # Return a list of features including the MFCCs, pitches, magnitudes, and chroma feature
    return [mfccs, pitches, magnitudes, chromas]

In [20]:
demo_audio_path = "D:\\final_voice_model_3\\RAVDESS\\Actor_01\\03-01-01-01-01-01-01.wav"

In [21]:
# Extract the audio features (MFCC, pitch, magnitude, and chroma)
demo_mfcc, demo_pitch, demo_mag, demo_chrom = extract_audio_features(demo_audio_path,20000)

# Convert the audio features to Pandas Series
mfcc = pd.Series(demo_mfcc)
pit = pd.Series(demo_pitch)
mag = pd.Series(demo_mag)
C = pd.Series(demo_chrom)
# Concatenate the audio features into a single dataframe
demo_audio_features = pd.concat([mfcc,pit,mag,C],ignore_index=True)

In [22]:
# Add an additional dimension to the demo_audio_features array along the first axis (axis=0)
demo_audio_features= np.expand_dims(demo_audio_features, axis=0)
# Add another dimension to the demo_audio_features array along the second axis (axis=2)
demo_audio_features= np.expand_dims(demo_audio_features, axis=2)

In [24]:
# Use the loaded model to make predictions on the demo_audio_features
live_predictions = model.predict(demo_audio_features)

InvalidArgumentError: Graph execution error:

Detected at node 'model/flatten/Reshape' defined at (most recent call last):
    File "D:\Anaconda\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "D:\Anaconda\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "D:\Anaconda\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "D:\Anaconda\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "D:\Anaconda\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "D:\Anaconda\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "D:\Anaconda\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "D:\Anaconda\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "D:\Anaconda\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "D:\Anaconda\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "D:\Anaconda\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "D:\Anaconda\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "D:\Anaconda\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "D:\Anaconda\lib\site-packages\ipykernel\ipkernel.py", line 390, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "D:\Anaconda\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
      result = self._run_cell(
    File "D:\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
      return runner(coro)
    File "D:\Anaconda\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "D:\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "D:\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "D:\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Rashada\AppData\Local\Temp\ipykernel_5988\2801692542.py", line 2, in <module>
      live_predictions = model.predict(demo_audio_features,
    File "D:\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 2253, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 2041, in predict_function
      return step_function(self, iterator)
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 2027, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 2015, in run_step
      outputs = model.predict_step(data)
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
      return self(x, training=False)
    File "D:\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "D:\Anaconda\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "D:\Anaconda\lib\site-packages\keras\layers\reshaping\flatten.py", line 104, in call
      return tf.reshape(inputs, flattened_shape)
Node: 'model/flatten/Reshape'
Input to reshape is a tensor with 320 values, but the requested shape requires a multiple of 256
	 [[{{node model/flatten/Reshape}}]] [Op:__inference_predict_function_611]

In [None]:
live_predictions

In [None]:
emotions=["anger","disgust","fear","happy","neutral", "sad", "surprise"]
# Get the index of the emotion with the highest probability
index = live_predictions.argmax(axis=1).item()
index

In [None]:
emotions[index]

In [13]:
print(emotions[predict_mood("D:\\final_voice_model_3\\RAVDESS\\Actor_01\\03-01-01-01-01-01-01.wav")])

  return librosa.effects.time_stretch(data, rate)
  return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)
  return librosa.effects.time_stretch(data, speed_factor)


sad


  return librosa.effects.time_stretch(data, speed_factor)


In [35]:
import librosa
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler

In [36]:
# Load the trained model
model = load_model('model.h5')

In [42]:
# Load the scaler
scaler = StandardScaler()
# scaler.mean_ = np.load('scaler_mean.npy')
# scaler.scale_ = np.load('scaler_std.npy')

In [9]:
# Define the path to the user input voice data
audio_path = "D:\\final_voice_model_3\\RAVDESS\\Actor_03\\03-01-03-02-02-02-03.wav"

In [49]:
emotions = ['Neutral', 'Calm', 'Happy', 'Sad', 'Angry', 'Fearful', 'Disgust', 'Surprised']

In [50]:
# Extract the features from the user input voice data
features = get_features(path)

# Fit the scaler to the data
scaler.fit(features)

# Scale the features using the scaler
scaled_features = scaler.transform(features)

# Reshape the features for the model input
reshaped_features = scaled_features.reshape(scaled_features.shape[0], scaled_features.shape[1], 1)

# Predict the emotion label using the trained model
prediction = model.predict(reshaped_features)

# Get the index of the predicted emotion label
predicted_emotion_index = np.argmax(prediction)

# Get the predicted emotion label
print(predicted_emotion = emotions[predicted_emotion_index])

  return librosa.effects.time_stretch(data, rate)
  return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)
  return librosa.effects.time_stretch(data, speed_factor)
  return librosa.effects.time_stretch(data, speed_factor)




IndexError: list index out of range

In [22]:
import numpy as np
import librosa
from tensorflow.keras.models import load_model

# Load the saved model architecture from JSON file
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)

# Load the saved model weights from H5 file
model.load_weights('model.h5')
# Load the model
model = load_model('model.h5')
# # Load the pre-trained model
# model = load_model('model.h5')

# Define the emotion labels
emotion_labels = ['angry','calm','disgust','fear','happy','neutral',
                'sad','surprise']

# Define the path to the audio file
audio_path = "D:\\final_voice_model\\RAVDESS\\Actor_01\\03-01-08-02-02-01-01.wav"

# Get the features for the audio file
features = get_features(audio_path)

# Reshape the features to have the same shape as the input to the model
features = np.reshape(features, (features.shape[0], features.shape[1], 1))

# Make predictions using the model
predictions = model.predict(features)

# Get the index of the predicted emotion label
predicted_label_index = np.argmax(predictions[0])

# Get the predicted emotion label
predicted_label = emotion_labels[predicted_label_index]

# Print the predicted emotion label
print('The predicted emotion is:', predicted_label)


  return librosa.effects.time_stretch(data, rate)
  return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)
  return librosa.effects.time_stretch(data, speed_factor)
  return librosa.effects.time_stretch(data, speed_factor)


The predicted emotion is: sad
