This notebook provides a complete workflow for converting audio to text, analyzing the sentiment of each sentence in the text, and classifying the overall sentiment of a conversation.

Install the necessary libraries.


In [None]:
# Install third-party packages via IPython shell escapes (notebook-only syntax).
!pip install SpeechRecognition
# NOTE(review): vaderSentiment is not imported by any cell visible here —
# confirm it is still needed.
!pip install vaderSentiment
!pip install pydub
# NOTE(review): pydub presumably needs the ffmpeg executable to decode
# compressed audio such as MP3; confirm that the PyPI package named "ffmpeg"
# actually provides it in the target environment.
!pip install ffmpeg

# Wipe the pip output from the cell and leave a short completion marker.
from IPython.display import clear_output
clear_output()
print("\nDone")

Objective: Convert audio into text.
Process:

  Libraries imported in this cell:

  torch: A library for tensor computation and deep learning.

  gdown: Used to download files from Google Drive.

  transformers: Contains classes for handling pre-trained models from the Hugging Face library, specifically for BERT in this case.

  speech_recognition: A library for converting speech into text.

  pydub: A library for audio manipulation.

  Steps:

  Use pydub to load the audio file and convert it to a single (mono) channel with a 16 kHz sample rate.

  Export the audio as a WAV file.
  
  Use speech_recognition to transcribe the audio to text using Google’s speech recognition service. If the audio cannot be understood or the request fails, a message is printed and no text is returned.

In [None]:
import torch
import gdown
from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn.functional import softmax
import speech_recognition as sr
from pydub import AudioSegment


def convert_audio_to_text(audio_file):
    """Transcribe an audio file to text with Google's speech recognition.

    The audio is loaded with pydub, converted to one channel at a 16000 Hz
    frame rate, written to a temporary WAV file, read back through
    ``speech_recognition`` and passed to ``recognize_google``.

    Args:
        audio_file: Input handed to ``AudioSegment.from_file`` (e.g. a path).

    Returns:
        The recognized text, or ``None`` if the speech could not be
        understood or the recognition request failed (a message is printed
        in both cases).
    """
    # Local imports keep this block self-contained.
    import os
    import tempfile

    recognizer = sr.Recognizer()
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_channels(1).set_frame_rate(16000)

    # A unique temporary file avoids clobbering (or being clobbered by) a
    # "temp.wav" in the working directory and lets us clean up afterwards.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # export() hands back the open output file; close it explicitly so
        # the data is flushed and the file can be removed on every platform.
        audio.export(wav_path, format="wav").close()

        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
    finally:
        os.remove(wav_path)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None


Objective: Prepare the text for sentiment analysis.

Process: Split the text into sentences by periods. This allows each sentence to be analyzed individually.

In [None]:
def preprocess_text(text):
    """Split text into sentences on periods.

    Each fragment is stripped of surrounding whitespace, and fragments that
    are empty after stripping (for example the one after a trailing period)
    are dropped so they are not later counted as sentences.

    Args:
        text: The text to split.

    Returns:
        A list of non-empty sentence strings, in their original order.
    """
    fragments = (fragment.strip() for fragment in text.split("."))
    return [fragment for fragment in fragments if fragment]

Objective: Analyze the sentiment of each sentence.
Process:

  Use the pre-trained BERT sentiment model `nlptown/bert-base-multilingual-uncased-sentiment`, loaded through the transformers library.

  Tokenize each sentence and pass it through the model.

  Take the index of the highest-probability class in the model’s output as the sentiment score.

  Count each sentence as positive (score 3 or 4), negative (score 0 or 1), or neutral (any other score).

In [None]:
def analyze_sentiment(sentences):
    """Count positive, negative and neutral sentences with a BERT classifier.

    Each non-blank sentence is tokenized and passed through the
    ``nlptown/bert-base-multilingual-uncased-sentiment`` model. The index of
    the highest-probability class decides the bucket: 3 or 4 is positive,
    0 or 1 is negative, anything else is neutral.

    Args:
        sentences: Iterable of sentence strings. Empty or whitespace-only
            entries are ignored.

    Returns:
        A ``(positive, negative, neutral)`` tuple of sentence counts.
    """
    # Blank fragments carry no sentiment; scoring them would skew the counts.
    sentences = [s for s in sentences if s and s.strip()]
    if not sentences:
        # Nothing to score, so skip loading the model altogether.
        return 0, 0, 0

    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertForSequenceClassification.from_pretrained(model_name)
    model.eval()  # inference only

    positive, negative, neutral = 0, 0, 0

    # Gradients are never used here, so do not track them.
    with torch.no_grad():
        for sentence in sentences:
            inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
            outputs = model(**inputs)
            probs = softmax(outputs.logits, dim=-1)
            # Convert to a plain int so the membership tests below compare
            # Python integers rather than tensors.
            sentiment_score = int(torch.argmax(probs, dim=-1).item())

            if sentiment_score in (3, 4):
                positive += 1
            elif sentiment_score in (0, 1):
                negative += 1
            else:
                neutral += 1

    return positive, negative, neutral


Objective: Calculate the percentage of positive, negative, and neutral sentences.

Process: Compute the percentage of each sentiment category relative to the total number of sentences.

In [None]:
def calculate_percentage(positive, negative, neutral):
    """Express each sentiment count as a percentage of all counted sentences.

    Args:
        positive: Number of positive sentences.
        negative: Number of negative sentences.
        neutral: Number of neutral sentences.

    Returns:
        A ``(positive, negative, neutral)`` tuple of percentages, or
        ``(0, 0, 0)`` when the counts add up to zero.
    """
    counts = (positive, negative, neutral)
    total = sum(counts)
    # Guard against dividing by zero when no sentences were counted.
    if total == 0:
        return 0, 0, 0
    return tuple((count / total) * 100 for count in counts)


Objective: Run the entire pipeline from audio to sentiment classification.
Process:

  Convert the audio file to text.

  Preprocess the text into sentences.

  Analyze the sentiment of each sentence.

  Calculate the percentage of each sentiment.
  
  Print the results and classify the overall sentiment of the conversation.

In [None]:
def main(audio_file):
    """Run the audio-to-sentiment pipeline and print the results.

    Transcribes ``audio_file``, splits the transcript into sentences, counts
    the sentiment of each sentence, prints the three percentages and then
    prints an overall classification. Returns early, after printing a
    message, when no text was extracted from the audio.

    Args:
        audio_file: Audio input passed on to ``convert_audio_to_text``.
    """
    print("Converting audio to text...")
    transcript = convert_audio_to_text(audio_file)
    if not transcript:
        print("No text extracted from audio.")
        return

    print("Preprocessing text...")
    sentence_list = preprocess_text(transcript)

    print("Analyzing sentiment with BERT...")
    pos_count, neg_count, neu_count = analyze_sentiment(sentence_list)

    print("Calculating percentages...")
    pos_pct, neg_pct, neu_pct = calculate_percentage(pos_count, neg_count, neu_count)

    print(f"Positive Sentiment: {pos_pct:.2f}%")
    print(f"Negative Sentiment: {neg_pct:.2f}%")
    print(f"Neutral Sentiment: {neu_pct:.2f}%")

    # A category wins only if it is strictly larger than both of the others;
    # every other outcome, ties included, is reported as Neutral.
    if pos_pct > max(neg_pct, neu_pct):
        verdict = "The call is classified as Positive in terms of customer satisfaction."
    elif neg_pct > max(pos_pct, neu_pct):
        verdict = "The call is classified as Negative in terms of customer satisfaction."
    else:
        verdict = "The call is classified as Neutral in terms of customer satisfaction."
    print(verdict)


Objective: Download an audio file and process it.
Process:

  Use gdown to download the audio file from Google Drive.
  
  Call the main function to process the downloaded audio file.

In [None]:
if __name__ == "__main__":
    # Local file name for the downloaded recording; the same path is then
    # fed to the pipeline.
    output = "AudioCall.mp3"
    audio_file_path = output

    # Download the recording from Google Drive.
    url = "https://drive.google.com/uc?id=191pj9uBliYFx7XVh6e6iGYPLt5e_Asc3"
    gdown.download(url, output, quiet=False)

    main(audio_file_path)
