In [1]:
import pyaudio
import wave
import speech_recognition as sr
import azure.cognitiveservices.speech as speechsdk
from transformers import pipeline


In [2]:
import pyaudio
import wave
import azure.cognitiveservices.speech as speechsdk
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax

def azure_transcribe(subscription_key, region, record_seconds=5, audio_path='recording.wav'):
    """Record a short audio clip through PyAudio and transcribe it with Azure Speech.

    The clip is captured as 16-bit mono PCM at 44.1 kHz, written to
    ``audio_path`` as a WAV file, and that file is passed to a single
    ``recognize_once`` call on an Azure ``SpeechRecognizer``.

    Args:
        subscription_key: Key of the Azure Speech resource.
        region: Azure region of the Speech resource (for example ``'eastus'``).
        record_seconds: Length of the recording in seconds.
        audio_path: Path of the intermediate WAV file; it is overwritten on
            every call.

    Returns:
        str: The recognised text on success. On failure a status message is
        returned instead of raising: ``"No speech could be recognized"``,
        ``"Speech Recognition canceled: ..."`` or, for any other result
        reason, ``"Speech Recognition ended with unexpected result reason: ..."``.
    """
    # Audio Recording
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100

    p = pyaudio.PyAudio()
    try:
        # Queried while the PyAudio instance is still alive; needed for the WAV header.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        try:
            print("Start Recording")
            frames = [stream.read(CHUNK) for _ in range(int(RATE / CHUNK * record_seconds))]
            print("Recording Done")
        finally:
            # Release the input stream even if a read fails part-way through.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file (and finalises the header) even on error.
    with wave.open(audio_path, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    # Azure Transcription
    speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=region)
    audio_input = speechsdk.AudioConfig(filename=audio_path)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    # NOTE(review): per the Azure SDK documentation recognize_once transcribes a
    # single utterance, so a long recording may only be partially transcribed --
    # confirm before raising record_seconds substantially.
    result = speech_recognizer.recognize_once()

    # Check the result
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        return result.text
    if result.reason == speechsdk.ResultReason.NoMatch:
        return "No speech could be recognized"
    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        return "Speech Recognition canceled: {}. Error details: {}".format(cancellation_details.reason, cancellation_details.error_details)
    # Any other reason: report it explicitly instead of implicitly returning None.
    return "Speech Recognition ended with unexpected result reason: {}".format(result.reason)

# Tokenizer/model pairs already loaded in this kernel session, keyed by model name.
_SENTIMENT_MODEL_CACHE = {}


def _load_sentiment_model(model_name):
    """Return the ``(tokenizer, model)`` pair for ``model_name``, loading it at most once."""
    if model_name not in _SENTIMENT_MODEL_CACHE:
        _SENTIMENT_MODEL_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForSequenceClassification.from_pretrained(model_name),
        )
    return _SENTIMENT_MODEL_CACHE[model_name]


def _sentiment_label(neg, neu, pos):
    """Map negative/neutral/positive probabilities to one of five labels."""
    if neg > 0.8:
        return "Very bad"
    if pos > 0.8:
        return "Very good"
    # Neutral is tested before the neg/pos comparison so that a dominant neutral
    # score wins regardless of which way the remaining probability leans.
    if neu > neg and neu > pos:
        return "Neutral"
    if neg > pos:
        return "Bad"
    return "Good"


def analyze_sentiment_scores(subscription_key, region, record_seconds=5):
    """Record speech, transcribe it with Azure and label the sentiment of the text.

    Args:
        subscription_key: Key of the Azure Speech resource.
        region: Azure region of the Speech resource.
        record_seconds: Length of the recording in seconds.

    Returns:
        str: Two lines -- the transcription and a sentiment label
        (``Very bad``, ``Bad``, ``Neutral``, ``Good`` or ``Very good``).
        When transcription yields no usable text (``None``, blank, or one of
        the no-match / canceled status messages returned by
        ``azure_transcribe``) the label is ``Not available`` and the sentiment
        model is not run.
    """
    # Step 1: Record and transcribe the audio
    transcription = azure_transcribe(subscription_key, region, record_seconds)

    # azure_transcribe reports failures as plain strings (or None); scoring the
    # sentiment of an error message would be meaningless.
    recognition_failed = (
        not transcription
        or not transcription.strip()
        or transcription == "No speech could be recognized"
        or transcription.startswith("Speech Recognition canceled:")
    )
    if recognition_failed:
        return f"Transcription of the recorded text: {transcription}\nSentiment: Not available"

    # Step 2: Perform sentiment analysis on the transcription
    tokenizer, model = _load_sentiment_model("cardiffnlp/twitter-roberta-base-sentiment")

    encoded_text = tokenizer(transcription, return_tensors='pt')
    output = model(**encoded_text)
    # output[0][0] holds the logits of the single input; index order is
    # negative, neutral, positive.
    neg, neu, pos = softmax(output[0][0].detach().numpy())

    sentiment_label = _sentiment_label(neg, neu, pos)

    return f"Transcription of the recorded text: {transcription}\nSentiment: {sentiment_label}"




In [3]:
import os

# Credentials come from the environment so the subscription key is never stored
# in the notebook or its saved outputs. Set AZURE_SPEECH_KEY (and optionally
# AZURE_SPEECH_REGION, default "eastus") before starting the kernel.
# NOTE(review): a key that has been committed inside a notebook should be
# treated as leaked and rotated in the Azure portal.
result = analyze_sentiment_scores(
    os.environ["AZURE_SPEECH_KEY"],
    os.environ.get("AZURE_SPEECH_REGION", "eastus"),
)
print(result)

Start Recording
Recording Done
Transcription of the recorded text: That's very nice.
Sentiment: Very good


In [36]:
# import pickle to save my function
import pickle

In [37]:
# Serialise analyze_sentiment_scores to disk.
# NOTE: pickle stores a function by reference (module + qualified name), not its
# code, so loading this file only works where the function is already defined.
with open('analyze_sentiment_scores.pkl', 'wb') as pickle_file:
    pickle.dump(analyze_sentiment_scores, pickle_file)