In [None]:
#Inspiration for the sentiment analysis and speech-to-text algorithms was found at:
#https://www.geeksforgeeks.org/python-convert-speech-to-text-and-text-to-speech/ (speech to text)
#https://towardsdatascience.com/sentiment-analysis-with-python-part-1-5ce197074184 (sentiment analysis)

In [None]:
#training set
# one review per line; surrounding whitespace and the trailing newline are stripped.
# the context manager closes the file handle as soon as the lines have been read
with open('SpeechTrain3.txt', 'r') as train_file:
    reviews_train = [line.strip() for line in train_file]
#test set
with open('SpeechTest.txt', 'r') as test_file:
    reviews_test = [line.strip() for line in test_file]

In [None]:
#The labels are assigned by position (first half vs. second half of each corpus),
#so keep the size of both corpora together with their floored midpoints
Train_len = len(reviews_train)
Test_len = len(reviews_test)

Train_len_half = Train_len // 2
Test_len_half = Test_len // 2

In [None]:
#cleaning/text pre-processing function
import re

# Raw strings keep the regex escapes (\s, \[, \-, \/) away from Python's own
# string-escape processing, which warns about such invalid escape sequences.
# Punctuation that is deleted outright
REPLACE_NO_SPACE = re.compile(r"[.;:!\'?,\"()\[\]]")
# Double HTML line breaks, hyphens and slashes, which are turned into a space
REPLACE_WITH_SPACE = re.compile(r"(<br\s*/><br\s*/>)|(\-)|(\/)")

def preprocess_reviews(reviews):
    """Return a cleaned copy of `reviews`.

    Each line is lower-cased, the punctuation matched by REPLACE_NO_SPACE is
    removed, and every match of REPLACE_WITH_SPACE is replaced by one space.

    Parameters:
        reviews: iterable of strings, one message per item.

    Returns:
        A new list of cleaned strings in the same order as the input.
    """
    return [
        REPLACE_WITH_SPACE.sub(" ", REPLACE_NO_SPACE.sub("", line.lower()))
        for line in reviews
    ]

# run the same cleaning over the training and the test corpus
reviews_train_clean, reviews_test_clean = map(
    preprocess_reviews, (reviews_train, reviews_test)
)

In [None]:
#vectorizing testing and training datasets
from sklearn.feature_extraction.text import CountVectorizer

# binary bag-of-words: a feature is 1 when the word occurs in the message, else 0.
# the vocabulary is learned from the cleaned training messages only and then
# reused unchanged to encode both corpora
cv = CountVectorizer(binary=True).fit(reviews_train_clean)
X, X_test = (
    cv.transform(corpus) for corpus in (reviews_train_clean, reviews_test_clean)
)

In [None]:
#importing machine learning algorithm and training using logistic regression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Labels are assigned by position: items before the midpoint get 1, the rest 0.
# NOTE(review): assumes each corpus lists its two classes in equal halves - confirm
# against the data files. The previous `<=` comparison also gave label 1 to the
# item AT the midpoint, i.e. the first item of the second half.
target_train = [1 if i < Train_len_half else 0 for i in range(Train_len)]
target_test = [1 if i < Test_len_half else 0 for i in range(Test_len)]

# Hold out 25% of the labelled training messages for validation.
# random_state pins the shuffle so the accuracies printed below can be reproduced
X_train, X_val, y_train, y_val = train_test_split(
    X, target_train, train_size=0.75, random_state=42
)

# Sweep the logistic-regression parameter C and report the validation accuracy
# of each candidate
for c in [0.01, 0.05, 0.25, 0.5, 1, 1.5, 2, 3]:
    lr = LogisticRegression(C=c)
    lr.fit(X_train, y_train)
    print("Accuracy for C=%s: %s"
          % (c, accuracy_score(y_val, lr.predict(X_val))))

Accuracy for C=0.01: 0.72
Accuracy for C=0.05: 0.76
Accuracy for C=0.25: 0.82
Accuracy for C=0.5: 0.84
Accuracy for C=1: 0.88
Accuracy for C=1.5: 0.88
Accuracy for C=2: 0.88
Accuracy for C=3: 0.88




In [None]:
#refit on the complete training set and score once on the test set
#C is taken from the sweep above (highest value before the accuracies plateau)
final_model = LogisticRegression(C=1.5).fit(X, target_train)
test_predictions = final_model.predict(X_test)
print ("Final Accuracy: %s" % accuracy_score(target_test, test_predictions))

Final Accuracy: 0.81


In [None]:
#Find out which words are associated the most with positive messages
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# get_feature_names_out() when available and fall back on older releases
if hasattr(cv, "get_feature_names_out"):
    vocabulary_terms = cv.get_feature_names_out()
else:
    vocabulary_terms = cv.get_feature_names()

# map every vocabulary word to its learned logistic-regression coefficient
feature_to_coef = dict(zip(vocabulary_terms, final_model.coef_[0]))

# the five largest coefficients push a message hardest towards class 1
for best_positive in sorted(
        feature_to_coef.items(),
        key=lambda x: x[1],
        reverse=True)[:5]:
    print(best_positive)
    

('good', 1.5790519418606421)
('no', 1.3654543712616638)
('nothing', 1.0990682117740753)
('does', 1.0647630746427286)
('satisfactory', 1.0468833644054143)


In [None]:
#Find out which words are associated the most with negative messages
# ascending sort by coefficient: the five smallest values come first
words_by_coef = sorted(feature_to_coef.items(), key=lambda pair: pair[1])
for best_negative in words_by_coef[:5]:
    print (best_negative)

('not', -1.674107835678854)
('broken', -1.597241689276459)
('needs', -1.1264082355356657)
('cheap', -1.0941093077406783)
('was', -1.012038190888181)


In [None]:
#importing widget library for button support
import ipywidgets as widgets
from IPython.core.display import clear_output

In [None]:
# Recording controls: an integer slider (0-25) whose value is handed to
# SpeaktoText as the listening time, a Start button, and an Output area
# in which the click handler runs
slider = widgets.IntSlider(
    value=0, min=0, max=25, step=1,
    description='Timer:',
    orientation='horizontal',
    readout=True, readout_format='d',
)
start_button = widgets.Button(description='Start')
out = widgets.Output()
def on_button_clicked(_):
    """Click handler for the Start button.

    Runs SpeaktoText for the number of seconds currently selected on the
    slider, with everything it prints captured by the `out` widget, and
    clears that captured output once the recording has finished.
    The button instance passed by ipywidgets is ignored.
    """
    with out:
        SpeaktoText(slider.value)
        clear_output()

In [None]:
# Python program to translate 
# speech to text and text to speech 


import speech_recognition as sr 
import pyttsx3 
import time


# Initialize the recognizer 
# (module-level instance; SpeaktoText uses it for noise calibration,
# listening and the Google recognition call)
r = sr.Recognizer() 

# Function to convert text to 
# speech 
def SpeakText(command):
    """Read `command` aloud through a freshly initialised pyttsx3 engine."""
    tts_engine = pyttsx3.init()
    # queue the utterance, then let the engine process its queue
    tts_engine.say(command)
    tts_engine.runAndWait()
    
    
# Listen to the microphone until the requested
# number of seconds has elapsed

# SpeaktoText is defined below

def SpeaktoText(timer):
    """Record from the microphone for about `timer` seconds and transcribe it.

    Every recognised phrase is lower-cased, printed, and appended to the
    module-level list MT, which is reset at the start of each call.

    Parameters:
        timer: recording time in seconds (anything accepted by int()).

    Returns:
        The list MT of recognised phrases (also available as a global).
    """
    global MT
    t_end = time.time() + int(timer)

    # every recording session starts with an empty transcript
    MT = []

    while time.time() < t_end:

        # Exception handling to handle
        # exceptions at the runtime
        try:

            # use the microphone as source for input.
            with sr.Microphone() as source2:

                # sample 0.2 seconds of audio so the recognizer can
                # adjust its energy threshold to the surrounding noise level
                r.adjust_for_ambient_noise(source2, duration=0.2)

                # never listen past the deadline: without a timeout listen()
                # blocks until a phrase is heard, however long that takes.
                # a timeout of 0 would be treated as "no timeout", hence the guard
                remaining = t_end - time.time()
                if remaining <= 0:
                    break

                # listens for the user's input
                audio2 = r.listen(
                    source2, timeout=remaining, phrase_time_limit=remaining
                )

                # Using google to recognize audio
                MyText = r.recognize_google(audio2).lower()

                print(MyText)
                MT.append(MyText)

        except sr.WaitTimeoutError:
            # no phrase started before the deadline: the session is over
            break

        except sr.RequestError as e:
            print("Could not request results; {0}".format(e))

        except sr.UnknownValueError:
            # raised when the audio could not be understood, not an unknown error
            print("Could not understand audio")

    return MT



In [None]:
#hook the click handler up to the Start button and show the controls
#(button, timer slider and output area) stacked vertically
start_button.on_click(on_button_clicked)
controls = widgets.VBox(children=[start_button, slider, out])
controls

VBox(children=(Button(description='Start', style=ButtonStyle()), IntSlider(value=0, description='Timer:', max=…

In [None]:
#output from speech to text saved under output_train.txt
# MT is only created once the Start button has run SpeaktoText, so look it up
# defensively instead of failing with a NameError on a fresh kernel
recorded_lines = globals().get("MT")
if recorded_lines is None:
    print("No transcript available yet: press Start to record, then re-run this cell.")
else:
    with open("output_train.txt", "w") as txt_file:
        # every entry of MT is already a single string, so it is written as-is,
        # one phrase per line
        txt_file.writelines(line + "\n" for line in recorded_lines)

NameError: name 'MT' is not defined

In [None]:
#vectorizing output from speech to text function
# clean the phrases exactly like the training data first, so that they are
# tokenised the same way as the vocabulary the vectorizer was fitted on
# (e.g. apostrophes are removed before tokenising)
MT1 = cv.transform(preprocess_reviews(MT))

In [None]:
#predicting sentiment values of input speech to text
# one label per vectorised phrase, in the 1/0 encoding used for target_train
# (presumably 1 = positive, 0 = negative - confirm against the training data)
final_model.predict(MT1)

In [None]:
#print the input from the microphone to compare it with the output from the sentiment analysis
# MT holds the recognised phrases; MT1 is only their sparse vectorised form
print(MT)