In [1]:
import numpy as np

def load_glove_embeddings(filepath):
    """Read a GloVe-style text file into a ``{word: vector}`` dictionary.

    Every non-blank line is expected to hold a token followed by its
    whitespace-separated float coefficients.

    Args:
        filepath: Path to the UTF-8 encoded embeddings file.

    Returns:
        dict mapping each word (str) to a 1-D ``float32`` NumPy array.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a coefficient on a line cannot be parsed as a float.
    """
    embeddings_index = {}
    with open(filepath, encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # Blank / whitespace-only lines (e.g. an extra newline at the end
            # of the file) carry no token; skip them instead of failing with
            # an IndexError on values[0].
            if not values:
                continue
            word = values[0]
            embeddings_index[word] = np.asarray(values[1:], dtype='float32')
    return embeddings_index

# Location of the pre-trained GloVe vectors (100-dimensional, judging by the file name)
glove_file = 'glove.6B.100d.txt'
# Parse the whole file into an in-memory {word: vector} lookup
embeddings_index = load_glove_embeddings(filepath=glove_file)


In [2]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Label encoder used to turn predicted class indices back into names.
# classes_ is assigned directly (no fit), so index 0/1/2 decode to
# 'negative'/'neutral'/'positive' in exactly this order.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.array(['negative', 'neutral', 'positive'])

# Restore the saved network for inference; compile=False means it is not
# compiled after loading.
model = tf.keras.models.load_model('Model.h5', compile=False)


In [3]:
import sounddevice as sd
import speech_recognition as sr
from scipy.io.wavfile import write

def record_sentence(filename='output.wav', duration=5, samplerate=16000):
    """Record a fixed-length mono clip with sounddevice and save it as a WAV file.

    Args:
        filename: Destination path of the WAV file.
        duration: Length of the recording in seconds.
        samplerate: Sampling rate in Hz, used for both capture and the file.

    Returns:
        None. The audio is written to ``filename`` and progress is printed.
    """
    print("Recording...")
    frame_count = int(duration * samplerate)
    samples = sd.rec(
        frame_count,
        samplerate=samplerate,
        channels=1,
        dtype='int16',
    )
    # sd.rec returns before the capture is finished; wait for it to complete
    # before the buffer is written out.
    sd.wait()
    write(filename, samplerate, samples)
    print("Recording complete. File saved as", filename)

def audio_to_text(filename='output.wav'):
    """Transcribe an audio file using the recognizer's Google Web Speech backend.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The transcript returned by ``recognize_google``.
    """
    speech_recognizer = sr.Recognizer()

    # Read the complete file into an audio object before sending it off.
    with sr.AudioFile(filename) as wav_source:
        captured_audio = speech_recognizer.record(wav_source)

    transcript = speech_recognizer.recognize_google(captured_audio)
    return transcript


In [4]:
import numpy as np
import re

# Function to preprocess text
def preprocess_text(text):
    """Normalise raw text before it is vectorised.

    Steps, in order:
      1. Drop ``@mention`` handles (``@`` followed by letters, digits or ``_``).
      2. Drop every character that is neither an ASCII letter nor whitespace
         (so digits and punctuation are removed as well).
      3. Lowercase what remains.

    Whitespace is left untouched, so leading/trailing/double spaces may remain.

    Args:
        text: The input string.

    Returns:
        The cleaned, lowercased string.
    """
    without_mentions = re.sub(r'@[A-Za-z0-9_]+', '', text)
    letters_and_spaces = re.sub(r'[^a-zA-Z\s]', '', without_mentions)
    return letters_and_spaces.lower()

# Vectorize text using GloVe embeddings
def vectorize_text_with_glove(text, embeddings_index):
    """Represent a text as the mean of the embeddings of its known words.

    The text is split on whitespace; tokens missing from ``embeddings_index``
    are ignored. If no token is found, an all-zero vector is returned.

    Args:
        text: The (already preprocessed) text to embed.
        embeddings_index: Non-empty mapping from word to embedding vector.
            The vector length is taken from its first value.

    Returns:
        A 1-D float64 NumPy array with the same length as one embedding.
    """
    # Infer the dimensionality from any one stored vector.
    dim = len(next(iter(embeddings_index.values())))

    running_sum = np.zeros(dim)
    hits = 0
    for token in text.split():
        vector = embeddings_index.get(token)
        if vector is None:
            continue  # out-of-vocabulary token
        running_sum += vector
        hits += 1

    # Avoid dividing by zero when nothing matched; the zero vector is returned as-is.
    if hits == 0:
        return running_sum
    return running_sum / hits

def predict_sentiment(text, embeddings_index, model, label_encoder):
    """Predict the sentiment label of a piece of text.

    The text is cleaned with ``preprocess_text``, averaged into a single
    embedding with ``vectorize_text_with_glove``, and passed to ``model`` as a
    batch containing one sample with one timestep.

    Args:
        text: Raw input text.
        embeddings_index: Mapping from word to embedding vector.
        model: Object exposing ``predict``; its output is arg-maxed along
            axis 1 (presumably shape ``(1, n_classes)`` -- confirm against
            the saved model).
        label_encoder: Object exposing ``inverse_transform`` to map class
            indices back to labels.

    Returns:
        The decoded label of the single input sample.
    """
    # Text -> cleaned text -> averaged embedding
    features = vectorize_text_with_glove(preprocess_text(text), embeddings_index)

    # Model input layout: (batch=1, timesteps=1, features)
    model_input = features.reshape(1, 1, len(features))

    # Highest-scoring class index per sample, decoded back to its name
    class_scores = model.predict(model_input)
    best_index = np.argmax(class_scores, axis=1)
    decoded = label_encoder.inverse_transform(best_index)
    return decoded[0]


In [5]:
# The GloVe embeddings (`embeddings_index`), the Keras `model` and the
# `label_encoder` were already created by the earlier cells (In [1] and
# In [2]). They are reused here; parsing the embeddings file and reloading
# Model.h5 a second time only repeated that expensive work.

# Record a single sentence from the microphone (saved to output.wav by default)
record_sentence()

# Convert the recorded audio to text
text = audio_to_text()
print("Recognized text:", text)

# Predict the sentiment of the recognised sentence
predicted_sentiment = predict_sentiment(text, embeddings_index, model, label_encoder)
print("Predicted sentiment:", predicted_sentiment)


Recording...
Recording complete. File saved as output.wav
Recognized text: you are so mein
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 369ms/step
Predicted sentiment: negative
