In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow as keras
from keras import layers
import librosa
import pandas as pd
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Reshape


# Load the audio samples and quality ratings
audio_folder = 'E:/somos/audios/audios'
ratings_file = 'E:/somos/raw_scores_with_metadata/raw_scores.tsv'

# Load quality ratings first and build a one-shot id -> rating lookup.
# An utterance may appear on several rows; the first row wins, which is what
# the previous per-file `rating[0]` lookup returned.
ratings_df = pd.read_csv(ratings_file, sep='\t')
rating_by_id = (
    ratings_df.drop_duplicates(subset='utteranceId')
    .set_index('utteranceId')['choice']
)

# Load audio samples.
# The listing is sorted because os.listdir() returns entries in arbitrary
# order, which would make the seeded train/validation split irreproducible.
audio_files = sorted(file for file in os.listdir(audio_folder) if file.endswith('.wav'))
audio_samples = []
audio_ids = []
quality_ratings = []
for audio_file in audio_files:
    audio_id = audio_file.split('.')[0]  # Extract audio ID from the file name
    if audio_id not in rating_by_id.index:
        # Skip unlabeled clips so samples, ids and ratings stay index-aligned.
        continue
    audio_path = os.path.join(audio_folder, audio_file)
    audio, sr = librosa.load(audio_path, sr=None)  # sr=None keeps the file's native rate
    audio_samples.append(audio)
    audio_ids.append(audio_id)
    quality_ratings.append(rating_by_id[audio_id])

if not audio_samples:
    raise ValueError(
        "No .wav file in %r has a matching 'utteranceId' in %r" % (audio_folder, ratings_file)
    )

# Convert audio samples and quality ratings to numpy arrays.
# The object array is filled element by element so it is always 1-D (one
# waveform per slot), even when every clip happens to have the same length.
waveforms = audio_samples
audio_samples = np.empty(len(waveforms), dtype=object)
for position, waveform in enumerate(waveforms):
    audio_samples[position] = waveform
# Subtract 1 from the quality ratings to convert them to valid class indices
quality_ratings = np.array(quality_ratings) - 1

print(audio_samples)
print(quality_ratings)


# Perform train-test split
x_train, x_val, y_train, y_val = train_test_split(audio_samples, quality_ratings, test_size=0.2, random_state=42)

# Preprocess audio samples
def preprocess_audio_samples(audio_samples):
    """Return the items of ``audio_samples`` in a new list, in order.

    No transformation is applied yet: every item is passed through
    untouched. This is the place to add per-sample preprocessing.
    """
    # Identity mapping for now; replace `waveform` with its processed form here.
    return [waveform for waveform in audio_samples]

# Run the training split first, then the validation split, through the same
# preprocessing hook.
preprocessed_audio_samples_train, preprocessed_audio_samples_val = (
    preprocess_audio_samples(split) for split in (x_train, x_val)
)

# Function to extract MFCC features from audio samples
def extract_mfcc_features(audio_samples):
    """Compute one transposed MFCC matrix per item of ``audio_samples``.

    Each item is handed to ``librosa.feature.mfcc`` as ``y``; the sampling
    rate and coefficient count are read from the module-level ``sample_rate``
    and ``num_mfcc`` at call time. Returns a list with one ``mfcc.T`` entry
    per input item, in input order.
    """
    return [
        librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=num_mfcc).T
        for waveform in audio_samples
    ]

# Set the parameters for MFCC feature extraction
# NOTE(review): the clips are loaded with sr=None (native rate) — confirm that
# 44100 really is the rate of the files, otherwise the mel filters are misplaced.
sample_rate = 44100  # Sample rate of your audio data
num_mfcc = 13  # Number of MFCC coefficients to extract

# Extract MFCC features for training data
feature_representation_train = extract_mfcc_features(preprocessed_audio_samples_train)

# Extract MFCC features for validation data
feature_representation_val = extract_mfcc_features(preprocessed_audio_samples_val)

# Pad both splits to ONE common number of frames, taken from the training set.
# Padding each split to its own longest clip gives X_train and X_val different
# time dimensions, so a model sized for one cannot consume the other.
# Validation clips longer than the longest training clip are truncated.
max_frames = max(len(features) for features in feature_representation_train)
X_train = pad_sequences(
    feature_representation_train,
    maxlen=max_frames,
    dtype='float32',
    padding='post',
    truncating='post',
)
X_val = pad_sequences(
    feature_representation_val,
    maxlen=max_frames,
    dtype='float32',
    padding='post',
    truncating='post',
)

# Number of rating classes (ratings 1..5 shifted to class indices 0..4)
num_classes = 5

y_train = np.array(y_train)
y_val = np.array(y_val)

# Fail with a readable message instead of an IndexError inside to_categorical
# when a label falls outside the declared class range.
label_values = np.concatenate([y_train.ravel(), y_val.ravel()])
if label_values.size and (label_values.min() < 0 or label_values.max() >= num_classes):
    raise ValueError(
        "Labels must lie in [0, %d); found range [%s, %s]"
        % (num_classes, label_values.min(), label_values.max())
    )

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)

# # Define the model architecture
# model = tf.keras.Sequential([
#     layers.Input(shape=X_train[0].shape),
#     # Add layers to your model as needed
#     # Example:
#     layers.Dense(64, activation='relu'),
#     layers.Dense(5, activation='softmax')
# ])

# # Compile the model
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])



# # Train the model
# model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

# def convert_audio_to_feature_representation(audio_samples):
#     feature_representation = []
#     for audio in audio_samples:
#         # Extract MFCC features
#         mfcc = librosa.feature.mfcc(audio, sr=sample_rate, n_mfcc=13)
#         feature_representation.append(mfcc.T)  # Append the feature representation to the list
#     return feature_representation


# # Make predictions on new audio samples
# new_audio_samples = 'audios/audios/booksent_2012_0005_164.wav'  # Provide new audio samples to predict on
# preprocessed_new_audio_samples = preprocess_audio_samples(new_audio_samples)
# new_feature_representation = convert_audio_to_feature_representation(preprocessed_new_audio_samples)
# X_new = np.array(new_feature_representation)
# predictions = model.predict(X_new)
# print(predictions)


# Build the model
# The input is one padded MFCC matrix per clip, i.e. X_train.shape[1:] ==
# (frames, n_mfcc). Declaring the input as (num_classes,) made the network
# expect the label width rather than the feature shape.
model = Sequential()
model.add(tf.keras.Input(shape=X_train.shape[1:]))
# Flatten (frames, n_mfcc) into a single feature vector for the dense layers.
model.add(Reshape((-1,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model (labels are one-hot, hence categorical cross-entropy)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))


def convert_audio_to_feature_representation(audio_samples):
    """Compute a transposed 13-coefficient MFCC matrix for each waveform.

    Args:
        audio_samples: iterable of waveforms accepted by
            ``librosa.feature.mfcc`` as ``y``.

    Returns:
        A list with one ``mfcc.T`` entry per input waveform, in input order.
        The sampling rate is read from the module-level ``sample_rate``.
    """
    feature_representation = []
    for audio in audio_samples:
        # Extract MFCC features. The signal must be passed as `y=`: current
        # librosa releases reject a positional signal argument.
        mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
        feature_representation.append(mfcc.T)  # Append the feature representation to the list
    return feature_representation


# Make predictions on new audio samples
new_audio_paths = ['audios/audios/booksent_2012_0005_164.wav']  # Provide new audio files to predict on

# Decode every file into a waveform first. Passing the path string itself to
# the helpers made them iterate over its characters instead of audio data.
new_audio_samples = [librosa.load(path, sr=None)[0] for path in new_audio_paths]
preprocessed_new_audio_samples = preprocess_audio_samples(new_audio_samples)
new_feature_representation = convert_audio_to_feature_representation(preprocessed_new_audio_samples)

# Pad/truncate to the frame count used for training so the batch has the
# shape the model was fitted on.
X_new = pad_sequences(
    new_feature_representation,
    maxlen=X_train.shape[1],
    dtype='float32',
    padding='post',
    truncating='post',
)
predictions = model.predict(X_new)
print(predictions)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'E:/somos/audios/audios'

In [6]:
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.callbacks import ModelCheckpoint

# Load the dataset
# NOTE(review): the recorded run of this cell ended with
# "FileNotFoundError: ... 'somos.zip'", so nothing below this line executed.
# The file name suggests an archive rather than a single audio file — confirm
# what is meant to be loaded here.
# `data` holds the whole return value of librosa.load; other cells in this
# notebook unpack that return value into (signal, sample_rate).
data = librosa.load('somos.zip', sr=16000)

# Extract the features
# NOTE(review): `data` is passed positionally and is not unpacked; the other
# MFCC calls in this notebook that ran successfully pass the signal as `y=`.
features = librosa.feature.mfcc(data, n_mfcc=20, hop_length=160, win_length=400)

# Split the data into train and test sets
# NOTE(review): train_test_split is not imported in this cell; it is only
# available if an earlier cell imported it. The "labels" being split here are
# `data` itself (the load result), not a separate target — confirm the
# intended labels.
X_train, X_test, y_train, y_test = train_test_split(features, data, test_size=0.25)

# Convert the splits to numpy arrays (no scaling or normalization is applied)
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

# Create the model
# Input shape is taken from the second axis of X_train; the output layer has
# 10 softmax units.
model = Sequential()
model.add(Flatten(input_shape=(X_train.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(10, activation='softmax'))

# Compile the model
# NOTE(review): categorical_crossentropy presumably expects one-hot targets
# with 10 columns to match the output layer — y_train is not encoded that way
# anywhere in this cell; confirm.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model
# score[0] is printed as the loss and score[1] as the accuracy metric
# requested in compile().
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Save the model
model_path = 'model.h5'
model.save(model_path)

# Make predictions for new audios
def predict(audio, sr=16000):
    """Run the module-level ``model`` on MFCC features of one waveform.

    Args:
        audio: waveform passed to ``librosa.feature.mfcc`` as ``y``.
        sr: sampling rate of ``audio``. Defaults to 16000, the rate this
            cell passes to ``librosa.load``; without it librosa would fall
            back to its own default rate.

    Returns:
        Whatever ``model.predict`` returns for the MFCC array.
    """
    # The signal must be given as `y=`: current librosa releases reject a
    # positional signal argument.
    features = librosa.feature.mfcc(
        y=audio, sr=sr, n_mfcc=20, hop_length=160, win_length=400
    )
    features = np.asarray(features)
    return model.predict(features)

  data = librosa.load('somos.zip', sr=16000)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: [Errno 2] No such file or directory: 'somos.zip'

In [59]:
""" #to identify emotions

import pandas as pd
import librosa
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


df = pd.read_csv('E:\Samsung-Test\emotions.csv')

# Extract input features (X) and labels (y)
X = df['Emotion'].values
y = df['Text'].values

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
X = label_encoder.fit_transform(X)

 # Load the dataset
# data = np.load('E:\Samsung-Test\emotions.npz')
# X = data['X']
# y = data['y'] 

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Train the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions for new values
new_input = librosa.load('output.mp3')
new_features = librosa.feature.mfcc(new_input)
new_prediction = model.predict(new_features)


# Decode the predicted labels back to their original string values
predicted_labels = label_encoder.inverse_transform(new_prediction)

# Print the prediction
print(predicted_labels)
 """
import pandas as pd
import librosa
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# Read the CSV file containing the training data
# NOTE(review): the Windows path is not a raw string. Its backslash sequences
# are not recognised escapes, so the path text survives, but Python may warn
# about them — consider a raw string or forward slashes.
df = pd.read_csv('E:\Samsung-Test\emotions.csv')

# Extract input features (X) and labels (y)
# X is the raw 'Text' column (strings); y is the 'Emotion' column.
X = df['Text'].values
y = df['Emotion'].values

# Encode string labels to numeric values
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
# No random_state is given, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Train the model
# NOTE(review): X_train is still raw text at this point. The recorded run of
# this cell ended with "ValueError: could not convert string to float",
# presumably raised by this fit. The later cell In [67] converts the text with
# TfidfVectorizer before fitting.
model = LogisticRegression()
model.fit(X_train, y_train)

# Load and process the audio file
audio_file = 'output.mp3'
audio_data, sr = librosa.load(audio_file, duration=3)  # Load only the first 3 seconds
audio_features = librosa.feature.mfcc(y=audio_data, sr=sr)

# Flatten the MFCC matrix into a single row
# NOTE(review): the model above is fitted on the 'Text' column, not on audio
# features — confirm that feeding MFCCs to it is intended; the feature spaces
# look unrelated.
audio_features = audio_features.reshape(1, -1)

# Make predictions on the audio features and map the class index back to its
# original label string
predicted_label = label_encoder.inverse_transform(model.predict(audio_features))

# Print the predicted emotion
print(predicted_label)


ValueError: could not convert string to float: 'My father in law was diagnosed with pancreatic cancer when our daughter was  nine  months old. He passed  nine  months later when she was  one  eight  months.\t\t\t\t\t\nSad-312'

In [30]:
""" import csv

# Open the text file
with open('E:\Samsung-Test\B.txt', 'r') as file:
    # Read the file content
    content = file.read()
    
    # Split the content based on "|"
    split_content = content.split("|")
    
    # Save the split content to a CSV file
    with open('E:\Samsung-Test\output2.csv','a',  newline='') as csv_file:
        writer = csv.writer(csv_file)
        for item in split_content:
            writer.writerow([item.strip()])  # Optional: Use strip() to remove leading/trailing whitespaces


 """

In [67]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Read the CSV file
df = pd.read_csv('emotions.csv')

# Replace missing values with an empty string.
# The result is assigned back to the column: fillna(inplace=True) on a column
# selection is chained assignment, which pandas warns about and which leaves
# `df` unchanged under Copy-on-Write. Any NaN left in place would make the
# TF-IDF step reject the document.
df['Text'] = df['Text'].fillna('')

# Extract input features (X) and labels (y)
X = df['Text'].values
y = df['Emotion'].values

# Convert emotions to numerical labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Vectorize the text data using TF-IDF.
# The vocabulary is learned from the training split only and then reused for
# the test split, so no test-set information leaks into the features.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Train the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set and map class indices back to label strings
predicted_labels = label_encoder.inverse_transform(model.predict(X_test))

# Print the predictions
print(predicted_labels)


['Anger' 'Happy' 'Base' 'Base' 'Anger' 'Calm' 'Happy' 'Surprise' 'Calm'
 'Anger' 'Fear' 'Calm' 'Surprise' 'Surprise' 'Surprise' 'Surprise' 'Sad'
 'Calm' 'Excited' 'Surprise' 'Anger' 'Calm' 'Surprise' 'Apologetic' 'Base'
 'Apologetic' 'Base' 'Apologetic' 'Anger' 'Surprise' 'Sad' 'Calm' 'Anger'
 'Surprise' 'Anger' 'Apologetic' 'Excited' 'Base' 'Excited' 'Sad'
 'Excited' 'Sad' 'Surprise' 'Calm' 'Surprise' 'Sad' 'Surprise' 'Anger'
 'Base' 'Apologetic' 'Happy' 'Anger' 'Calm' 'Base' 'Anger' 'Surprise'
 'Happy' 'Fear' 'Anger' 'Happy' 'Excited' 'Anger' 'Surprise' 'Excited'
 'Surprise' 'Calm' 'Fear' 'Fear' 'Base' 'Anger' 'Sad' 'Base' 'Happy'
 'Excited' 'Apologetic' 'Surprise' 'Surprise' 'Anger' 'Base' 'Calm' 'Sad'
 'Sad' 'Excited' 'Fear' 'Surprise' 'Excited' 'Surprise' 'Sad' 'Fear'
 'Anger' 'Fear' 'Anger' 'Excited' 'Excited' 'Excited' 'Calm' 'Excited'
 'Anger' 'Apologetic' 'Anger' 'Anger' 'Anger' 'Happy' 'Apologetic' 'Anger'
 'Fear' 'Excited' 'Happy' 'Calm' 'Excited' 'Anger' 'Sad' 'Calm' 'Calm'

In [70]:
import pandas as pd
import librosa
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the CSV file
df = pd.read_csv('emotions.csv')

# Replace missing values with an empty string.
# The result is assigned back to the column: fillna(inplace=True) on a column
# selection is chained assignment, which pandas warns about and which leaves
# `df` unchanged under Copy-on-Write.
df['Text'] = df['Text'].fillna('')

# Extract input features (X) and labels (y)
X = df['Text'].values
y = df['Emotion'].values

# Convert emotions to numerical labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Vectorize the text data using TF-IDF
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Train the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Load and process the audio file
audio_file = 'output.mp3'
audio_data, sr = librosa.load(audio_file, duration=3)  # Load only the first 3 seconds
audio_text = librosa.feature.mfcc(y=audio_data, sr=sr)

# Convert the MFCC rows to one string.
# NOTE(review): this is a printout of MFCC numbers, not a transcript of the
# speech. The classifier was trained on sentences, so a meaningful prediction
# presumably needs a speech-to-text step here instead — confirm the intent.
audio_text_str = ' '.join(map(str, audio_text))

# Vectorize the audio text using the same TF-IDF vectorizer
audio_text_vectorized = vectorizer.transform([audio_text_str])

# An all-zero vector means none of the input tokens exist in the training
# vocabulary; the prediction then depends on the model's intercepts only and
# says nothing about the audio.
if audio_text_vectorized.nnz == 0:
    import warnings

    warnings.warn(
        "Input for %r shares no vocabulary with the training text; "
        "the predicted label does not depend on the audio." % audio_file
    )

# Make predictions on the audio text
predicted_label = label_encoder.inverse_transform(model.predict(audio_text_vectorized))

# Print the prediction
print(predicted_label)


['Base']
