In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.4.1-py3-none-any.whl (487 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m487.4/487.4 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xx

In [None]:
pip install numpy seaborn neattext scikit-learn joblib openai-whisper

Collecting neattext
  Downloading neattext-0.1.3-py3-none-any.whl.metadata (12 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import neattext as nt
import joblib
import whisper
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from datasets import load_dataset

# Fetch the "emotion" corpus and turn its training split into a DataFrame
dataset = load_dataset("emotion")
train_split = dataset['train']
df = train_split.to_pandas()
print(df.head())

# Whisper "base" checkpoint; transcribe_audio() below reads this global
model = whisper.load_model("base")

def transcribe_audio(audio_path):
    """Transcribe ``audio_path`` with the module-level Whisper ``model``.

    Returns the value stored under the ``'text'`` key of the result that
    ``model.transcribe`` produces.
    """
    return model.transcribe(audio_path)['text']

def preprocess_text(text):
    """Return ``text`` with special characters and stopwords removed.

    The cleaning is delegated to neattext's ``TextFrame``; the same routine is
    applied to the training sentences and to text passed in at prediction time.
    """
    frame = nt.TextFrame(text)
    frame = frame.remove_special_characters()
    frame = frame.remove_stopwords()
    return frame.text

# Clean every sentence with the same routine that predict_emotion() uses later.
df['processed_text'] = df['text'].apply(preprocess_text)
y = df['label']

# Split the cleaned text BEFORE fitting TF-IDF so that the vocabulary and IDF
# weights are learned from the training rows only. Fitting the vectorizer on
# the whole frame leaks test-set statistics into the features and makes the
# reported score optimistic.
text_train, text_test, y_train, y_test = train_test_split(
    df['processed_text'], y, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(max_features=1000)
# Matrices stay sparse: MultinomialNB accepts scipy sparse input, so calling
# .toarray() here would only cost memory.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Full-corpus feature matrix kept under its original name in case any other
# cell still refers to X.
X = vectorizer.transform(df['processed_text'])

clf = MultinomialNB()
clf.fit(X_train, y_train)

# Hold-out evaluation on the 20% split
y_pred = clf.predict(X_test)
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Persist the fitted classifier and vectorizer to the working directory ...
joblib.dump(clf, 'emotion_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

# ... and reload them; predict_emotion() works from these reloaded copies.
loaded_model = joblib.load('emotion_model.pkl')
loaded_vectorizer = joblib.load('tfidf_vectorizer.pkl')

def predict_emotion(text):
    """Predict the emotion class id for a single piece of text.

    The text is cleaned with ``preprocess_text``, vectorized with the reloaded
    TF-IDF vectorizer and scored by the reloaded classifier. The first (only)
    element of the prediction array is returned.
    """
    cleaned = preprocess_text(text)
    features = loaded_vectorizer.transform([cleaned]).toarray()
    return loaded_model.predict(features)[0]

# Smoke-test the reloaded pipeline on one hand-written sentence; this prints
# the raw class id returned by predict_emotion().
sample_text = "i didnt feel humiliated"
emotion = predict_emotion(sample_text)
print(f"Predicted Emotion: {emotion}")

# Class id -> label name, in the order published for the "emotion" dataset
# (0 sadness, 1 joy, 2 love, 3 anger, 4 fear, 5 surprise). The ids are NOT
# alphabetical; the mapping can be cross-checked against
# dataset['train'].features['label'].names.
emotion_labels = {
    0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'
}

# Show the sample prediction as a label name instead of the raw class id
print(f"Predicted Emotion: {emotion_labels[emotion]}")

# Speech path: transcribe an audio file with Whisper, then classify the transcript
audio_file = "/content/audio_speech.mp3"  # Replace with your audio file path
transcribed_text = transcribe_audio(audio_file)
predicted_emotion = predict_emotion(transcribed_text)
print(f"Predicted Emotion from Audio: {emotion_labels[predicted_emotion]}")


                                                text  label
0                            i didnt feel humiliated      0
1  i can go from feeling so hopeless to so damned...      0
2   im grabbing a minute to post i feel greedy wrong      3
3  i am ever feeling nostalgic about the fireplac...      2
4                               i am feeling grouchy      3
Accuracy:  0.8171875
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.92      0.88       946
           1       0.74      0.97      0.84      1021
           2       0.96      0.45      0.61       296
           3       0.90      0.72      0.80       427
           4       0.88      0.71      0.79       397
           5       1.00      0.25      0.40       113

    accuracy                           0.82      3200
   macro avg       0.89      0.67      0.72      3200
weighted avg       0.84      0.82      0.80      3200

Predicted Emotion: 0
Predicted Emotion: anger




Predicted Emotion from Audio: fear
