In [44]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [45]:
import pandas as pd

# Load dataset
file_path = '/content/drive/MyDrive/go emotion (text dataset)/tables/emotion_words.csv'
df = pd.read_csv(file_path)

# Rename 'word' column to 'text'
df.rename(columns={'word': 'text'}, inplace=True)

# Drop unwanted columns
df.drop(['odds', 'freq'], axis=1, inplace=True) 

# Display updated data
print(df.head())


      emotion       text
0  admiration      great
1  admiration    awesome
2  admiration    amazing
3  admiration       good
4  admiration  beautiful


In [46]:
df

Unnamed: 0,emotion,text
0,admiration,great
1,admiration,awesome
2,admiration,amazing
3,admiration,good
4,admiration,beautiful
...,...,...
1173,neutral,correlation
1174,neutral,hiv
1175,neutral,somebody
1176,neutral,slow


In [47]:
df.info()
df.columns


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1178 entries, 0 to 1177
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   emotion  1178 non-null   object
 1   text     1178 non-null   object
dtypes: object(2)
memory usage: 18.5+ KB


Index(['emotion', 'text'], dtype='object')

In [48]:
from transformers import BertTokenizer

# Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenize text data
df['tokens'] = df['text'].apply(lambda x: tokenizer.encode(x, truncation=True, padding="max_length", max_length=128))

# Display processed data
print(df[['text', 'tokens', 'emotion']].head())


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

        text                                             tokens     emotion
0      great  [101, 2307, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...  admiration
1    awesome  [101, 12476, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...  admiration
2    amazing  [101, 6429, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...  admiration
3       good  [101, 2204, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...  admiration
4  beautiful  [101, 3376, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...  admiration


In [49]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [50]:
df.columns = df.columns.str.strip()
print(df.columns)


Index(['emotion', 'text', 'tokens'], dtype='object')


In [51]:
from sklearn.model_selection import train_test_split
import torch
from transformers import BertTokenizer  # Import BertTokenizer

# ... (rest of the code before val_encodings)

# Load BERT tokenizer (Ensure this line is executed before using tokenizer)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Convert emotion labels into numerical format
emotion_labels = {label: idx for idx, label in enumerate(df['emotion'].unique())}
df['label'] = df['emotion'].map(emotion_labels)

# Split dataset into training and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(df['text'], df['label'], test_size=0.1)

# Convert data into BERT format for both train and validation sets
val_encodings = tokenizer(list(val_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors="pt") # This line is added to create train_encodings

train_labels = torch.tensor(train_labels.values)
val_labels = torch.tensor(val_labels.values)

In [52]:
import os
os.environ["WANDB_DISABLED"] = "true"


In [53]:
import torch

class EmotionDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item


In [54]:
# Create Dataset objects
train_dataset = EmotionDataset(train_encodings, train_labels)
val_dataset = EmotionDataset(val_encodings, val_labels)


In [55]:
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

# Load pre-trained BERT model
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(emotion_labels))

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    eval_strategy="epoch",  # Fixed deprecated warning here!
    save_strategy="epoch",
    logging_dir="./logs"
)

# Create Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

# Start Training
trainer.train()


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss
1,No log,2.595792
2,No log,2.271429
3,No log,2.166395


TrainOutput(global_step=399, training_loss=2.5329066685267856, metrics={'train_runtime': 786.1181, 'train_samples_per_second': 4.045, 'train_steps_per_second': 0.508, 'total_flos': 13076382447360.0, 'train_loss': 2.5329066685267856, 'epoch': 3.0})

In [None]:
import os

model_path = "/content/text_emotion_model"
os.makedirs(model_path, exist_ok=True)


In [None]:
# Save the model and tokenizer
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)


('/content/text_emotion_model/tokenizer_config.json',
 '/content/text_emotion_model/special_tokens_map.json',
 '/content/text_emotion_model/vocab.txt',
 '/content/text_emotion_model/added_tokens.json')

In [None]:
from transformers import BertForSequenceClassification, BertTokenizer

# Load the model from the local path
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)


In [None]:
text = "I feel very sad today."
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
outputs = model(**inputs)
logits = outputs.logits

# Get predicted class
predicted_class = torch.argmax(logits, dim=-1).item()
predicted_emotion = list(emotion_labels.keys())[list(emotion_labels.values()).index(predicted_class)]

print(f"Predicted Emotion: {predicted_emotion}")


Predicted Emotion: admiration


#**RAVEES Dataset**#


##Import the Modules##

In [20]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import warnings
warnings.filterwarnings('ignore')

##Load the Dataset##

In [23]:
# Step 1: Install kagglehub (if not already installed)
!pip install kagglehub

# Step 2: Import Libraries
import kagglehub
import os

# Step 3: Download latest version of the dataset
path = kagglehub.dataset_download("ejlok1/toronto-emotional-speech-set-tess")
print("✅ Path to dataset files:", path)

# Step 4: Extract Paths and Labels
paths = []
labels = []

for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        if filename.endswith('.wav'):  # Ensure only audio files are processed
            paths.append(os.path.join(dirname, filename))

            # Extract label from filename (assuming format like 'happy_001.wav')
            label = filename.split('_')[-1]
            label = label.split('.')[0]
            labels.append(label.lower())

        # ✅ Limit number of files to 2800 to avoid overloading memory
        if len(paths) == 2800:
            break

print(f"✅ Dataset Loaded — Total Samples: {len(paths)}")
print(f"✅ First 5 Labels: {labels[:5]}")


Downloading from https://www.kaggle.com/api/v1/datasets/download/ejlok1/toronto-emotional-speech-set-tess?dataset_version_number=1...


100%|██████████| 428M/428M [00:03<00:00, 121MB/s]

Extracting files...





✅ Path to dataset files: /root/.cache/kagglehub/datasets/ejlok1/toronto-emotional-speech-set-tess/versions/1
✅ Dataset Loaded — Total Samples: 5600
✅ First 5 Labels: ['angry', 'angry', 'angry', 'angry', 'angry']


In [24]:
len(paths)

5600

In [25]:
paths[:5]

['/root/.cache/kagglehub/datasets/ejlok1/toronto-emotional-speech-set-tess/versions/1/tess toronto emotional speech set data/TESS Toronto emotional speech set data/YAF_angry/YAF_ton_angry.wav',
 '/root/.cache/kagglehub/datasets/ejlok1/toronto-emotional-speech-set-tess/versions/1/tess toronto emotional speech set data/TESS Toronto emotional speech set data/YAF_angry/YAF_rat_angry.wav',
 '/root/.cache/kagglehub/datasets/ejlok1/toronto-emotional-speech-set-tess/versions/1/tess toronto emotional speech set data/TESS Toronto emotional speech set data/YAF_angry/YAF_doll_angry.wav',
 '/root/.cache/kagglehub/datasets/ejlok1/toronto-emotional-speech-set-tess/versions/1/tess toronto emotional speech set data/TESS Toronto emotional speech set data/YAF_angry/YAF_goal_angry.wav',
 '/root/.cache/kagglehub/datasets/ejlok1/toronto-emotional-speech-set-tess/versions/1/tess toronto emotional speech set data/TESS Toronto emotional speech set data/YAF_angry/YAF_ring_angry.wav']

In [26]:
labels[:5]

['angry', 'angry', 'angry', 'angry', 'angry']

In [27]:
## Create a dataframe
df = pd.DataFrame()
df['speech'] = paths
df['label'] = labels
df.head()

Unnamed: 0,speech,label
0,/root/.cache/kagglehub/datasets/ejlok1/toronto...,angry
1,/root/.cache/kagglehub/datasets/ejlok1/toronto...,angry
2,/root/.cache/kagglehub/datasets/ejlok1/toronto...,angry
3,/root/.cache/kagglehub/datasets/ejlok1/toronto...,angry
4,/root/.cache/kagglehub/datasets/ejlok1/toronto...,angry


In [28]:
df['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
angry,800
disgust,800
ps,800
neutral,800
fear,800
sad,800
happy,800


In [29]:
def waveplot(data, sr, emotion):
    plt.figure(figsize=(10,4))
    plt.title(emotion, size=20)
    librosa.display.waveshow(data, sr=sr)
    plt.show()

def spectogram(data, sr, emotion):
    x = librosa.stft(data)
    xdb = librosa.amplitude_to_db(abs(x))
    plt.figure(figsize=(11,4))
    plt.title(emotion, size=20)
    librosa.display.specshow(xdb, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()

##Feature Extraction##

In [31]:
def extract_mfcc(filename):
    y, sr = librosa.load(filename, duration=3, offset=0.5)
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    return mfcc

In [32]:
extract_mfcc(df['speech'][0])

array([-3.4689862e+02,  4.8350666e+01, -7.1730080e+00,  2.2442675e+01,
       -1.3843185e+01,  2.7580633e+00,  5.4906547e-01, -2.1438662e+01,
       -9.6403570e+00,  1.8415675e+00, -2.2781013e+01,  6.7301178e+00,
       -9.6913757e+00,  3.2927620e+00,  2.9441059e+00, -9.5457802e+00,
       -8.3665913e-01, -2.1309252e+00, -1.3715850e+01, -5.1092041e-01,
       -8.3683872e+00, -6.0786853e+00, -5.9717164e+00, -3.9224625e+00,
       -4.1430187e+00,  1.2544775e+00,  2.3073414e-02,  7.4396553e+00,
        5.9169774e+00,  1.1934241e+01,  1.1908927e+01,  1.8584372e+01,
        1.2968408e+01,  1.1704304e+01,  4.4036055e+00,  2.7678478e+00,
        2.7806363e-01,  1.7910783e+00,  4.5199763e-02,  2.9068141e+00],
      dtype=float32)

In [34]:
X_mfcc = df['speech'].apply(lambda x: extract_mfcc(x))


In [35]:
X_mfcc

Unnamed: 0,speech
0,"[-346.89862, 48.350666, -7.173008, 22.442675, ..."
1,"[-325.0482, 52.04364, -21.359823, 3.8501308, -..."
2,"[-332.86166, 55.868332, 6.4463615, 4.7437325, ..."
3,"[-313.5341, 64.24772, 8.541654, 6.5386653, -47..."
4,"[-381.2476, 67.08592, -7.305279, 13.770516, -1..."
...,...
5595,"[-447.77856, 83.91992, 8.446943, -35.14776, -4..."
5596,"[-474.85178, 74.97416, 16.806444, -21.670046, ..."
5597,"[-426.82065, 101.89979, 19.433798, -45.09878, ..."
5598,"[-431.2679, 71.080086, 14.987984, -26.789324, ..."


In [36]:
X = [x for x in X_mfcc]
X = np.array(X)
X.shape

(5600, 40)

In [37]:
## input split
X = np.expand_dims(X, -1)
X.shape

(5600, 40, 1)

In [38]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()
y = enc.fit_transform(df[['label']])

In [39]:
y = y.toarray()

In [40]:
y.shape

(5600, 7)

##Creating LSTM model##

In [41]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

model = Sequential([
    LSTM(256, return_sequences=False, input_shape=(40,1)),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(7, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [42]:
# Train the model
history = model.fit(X, y, validation_split=0.2, epochs=50, batch_size=64)

Epoch 1/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 706ms/step - accuracy: 0.5514 - loss: 1.2733 - val_accuracy: 0.8562 - val_loss: 0.3936
Epoch 2/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 358ms/step - accuracy: 0.9016 - loss: 0.3196 - val_accuracy: 0.9795 - val_loss: 0.0932
Epoch 3/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 383ms/step - accuracy: 0.9463 - loss: 0.1667 - val_accuracy: 0.9411 - val_loss: 0.1791
Epoch 4/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 350ms/step - accuracy: 0.9611 - loss: 0.1332 - val_accuracy: 0.9759 - val_loss: 0.0708
Epoch 5/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 359ms/step - accuracy: 0.9727 - loss: 0.0897 - val_accuracy: 0.9866 - val_loss: 0.0518
Epoch 6/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 384ms/step - accuracy: 0.9685 - loss: 0.0989 - val_accuracy: 0.9920 - val_loss: 0.0295
Epoch 7/50
[1m70/70[

In [43]:
# Training Accuracy and Loss
train_loss = history.history['loss'][-1]    # Last training loss
train_accuracy = history.history['accuracy'][-1]  # Last training accuracy

print(f"✅ Final Training Loss: {train_loss:.4f}")
print(f"✅ Final Training Accuracy: {train_accuracy * 100:.2f}%")


✅ Final Training Loss: 0.0002
✅ Final Training Accuracy: 100.00%


##Save the trained models##

In [56]:
# Save text emotion model
text_model.save('/content/drive/MyDrive/text_emotion_model.h5')
print("✅ Text emotion model saved successfully!")

# Save voice emotion model
voice_model.save('/content/drive/MyDrive/voice_emotion_model.h5')
print("✅ Voice emotion model saved successfully!")


NameError: name 'text_model' is not defined