In [1]:
# --- Imoort libraries ---
import os
import torchaudio
import torch
import subprocess
import tempfile
import pandas as pd
from transformers import AutoFeatureExtractor, WavLMModel, AutoTokenizer, RobertaModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch

print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())

if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)
    print("GPU Name:", torch.cuda.get_device_name(0))


Torch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
GPU Name: NVIDIA GeForce RTX 4070


In [3]:
# --- Configuration ---
BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, "MELD.Raw", "train")

MP4_DIR = os.path.join(DATA_DIR, "train_splits")  # Audio files
CSV_FILE = os.path.join(DATA_DIR, "train_sent_emo.csv")  # Text utterances
COMBINEd_DIR =os.path.join(DATA_DIR, "train_combined")  # Combined Audio files

print(MP4_DIR)
print(CSV_FILE)
print(COMBINEd_DIR)


C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_splits
C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_sent_emo.csv
C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined


In [4]:
# --- Models and Tokenizers ---
feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/wavlm-base-plus")
wavlm_model = WavLMModel.from_pretrained("microsoft/wavlm-base-plus")

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
roberta_model = RobertaModel.from_pretrained("roberta-base")

device = "mps" if torch.backends.mps.is_available() else "cpu"
device = "cuda" if torch.cuda.is_available() else device
print(f"cuda available: {torch.cuda.is_available()}")
print(f"device: {device}")
wavlm_model.to(device)
roberta_model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda available: True
device: cuda


RobertaModel(
  (embeddings): RobertaEmbeddings(
    (word_embeddings): Embedding(50265, 768, padding_idx=1)
    (position_embeddings): Embedding(514, 768, padding_idx=1)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): RobertaEncoder(
    (layer): ModuleList(
      (0-11): 12 x RobertaLayer(
        (attention): RobertaAttention(
          (self): RobertaSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): RobertaSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dr

In [5]:
# Load CSV file
df = pd.read_csv(CSV_FILE)
df.head()

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime
0,1,also I was the point person on my company’s tr...,Chandler,neutral,neutral,0,0,8,21,"00:16:16,059","00:16:21,731"
1,2,You must’ve had your hands full.,The Interviewer,neutral,neutral,0,1,8,21,"00:16:21,940","00:16:23,442"
2,3,That I did. That I did.,Chandler,neutral,neutral,0,2,8,21,"00:16:23,442","00:16:26,389"
3,4,So let’s talk a little bit about your duties.,The Interviewer,neutral,neutral,0,3,8,21,"00:16:26,820","00:16:29,572"
4,5,My duties? All right.,Chandler,surprise,positive,0,4,8,21,"00:16:34,452","00:16:40,917"


In [11]:
df.tail()

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime
9984,10474,You or me?,Chandler,neutral,neutral,1038,13,2,3,"00:00:48,173","00:00:50,799"
9985,10475,"I got it. Uh, Joey, women don't have Adam's ap...",Ross,neutral,neutral,1038,14,2,3,"00:00:51,009","00:00:53,594"
9986,10476,"You guys are messing with me, right?",Joey,surprise,positive,1038,15,2,3,"00:01:00,518","00:01:03,520"
9987,10477,Yeah.,All,neutral,neutral,1038,16,2,3,"00:01:05,398","00:01:07,274"
9988,10478,"That was a good one. For a second there, I was...",Joey,joy,positive,1038,17,2,3,"00:01:08,401","00:01:12,071"


In [5]:
import os
from pydub import AudioSegment
from collections import defaultdict

# Get all .mp4 audio files
audio_files = [f for f in os.listdir(MP4_DIR) if f.endswith(".mp4")]

# Group files by dialogue number
dialogue_dict = defaultdict(list)
for file in audio_files:
    parts = file.split("_")  # Extract dialogue number
    if len(parts) >= 2:
        dialogue_num = parts[0]  # First part is the dialogue ID
        dialogue_dict[dialogue_num].append(file)

# Sort files by utterance ID within each dialogue
for dialogue in dialogue_dict:
    dialogue_dict[dialogue].sort(key=lambda x: int(x.split("_")[1].split(".")[0][3:] ))



In [6]:
# Output folder for combined audio
os.makedirs(COMBINEd_DIR, exist_ok=True)

In [7]:
# Merge audio files for each dialogue
# dialog 125 has error
for dialogue_num, files in dialogue_dict.items(): 
    try:
        combined_audio = AudioSegment.empty()
        # Convert first audio file to mono/stereo (if needed)
        combined_audio = combined_audio.set_channels(1)   
        
        for file in files:
            file_path = os.path.join(MP4_DIR, file)  
            audio = AudioSegment.from_file(file_path, format="mp4")  # Load .mp4 file     
            
            # Convert to the same number of channels as combined_audio
            audio = audio.set_channels(combined_audio.channels)
            combined_audio += audio  # Concatenate audio files

        output_path = os.path.join(COMBINEd_DIR, f"{dialogue_num}_combined.mp4")
        combined_audio.export(output_path, format="mp4")  # Save as .mp4
        print(f"Saved: {output_path}")

    except Exception as e:
        print(f"Error processing dialogue {dialogue_num}: {e}")
        continue  # Continue with the next dialogue
print("All dialogues merged successfully!")

Saved: C:\Users\allar\Documents\UdeM\IFT6759 Projets Avancées\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia283_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 Projets Avancées\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia284_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 Projets Avancées\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia285_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 Projets Avancées\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia286_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 Projets Avancées\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia287_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 Projets Avancées\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia288_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 Projets Avancées\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train

In [33]:
'''
V1
Save the pre-processed data (MELD)
Save by conversation (every utterances except last one + label of last one)
*V1: raw conversation without using previous labels
*Add index for each utterance embedding
'''
# Merge uttence into one row

# Extract the last emotion for each dialogue
last_emotion = df.groupby('Dialogue_ID').apply(lambda x: x.iloc[-1]['Emotion']).reset_index(name='Last_Emotion')

# Remove the last utterance of each dialogue
df_filtered = df.groupby('Dialogue_ID').apply(lambda x: x.iloc[:-1]).reset_index(drop=True)


# Group by Dialogue ID and concatenate utterances
df_combined_v1 = df.groupby('Dialogue_ID').agg({
    'Utterance': lambda x: ' '.join(x),  # Combine utterances into one string
    'Emotion': lambda x: list(x),  # Keep emotions as a list
    #'Sentiment': lambda x: list(x)  # Keep sentiments as a list (optional)
}).reset_index()

# Merge the last emotion back into df_combined
df_combined_v1 = df_combined_v1.merge(last_emotion, on='Dialogue_ID', how='left')

# Add the 'audio_name' column based on Dialogue_ID
df_combined_v1['audio_name'] = df_combined_v1['Dialogue_ID'].apply(lambda x: f"dia{x}_combined.mp4")

# Save the processed dataset
df_combined_v1.to_csv("MELD_combined_v1.csv", index=False)
print("Processed dataset saved as MELD_combined_v1.csv")


Processed dataset saved as MELD_combined_v1.csv


  last_emotion = df.groupby('Dialogue_ID').apply(lambda x: x.iloc[-1]['Emotion']).reset_index(name='Last_Emotion')
  df_filtered = df.groupby('Dialogue_ID').apply(lambda x: x.iloc[:-1]).reset_index(drop=True)


In [34]:
df_combined_v1.head()

Unnamed: 0,Dialogue_ID,Utterance,Emotion,Last_Emotion,audio_name
0,0,also I was the point person on my companys tr...,"[neutral, neutral, neutral, neutral, surprise,...",neutral,dia0_combined.mp4
1,1,But then who? The waitress I went out with las...,"[surprise, sadness, surprise, fear, neutral, n...",neutral,dia1_combined.mp4
2,2,"Hey, Mon. Hey-hey-hey. You wanna hear somethin...","[neutral, neutral, joy, sadness, surprise, neu...",joy,dia2_combined.mp4
3,3,"Oh my God, oh my God! Poor Monica! What, what,...","[surprise, surprise, surprise, neutral, neutra...",neutral,dia3_combined.mp4
4,4,"Hey! Hi! What are you doing here? Ah y'know, t...","[surprise, joy, surprise, neutral, neutral, ne...",neutral,dia4_combined.mp4


In [35]:
df_combined_v1.tail()

Unnamed: 0,Dialogue_ID,Utterance,Emotion,Last_Emotion,audio_name
1033,1034,"Hey, Mon, I was just doing the dishes! Hey! Oh...","[neutral, joy, surprise]",surprise,dia1034_combined.mp4
1034,1035,What about me?! You-you just said I could! Im...,"[anger, neutral, anger, neutral, neutral, ange...",joy,dia1035_combined.mp4
1035,1036,Hey-hey! Stanley! Hey-hey! You're leading man ...,"[joy, sadness, surprise, sadness, surprise, ne...",sadness,dia1036_combined.mp4
1036,1037,"Rachel, do you have any muffins left? Yeah, I ...","[neutral, neutral, neutral]",neutral,dia1037_combined.mp4
1037,1038,Hey. Hey! So how was Joan? I broke up with her...,"[neutral, joy, neutral, neutral, surprise, dis...",joy,dia1038_combined.mp4


In [6]:
'''
V2
Save the pre-processed data (MELD)
Save by conversation (every utterances except last one + label of last one)
*V2: add label as text at the end of each conversation before feeding it to WaveLM/RoBERTa
'''

# Add emotion to the end of uttence
df['Utterance'] = df['Utterance'] + ' ' + df['Emotion']

# Extract the last emotion for each dialogue
last_emotion = df.groupby('Dialogue_ID').apply(lambda x: x.iloc[-1]['Emotion']).reset_index(name='Last_Emotion')

# Remove the last utterance of each dialogue
df_filtered = df.groupby('Dialogue_ID').apply(lambda x: x.iloc[:-1]).reset_index(drop=True)

# Merge uttence into one row
# Group by Dialogue ID and concatenate utterances
df_combined_v2 = df_filtered.groupby('Dialogue_ID').agg({
    'Utterance': lambda x: ' '.join(x),  # Combine utterances into one string
    'Emotion': lambda x: list(x),  # Keep emotions as a list
    #'Sentiment': lambda x: list(x)  # Keep sentiments as a list (optional)
}).reset_index()

# Merge the last emotion back into df_combined_v2
df_combined_v2 = df_combined_v2.merge(last_emotion, on='Dialogue_ID', how='left')

# Add the 'audio_name' column based on Dialogue_ID
df_combined_v2['audio_name'] = df_combined_v2['Dialogue_ID'].apply(lambda x: f"dia{x}_combined.mp4")

# Save the processed dataset
df_combined_v2.to_csv("MELD_combined_v2.2.csv", index=False)
print("Processed dataset saved as MELD_combined_v2.2.csv")


Processed dataset saved as MELD_combined_v2.2.csv


  last_emotion = df.groupby('Dialogue_ID').apply(lambda x: x.iloc[-1]['Emotion']).reset_index(name='Last_Emotion')
  df_filtered = df.groupby('Dialogue_ID').apply(lambda x: x.iloc[:-1]).reset_index(drop=True)


In [55]:
df_combined_v2.head()

Unnamed: 0,Dialogue_ID,Utterance,Emotion,Last_Emotion,audio_name
0,0,also I was the point person on my company’s tr...,"[neutral, neutral, neutral, neutral, surprise,...",neutral,dia0_combined.mp4
1,1,But then who? The waitress I went out with las...,"[surprise, sadness, surprise, fear, neutral, n...",neutral,dia1_combined.mp4
2,2,"Hey, Mon. neutral Hey-hey-hey. You wanna hear ...","[neutral, neutral, joy, sadness, surprise, neu...",joy,dia2_combined.mp4
3,3,"Oh my God, oh my God! Poor Monica! surprise Wh...","[surprise, surprise, surprise, neutral, neutra...",neutral,dia3_combined.mp4
4,4,Hey! surprise Hi! joy What are you doing here?...,"[surprise, joy, surprise, neutral, neutral, ne...",neutral,dia4_combined.mp4


In [45]:
df_combined_v2.tail()

Unnamed: 0,Dialogue_ID,Utterance,Emotion,Last_Emotion,audio_name
1033,1034,"Hey, Mon, I was just doing the dishes! neutral...","[neutral, joy, surprise]",surprise,dia1034_combined.mp4
1034,1035,What about me?! You-you just said I could! ang...,"[anger, neutral, anger, neutral, neutral, ange...",joy,dia1035_combined.mp4
1035,1036,Hey-hey! Stanley! Hey-hey! You're leading man ...,"[joy, sadness, surprise, sadness, surprise, ne...",sadness,dia1036_combined.mp4
1036,1037,"Rachel, do you have any muffins left? neutral ...","[neutral, neutral, neutral]",neutral,dia1037_combined.mp4
1037,1038,Hey. neutral Hey! joy So how was Joan? neutral...,"[neutral, joy, neutral, neutral, surprise, dis...",joy,dia1038_combined.mp4


In [52]:
import shlex
import torch.nn.functional as F

def extract_audio_from_mp4(mp4_path):
    """Extracts audio from an MP4 file and converts it to WAV format."""
    # temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
    temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=True).name

    command = ["ffmpeg", "-i", mp4_path, "-ac", "1", "-ar", "16000", "-y", temp_wav]
    subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    return temp_wav


def extract_audio_features(mp4_path):
    """Extracts WavLM features while preserving sequence length."""
    wav_path = extract_audio_from_mp4(mp4_path)
    waveform, sample_rate = torchaudio.load(wav_path)

    if sample_rate != 16000:
        transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        waveform = transform(waveform)

    inputs = feature_extractor(waveform.squeeze(0), sampling_rate=16000, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = wavlm_model(**inputs)

    audio_features = outputs.last_hidden_state.cpu()  # (1, audio_seq_length, 768)

    # Resize sequence length to exactly 128
    audio_features = F.interpolate(audio_features.permute(0, 2, 1), size=128, mode="linear").permute(0, 2, 1)


    return audio_features  # Shape: (1, audio_seq_length resize to 128, 768) [1, 128, 768])



In [53]:
def extract_text_features(utterance):
    """Extracts RoBERTa features while preserving sequence length."""
    inputs = tokenizer(utterance, truncation=True, max_length=128, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = roberta_model(**inputs)
    
    text_features = outputs.last_hidden_state.cpu()

    # Resize sequence length to exactly 128
    text_features = F.interpolate(text_features.permute(0, 2, 1), size=128, mode="linear").permute(0, 2, 1)

    return text_features  # Shape: (1, text_seq_length resize to 128, 768)  [1, 128, 768])


In [54]:
import torch.nn.functional as F
def combine_audio_text_features(audio_features, text_features):   
    return torch.cat((audio_features, text_features), dim=-1)  # Shape: (1, 128, 1536)  


In [26]:
# v1 version
import pickle
import numpy as np
import torch.nn.functional as F

# Dictionary to store combined features
combined_features_list_v1 = []

# Process each dialogue
for index, row in df_combined_v1.iterrows():     
    try:
        utterance = row["Utterance"]
        mp4_filename = row["audio_name"]
        mp4_path = os.path.join(COMBINEd_DIR, mp4_filename)

        if not os.path.exists(mp4_path):
            print(f"Skipping missing file: {mp4_path}")
            continue

        print(f"Processing {mp4_filename}...")

        audio_features = extract_audio_features(mp4_path)
        text_features = extract_text_features(utterance)

        combined_features = combine_audio_text_features(audio_features, text_features)         
        combined_features_list_v1.append(combined_features)
        # Ensure list is not empty
        if not combined_features_list_v1:
            raise ValueError("Error: combined_features_list_v1 is empty!")
    except Exception as e:
        print(f"Error processing row {index}: {e}")
        continue  # Continue with the next dialogue


# Ensure list is not empty
if not combined_features_list_v1:
    raise ValueError("Error: combined_features_list_v1 is empty!")

# Check if all items are tensors
for i, item in enumerate(combined_features_list_v1):
    if not isinstance(item, torch.Tensor):
        raise TypeError(f"Error: Item {i} is not a tensor! Found {type(item)}")


# # Find the maximum sequence length in the list
# max_seq_length = max(item.shape[1] for item in combined_features_list_v1)
# print(max_seq_length)
# # Pad all tensors to max_seq_length
# padded_features_list = [
#     F.pad(item, (0, 0, 0, max_seq_length - item.shape[1]))  # Pad sequence dimension
#     for item in combined_features_list_v1
# ]

# Convert list to a single tensor
final_tensor_conversion_v1 = torch.cat(combined_features_list_v1, dim=0)


print(f"Final tensor shape: {final_tensor_conversion_v1.shape}")  # Expected: (num_dialogues, 128, 1536)

# Define the file path
file_path_v1 = "final_tensor_conversition_v1.pkl"

# Save the tensor to a .pkl file
with open(file_path_v1, "wb") as f:
    pickle.dump(final_tensor_conversion_v1, f)

print(f"Tensor saved to {file_path_v1}")

Processing dia0_combined.mp4...
Processing dia1_combined.mp4...
Processing dia2_combined.mp4...
Processing dia3_combined.mp4...
Processing dia4_combined.mp4...
Processing dia5_combined.mp4...
Processing dia6_combined.mp4...
Processing dia7_combined.mp4...
Processing dia8_combined.mp4...
Processing dia9_combined.mp4...
Processing dia10_combined.mp4...
Processing dia11_combined.mp4...
Processing dia12_combined.mp4...
Processing dia13_combined.mp4...
Processing dia14_combined.mp4...
Processing dia15_combined.mp4...
Processing dia16_combined.mp4...
Processing dia17_combined.mp4...
Processing dia18_combined.mp4...
Processing dia19_combined.mp4...
Processing dia20_combined.mp4...
Processing dia21_combined.mp4...
Processing dia22_combined.mp4...
Processing dia23_combined.mp4...
Processing dia24_combined.mp4...
Processing dia25_combined.mp4...
Processing dia26_combined.mp4...
Processing dia27_combined.mp4...
Processing dia28_combined.mp4...
Processing dia29_combined.mp4...
Processing dia30_com

In [60]:
# v2 version
import pickle
import numpy as np
import torch.nn.functional as F

# Dictionary to store combined features
combined_features_list_v2 = []
combined_features_list_v2_labels = []

# Process each dialogue
for index, row in df_combined_v2.iterrows(): 
    try:
        label = row["Last_Emotion"]
        utterance = row["Utterance"]
        mp4_filename = row["audio_name"]
        mp4_path = os.path.join(COMBINEd_DIR, mp4_filename)

        if not os.path.exists(mp4_path):
            print(f"Skipping missing file: {mp4_path}")
            continue

        print(f"Processing {mp4_filename}...")

        audio_features = extract_audio_features(mp4_path)
        text_features = extract_text_features(utterance)

        combined_features = combine_audio_text_features(audio_features, text_features)         
        combined_features_list_v2.append(combined_features)
        combined_features_list_v2_labels.append(label)
        
    except Exception as e:
        print(f"Error processing row {index}: {e}")
        continue  # Continue with the next dialogue


# Convert list to a single tensor
final_tensor_conversion_v2 = torch.cat(combined_features_list_v2, dim=0)
final_tensor_conversion_v2_labels = pd.DataFrame(combined_features_list_v2_labels, columns=["Last_Emotion"])
final_tensor_conversion_v2_labels.index.name = 'DialogueID'

print(f"Final tensor shape: {final_tensor_conversion_v2.shape}")  # Expected: (num_dialogues, max_seq_length, 1536)


# Define the file path
file_path_v2 = "final_tensor_conversition_v2.pkl"
file_path_v2_labels = "final_tensor_conversition_v2_labels.csv"

# Save the tensor to a .pkl file
with open(file_path_v2, "wb") as f:
    pickle.dump(final_tensor_conversion_v2, f)
    final_tensor_conversion_v2_labels.to_csv(file_path_v2_labels)

print(f"Tensor saved to {file_path_v2}")
print(f"Labels saved to {file_path_v2_labels}")



Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia0_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia1_combined.mp4
Processing dia2_combined.mp4...




Processing dia3_combined.mp4...
Processing dia4_combined.mp4...
Processing dia5_combined.mp4...
Processing dia6_combined.mp4...
Processing dia7_combined.mp4...
Processing dia8_combined.mp4...
Processing dia9_combined.mp4...
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia10_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia11_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia12_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia13_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia14_comb

In [57]:
with open(r"final_tensor_conversition_v2.pkl", "rb") as input_file:
   embeded_dataset = pickle.load(input_file)

embeded_dataset

tensor([[[ 1.9422e-01,  7.3074e-02, -3.4201e-02,  ..., -5.5827e-02,
          -3.1426e-02, -2.8269e-02],
         [ 8.3766e-02,  5.9123e-02, -2.5568e-02,  ...,  2.5517e-02,
           1.7586e-01, -3.6616e-03],
         [-7.0393e-02,  2.3142e-02, -1.4576e-01,  ...,  9.1835e-02,
           1.9120e-01,  1.4820e-02],
         ...,
         [-2.0074e-01, -1.0423e-01, -4.2252e-02,  ..., -4.1072e-01,
          -5.5119e-02,  2.3936e-01],
         [-2.5834e-02,  1.6300e-03,  1.6287e-01,  ..., -1.3382e-01,
          -9.1538e-03,  1.5889e-01],
         [ 9.0732e-02,  3.3696e-03, -9.9687e-02,  ...,  1.8062e-01,
           6.2518e-02, -2.9198e-02]],

        [[ 1.5632e-01,  6.1721e-02,  4.5888e-02,  ..., -6.5964e-02,
          -3.8836e-02, -6.3706e-02],
         [ 1.1007e-02, -1.6243e-01,  1.6831e-02,  ...,  4.1382e-02,
          -6.3298e-02, -1.4120e-01],
         [-5.4009e-02, -1.2413e-01, -9.3608e-02,  ...,  1.2063e-02,
          -8.0839e-02, -1.9989e-01],
         ...,
         [-5.8710e-02, -4

In [58]:
embeded_dataset.shape

torch.Size([734, 128, 1536])

In [59]:
df_combined_v2

Unnamed: 0,Dialogue_ID,Utterance,Emotion,Last_Emotion,audio_name
0,0,also I was the point person on my company’s tr...,"[neutral, neutral, neutral, neutral, surprise,...",neutral,dia0_combined.mp4
1,1,But then who? The waitress I went out with las...,"[surprise, sadness, surprise, fear, neutral, n...",neutral,dia1_combined.mp4
2,2,"Hey, Mon. neutral Hey-hey-hey. You wanna hear ...","[neutral, neutral, joy, sadness, surprise, neu...",joy,dia2_combined.mp4
3,3,"Oh my God, oh my God! Poor Monica! surprise Wh...","[surprise, surprise, surprise, neutral, neutra...",neutral,dia3_combined.mp4
4,4,Hey! surprise Hi! joy What are you doing here?...,"[surprise, joy, surprise, neutral, neutral, ne...",neutral,dia4_combined.mp4
...,...,...,...,...,...
963,1034,"Hey, Mon, I was just doing the dishes! neutral...","[neutral, joy]",surprise,dia1034_combined.mp4
964,1035,What about me?! You-you just said I could! ang...,"[anger, neutral, anger, neutral, neutral, anger]",joy,dia1035_combined.mp4
965,1036,Hey-hey! Stanley! Hey-hey! You're leading man ...,"[joy, sadness, surprise, sadness, surprise, ne...",sadness,dia1036_combined.mp4
966,1037,"Rachel, do you have any muffins left? neutral ...","[neutral, neutral]",neutral,dia1037_combined.mp4


In [66]:
# Load your CSV
df_labels = pd.read_csv('final_tensor_conversition_v2_labels.csv', index_col=0)

# Rename the index column
df1 = df1.rename_axis('DialogueID')

df1
# Save the modified DataFrame back to CSV (optional)
# df1.to_csv('final_tensor_conversition_v2_labels.csv')

Unnamed: 0_level_0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime
DialogueID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1,also I was the point person on my company’s tr...,Chandler,neutral,neutral,0,0,8,21,"00:16:16,059","00:16:21,731"
1,2,You must’ve had your hands full. neutral,The Interviewer,neutral,neutral,0,1,8,21,"00:16:21,940","00:16:23,442"
2,3,That I did. That I did. neutral,Chandler,neutral,neutral,0,2,8,21,"00:16:23,442","00:16:26,389"
3,4,So let’s talk a little bit about your duties. ...,The Interviewer,neutral,neutral,0,3,8,21,"00:16:26,820","00:16:29,572"
4,5,My duties? All right. surprise,Chandler,surprise,positive,0,4,8,21,"00:16:34,452","00:16:40,917"
...,...,...,...,...,...,...,...,...,...,...,...
9984,10474,You or me? neutral,Chandler,neutral,neutral,1038,13,2,3,"00:00:48,173","00:00:50,799"
9985,10475,"I got it. Uh, Joey, women don't have Adam's ap...",Ross,neutral,neutral,1038,14,2,3,"00:00:51,009","00:00:53,594"
9986,10476,"You guys are messing with me, right? surprise",Joey,surprise,positive,1038,15,2,3,"00:01:00,518","00:01:03,520"
9987,10477,Yeah. neutral,All,neutral,neutral,1038,16,2,3,"00:01:05,398","00:01:07,274"


In [67]:
df_tt = pd.read_csv('final_tensor_conversition_v2_labels.csv', index_col=0)
df_tt

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime
0,1,also I was the point person on my company’s tr...,Chandler,neutral,neutral,0,0,8,21,"00:16:16,059","00:16:21,731"
1,2,You must’ve had your hands full. neutral,The Interviewer,neutral,neutral,0,1,8,21,"00:16:21,940","00:16:23,442"
2,3,That I did. That I did. neutral,Chandler,neutral,neutral,0,2,8,21,"00:16:23,442","00:16:26,389"
3,4,So let’s talk a little bit about your duties. ...,The Interviewer,neutral,neutral,0,3,8,21,"00:16:26,820","00:16:29,572"
4,5,My duties? All right. surprise,Chandler,surprise,positive,0,4,8,21,"00:16:34,452","00:16:40,917"
...,...,...,...,...,...,...,...,...,...,...,...
9984,10474,You or me? neutral,Chandler,neutral,neutral,1038,13,2,3,"00:00:48,173","00:00:50,799"
9985,10475,"I got it. Uh, Joey, women don't have Adam's ap...",Ross,neutral,neutral,1038,14,2,3,"00:00:51,009","00:00:53,594"
9986,10476,"You guys are messing with me, right? surprise",Joey,surprise,positive,1038,15,2,3,"00:01:00,518","00:01:03,520"
9987,10477,Yeah. neutral,All,neutral,neutral,1038,16,2,3,"00:01:05,398","00:01:07,274"


# Using .py

In [3]:
from src.conversation_processing import ConversationProcessor_CNN
import os

## Train:
# MELD_folder                 = "train"
# audio_data_folder           = "train_splits"
# text_data_csv_filename      = "train_sent_emo.csv"
# combined_audio_data_folder  = "train_combined"
# combined_csv_filename       = "MELD_combined_v2.csv"
# embeddings_plk_filename     = "final_tensor_conversation_v2"

## Test
MELD_folder                 = "test"
audio_data_folder           = "output_repeated_splits_test"
text_data_csv_filename      = "test_sent_emo.csv"
combined_audio_data_folder  = "test_combined"
combined_csv_filename       = "MELD_combined_v2.csv"
embeddings_plk_filename     = "final_tensor_conversation_v2_test"


myProcessor = ConversationProcessor_CNN(
    project_dir=os.getcwd(),
    MELD_folder=MELD_folder,
    audio_data_folder=audio_data_folder,
    text_data_csv_filename=text_data_csv_filename,
    combined_audio_data_folder=combined_audio_data_folder,
    combined_csv_filename=combined_csv_filename,
    embeddings_plk_filename=embeddings_plk_filename)

myProcessor.Process_pipeline()

# df_p1 = myProcessor.Process2()
# df_p1

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ConversationProcessor initialized using device: cuda
Combining Audio files....
Saved: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\test\test_combined\dia0_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\test\test_combined\dia100_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\test\test_combined\dia101_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\test\test_combined\dia102_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\test\test_combined\dia103_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\test\test_combined\dia104_combined.mp4
Saved: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML



Processing dia1_combined.mp4...
Processing dia2_combined.mp4...
Processing dia3_combined.mp4...
Processing dia4_combined.mp4...
Processing dia5_combined.mp4...
Processing dia6_combined.mp4...
Processing dia7_combined.mp4...
Processing dia8_combined.mp4...
Processing dia9_combined.mp4...
Processing dia10_combined.mp4...
Processing dia11_combined.mp4...
Processing dia12_combined.mp4...
Processing dia13_combined.mp4...
Processing dia14_combined.mp4...
Processing dia15_combined.mp4...
Processing dia16_combined.mp4...
Processing dia17_combined.mp4...
Processing dia18_combined.mp4...
Processing dia20_combined.mp4...
Processing dia21_combined.mp4...
Processing dia22_combined.mp4...
Processing dia23_combined.mp4...
Processing dia24_combined.mp4...
Processing dia25_combined.mp4...
Processing dia27_combined.mp4...
Processing dia28_combined.mp4...
Processing dia29_combined.mp4...
Processing dia30_combined.mp4...
Processing dia32_combined.mp4...
Processing dia33_combined.mp4...
Processing dia34_co

In [2]:
myProcessor.Process3(df_p1)

Extracting AUDIO and TEXT features....
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia0_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia1_combined.mp4
Processing dia2_combined.mp4...




Processing dia3_combined.mp4...
Processing dia4_combined.mp4...
Processing dia5_combined.mp4...
Processing dia6_combined.mp4...
Processing dia7_combined.mp4...
Processing dia8_combined.mp4...
Processing dia9_combined.mp4...
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia10_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia11_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia12_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia13_combined.mp4
Skipping missing file: C:\Users\allar\Documents\UdeM\IFT6759 AdvProject\Projet\IFT-6759-Advanced-ML-Project\MELD.Raw\train\train_combined\dia14_comb

In [3]:
myProcessor.BASE_DIR

'C:\\Users\\allar\\Documents\\UdeM\\IFT6759 AdvProject\\Projet\\IFT-6759-Advanced-ML-Project'