In [1]:
import os
import string
from functools import lru_cache

import numpy as np
import pandas as pd

In [2]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

# Fetch the NLTK resources this notebook relies on: the 'stopwords' corpus
# (read through stopwords.words) and the 'punkt' tokenizer data (used by
# word_tokenize). Packages that are already installed are only reported as
# up-to-date.
# NOTE(review): recent NLTK releases reportedly load 'punkt_tab' instead of
# 'punkt' inside word_tokenize - confirm against the installed version.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\bhanw\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\bhanw\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# Caption table; the displayed frame shows an 'audio' file-name column and a
# 'caption' text column.
caption_df = pd.read_csv("data/Mname&Captions.csv")
# Directory of the audio clips (the playback cell reads files from data/music).
music_folder = 'data/music'

In [4]:
# Keep only the first 10 rows (explicit positional slice).
caption_df = caption_df.iloc[:10]

In [5]:
@lru_cache(maxsize=1)
def _english_stop_words():
    """Return NLTK's English stop-word list as a frozenset, built once and reused."""
    return frozenset(stopwords.words('english'))


def remove_stop_words(text):
    """Drop English stop words from ``text``.

    The text is split with NLTK's ``word_tokenize``. A token is discarded when
    its lowercase form appears in NLTK's English stop-word list; surviving
    tokens keep their original casing and are joined with single spaces.

    Parameters
    ----------
    text : str
        Text to filter.

    Returns
    -------
    str
        The remaining tokens separated by single spaces.
    """
    # The stop-word set is cached so it is not rebuilt from the corpus for
    # every row when this function is used with DataFrame.apply.
    stop_words = _english_stop_words()
    kept_tokens = [token for token in word_tokenize(text) if token.lower() not in stop_words]
    return ' '.join(kept_tokens)

In [6]:
def text_p(text):
    """Lowercase ``text`` and delete every character listed in ``string.punctuation``."""
    # Mapping a character to None in a translation table removes it.
    drop_punctuation = str.maketrans(dict.fromkeys(string.punctuation))
    return text.lower().translate(drop_punctuation)

In [7]:
# Single Porter stemmer instance shared by every stem_text call
stemmer = PorterStemmer()


def stem_text(text):
    """Tokenize ``text`` with ``word_tokenize``, stem each token, and re-join with spaces."""
    return ' '.join(map(stemmer.stem, word_tokenize(text)))

In [8]:
def text_preprocess(text):
    """Clean one caption for embedding.

    Applies, in order: ``text_p`` (lowercase + punctuation removal),
    ``remove_stop_words``, then ``stem_text``.
    """
    return stem_text(remove_stop_words(text_p(text)))

## What `text_preprocess` does
1. Lowercasing
2. Removing Punctuation
3. Removing Stop Words
4. Stemming

In [9]:
# Run the clean-up pipeline on every caption and store the result in a new column.
caption_df["Preprocessed_Text"] = caption_df['caption'].apply(text_preprocess)

In [10]:
# Display the frame to compare each caption with its preprocessed text.
caption_df

Unnamed: 0,audio,caption,Preprocessed_Text
0,drJaSu3AWhQ_0.wav,The low quality recording features a funky blu...,low qualiti record featur funki blue acoust gu...
1,k-J2-Ou1Fm8_4.wav,"Single sounds, noises and melodies being playe...",singl sound nois melodi play lot reverb delay ...
2,drJaSu3AWhQ_0.wav,The low quality recording features a funky blu...,low qualiti record featur funki blue acoust gu...
3,k-J2-Ou1Fm8_4.wav,"Single sounds, noises and melodies being playe...",singl sound nois melodi play lot reverb delay ...
4,9jeEfi6nDak_0.wav,This music sounds oriental full of semitones a...,music sound orient full semiton quarternot use...
5,yIFP8fkq8GU_1.wav,THis song contains a piano playing a major cho...,song contain piano play major chord progress m...
6,bt7rDryN7G4_2.wav,This music is an electronic guitar instrumenta...,music electron guitar instrument tempo medium ...
7,Zxyfhub6nV4_3.wav,This audio holds a church bell ringing loudly ...,audio hold church bell ring loudli lot sustain...
8,1tz4xNRRR4M_3.wav,This lullaby features a fruity male vocal talk...,lullabi featur fruiti male vocal talk haunt be...
9,w09XinexaIY_1.wav,The low quality recording features synchronize...,low qualiti record featur synchron wooden perc...


### Word embedding using GloVe

In [11]:
# Load GloVe model
def load_glove_model(file_path):
    """Read a GloVe text file into a ``{word: vector}`` dictionary.

    Every non-blank line is split on whitespace: the first field is the token
    and the remaining fields are its vector components.

    Parameters
    ----------
    file_path : str or path-like
        Location of the GloVe text file; it is read as UTF-8.

    Returns
    -------
    dict
        Maps each token to a 1-D ``float32`` NumPy array.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a vector component cannot be converted to a float.
    """
    print("Loading GloVe Model...")
    glove_model = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Blank or whitespace-only lines (e.g. a trailing empty line)
                # hold no entry; indexing parts[0] on them would raise IndexError.
                continue
            word, *components = parts
            glove_model[word] = np.array(components, dtype='float32')
    print(f"Done. {len(glove_model)} words loaded!")
    return glove_model

In [12]:
# Forward-slash path: "\g" is not a valid string escape (Python emits a warning
# for it) and a backslash separator only works on Windows.
glove_model = load_glove_model("data/glove.6B.50d.txt")

  glove_model = load_glove_model("data\glove.6B.50d.txt")


Loading GloVe Model...
Done. 400000 words loaded!


In [13]:
# Function to get sentence embedding
def get_sentence_embedding(sentence, model):
    """Average the word vectors of a whitespace-separated sentence.

    Parameters
    ----------
    sentence : str
        Text whose words are looked up in ``model``; it is split on whitespace.
    model : mapping
        Maps a word to its NumPy vector (supports ``in``, ``[]`` and
        ``.values()``).

    Returns
    -------
    numpy.ndarray
        Element-wise mean of the vectors of the words found in ``model``.
        When none of the words is found, a zero vector with the same shape
        and dtype as the model's vectors.

    Raises
    ------
    StopIteration
        If ``model`` is empty, since no vector exists to size the zero fallback.
    """
    word_vectors = [model[word] for word in sentence.split() if word in model]
    if word_vectors:
        return np.mean(word_vectors, axis=0)
    # No known word: copy shape *and* dtype from a stored vector so the
    # fallback is consistent with regular embeddings (np.zeros alone would
    # yield float64 next to float32 means).
    return np.zeros_like(next(iter(model.values())))


In [14]:
# Embed every preprocessed caption with the loaded GloVe vectors; each cell of
# the new column holds one array.
# NOTE(review): Preprocessed_Text is stemmed (the displayed frame shows forms
# such as 'qualiti' and 'featur'), and words missing from glove_model are
# silently skipped by get_sentence_embedding - confirm how many tokens actually
# match the GloVe vocabulary.
caption_df['embedding'] = caption_df['Preprocessed_Text'].apply(lambda x: get_sentence_embedding(x, glove_model))


In [15]:
# Display the frame, now including the 'embedding' column.
caption_df

Unnamed: 0,audio,caption,Preprocessed_Text,embedding
0,drJaSu3AWhQ_0.wav,The low quality recording features a funky blu...,low qualiti record featur funki blue acoust gu...,"[-0.583912, 0.17111021, -0.061589416, 0.200745..."
1,k-J2-Ou1Fm8_4.wav,"Single sounds, noises and melodies being playe...",singl sound nois melodi play lot reverb delay ...,"[-0.13017778, 0.25143927, -0.36494887, -0.0897..."
2,drJaSu3AWhQ_0.wav,The low quality recording features a funky blu...,low qualiti record featur funki blue acoust gu...,"[-0.583912, 0.17111021, -0.061589416, 0.200745..."
3,k-J2-Ou1Fm8_4.wav,"Single sounds, noises and melodies being playe...",singl sound nois melodi play lot reverb delay ...,"[-0.13017778, 0.25143927, -0.36494887, -0.0897..."
4,9jeEfi6nDak_0.wav,This music sounds oriental full of semitones a...,music sound orient full semiton quarternot use...,"[-0.22077164, 0.26122683, -0.431657, 0.1322846..."
5,yIFP8fkq8GU_1.wav,THis song contains a piano playing a major cho...,song contain piano play major chord progress m...,"[-0.18766837, 0.26252002, -0.20414497, -0.1557..."
6,bt7rDryN7G4_2.wav,This music is an electronic guitar instrumenta...,music electron guitar instrument tempo medium ...,"[-0.40039217, 0.007086499, -0.31372836, -0.105..."
7,Zxyfhub6nV4_3.wav,This audio holds a church bell ringing loudly ...,audio hold church bell ring loudli lot sustain...,"[-0.24701935, 0.039373647, 0.36798015, 0.14282..."
8,1tz4xNRRR4M_3.wav,This lullaby features a fruity male vocal talk...,lullabi featur fruiti male vocal talk haunt be...,"[0.00028506448, 0.28652883, -0.28556514, -0.09..."
9,w09XinexaIY_1.wav,The low quality recording features synchronize...,low qualiti record featur synchron wooden perc...,"[-0.54268664, 0.04859668, 0.2034855, -0.127628..."


In [16]:
import IPython.display as ipd

# Play the clip of the first row. The directory comes from music_folder rather
# than a repeated literal, and .iloc[0] selects the row by position so the
# lookup does not depend on the index labels.
ipd.Audio(f"{music_folder}/{caption_df['audio'].iloc[0]}")