In [1]:
# TF-IDF, Word2Vec, FastText

import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Load the raw script text into a list, one entry per line of the file
# (each entry keeps its trailing newline).
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm it matches how Music.txt was saved.
with open("Music.txt", "r") as f:
    lines = list(f)

In [3]:
# Keep only lines longer than MIN_LINE_LENGTH characters, which drops blank
# lines and other very short fragments. The measured length includes the
# trailing newline that each entry of `lines` still carries.
MIN_LINE_LENGTH = 10

# A named loop variable is used rather than `_`, which IPython reserves for
# the most recent cell output.
cleaned_lines = [line for line in lines if len(line) > MIN_LINE_LENGTH]

In [4]:
# Display the lines that passed the length filter.
cleaned_lines

['Step into a world of musical enchantment as the vivacious Harper takes the stage, weaving her mellifluous voice through the air, promising an exhilarating performance filled with boundless energy.\n',
 'Get ready to be spellbound by the dynamic Caleb, whose resounding vocals and electrifying stage presence are certain to turn this performance into a mesmerizing spectacle of sonic brilliance.\n',
 'Brace yourselves for a soulful journey led by the enchanting Amelia, whose mellifluous and emotive singing is set to create an intimate atmosphere, leaving the audience awestruck.\n',
 'Join the charismatic Leo under the spotlight as he unleashes his powerful and resonant voice, creating a symphony of emotions that will linger long after the final note has been sung.\n',
 'Welcome the incomparable Luna to the stage, where her celestial voice and spellbinding delivery promise to take the audience on a celestial odyssey, making this performance truly otherworldly.\n',
 'Prepare to be transpor

In [5]:
import pandas as pd

# Build the working frame in one step: one row per retained line, with the
# untouched text stored in the "RAW" column. Later cells add one column per
# preprocessing stage next to it.
df = pd.DataFrame({"RAW": cleaned_lines})

## Preprocessing

### Punctuation

In [6]:
import string

punc = string.punctuation

# Translation table that deletes every character in `punc`. It is built once
# so each call is a single pass over the text instead of growing a result
# string one character at a time.
_PUNC_TABLE = str.maketrans("", "", punc)

def clean_punc(string):
    """Return ``string`` with every character listed in ``punc`` removed.

    Only the ASCII punctuation from ``string.punctuation`` is stripped;
    letters, digits and whitespace (including newlines) are kept as they are.

    Note: the parameter name shadows the ``string`` module inside this
    function. It is kept unchanged so existing callers are unaffected.

    Args:
        string: the text to clean.

    Returns:
        A new ``str`` without punctuation characters.
    """
    return string.translate(_PUNC_TABLE)
# Store a punctuation-free copy of every raw line in its own column.
df["no_punc"] = df["RAW"].map(clean_punc)

### Tokenization

In [7]:
def tokenize(string):
    """Lower-case ``string`` and split it on runs of whitespace.

    Returns the resulting list of tokens; empty or whitespace-only input
    yields an empty list.
    """
    return string.lower().split()

In [8]:
# Turn each punctuation-free line into a list of lower-case tokens.
df["Tokens"] = df["no_punc"].map(tokenize)
df

Unnamed: 0,RAW,no_punc,Tokens
0,Step into a world of musical enchantment as th...,Step into a world of musical enchantment as th...,"[step, into, a, world, of, musical, enchantmen..."
1,Get ready to be spellbound by the dynamic Cale...,Get ready to be spellbound by the dynamic Cale...,"[get, ready, to, be, spellbound, by, the, dyna..."
2,Brace yourselves for a soulful journey led by ...,Brace yourselves for a soulful journey led by ...,"[brace, yourselves, for, a, soulful, journey, ..."
3,Join the charismatic Leo under the spotlight a...,Join the charismatic Leo under the spotlight a...,"[join, the, charismatic, leo, under, the, spot..."
4,"Welcome the incomparable Luna to the stage, wh...",Welcome the incomparable Luna to the stage whe...,"[welcome, the, incomparable, luna, to, the, st..."
...,...,...,...
503,"Alright folks, it's time to get into the groov...",Alright folks its time to get into the groove ...,"[alright, folks, its, time, to, get, into, the..."
504,Get ready to turn up the heat on the dance flo...,Get ready to turn up the heat on the dance flo...,"[get, ready, to, turn, up, the, heat, on, the,..."
505,Welcome everyone to a night of musical magic a...,Welcome everyone to a night of musical magic a...,"[welcome, everyone, to, a, night, of, musical,..."
506,"Hold onto your hats, because tonight's musical...",Hold onto your hats because tonights musical p...,"[hold, onto, your, hats, because, tonights, mu..."


### Stopwords

In [9]:
# English stop-word list taken from NLTK's stopwords corpus reader.
# NOTE(review): no cell visible here downloads the "stopwords" corpus, so this
# presumably relies on it already being installed -- confirm on a fresh setup.
st = stopwords.words("english")

In [10]:
def remove_stop_words(tokens, stop_words=None):
    """Return the tokens that are not stop words, keeping order and duplicates.

    Args:
        tokens: iterable of hashable tokens (strings in this notebook).
        stop_words: optional iterable of words to drop. When omitted, the
            notebook-level ``st`` list is used, as before.

    Returns:
        A new list; ``tokens`` itself is not modified.
    """
    if stop_words is None:
        stop_words = st
    # Hash-based membership avoids scanning the whole stop-word list once
    # for every token.
    blocked = frozenset(stop_words)
    return [tk for tk in tokens if tk not in blocked]

# Filter the stop words out of every token list and keep the result as a new column.
df["removed_stop"] = df["Tokens"].map(remove_stop_words)

In [11]:
# Display the frame, now including the "removed_stop" column.
df

Unnamed: 0,RAW,no_punc,Tokens,removed_stop
0,Step into a world of musical enchantment as th...,Step into a world of musical enchantment as th...,"[step, into, a, world, of, musical, enchantmen...","[step, world, musical, enchantment, vivacious,..."
1,Get ready to be spellbound by the dynamic Cale...,Get ready to be spellbound by the dynamic Cale...,"[get, ready, to, be, spellbound, by, the, dyna...","[get, ready, spellbound, dynamic, caleb, whose..."
2,Brace yourselves for a soulful journey led by ...,Brace yourselves for a soulful journey led by ...,"[brace, yourselves, for, a, soulful, journey, ...","[brace, soulful, journey, led, enchanting, ame..."
3,Join the charismatic Leo under the spotlight a...,Join the charismatic Leo under the spotlight a...,"[join, the, charismatic, leo, under, the, spot...","[join, charismatic, leo, spotlight, unleashes,..."
4,"Welcome the incomparable Luna to the stage, wh...",Welcome the incomparable Luna to the stage whe...,"[welcome, the, incomparable, luna, to, the, st...","[welcome, incomparable, luna, stage, celestial..."
...,...,...,...,...
503,"Alright folks, it's time to get into the groov...",Alright folks its time to get into the groove ...,"[alright, folks, its, time, to, get, into, the...","[alright, folks, time, get, groove, let, music..."
504,Get ready to turn up the heat on the dance flo...,Get ready to turn up the heat on the dance flo...,"[get, ready, to, turn, up, the, heat, on, the,...","[get, ready, turn, heat, dance, floor, show, b..."
505,Welcome everyone to a night of musical magic a...,Welcome everyone to a night of musical magic a...,"[welcome, everyone, to, a, night, of, musical,...","[welcome, everyone, night, musical, magic, may..."
506,"Hold onto your hats, because tonight's musical...",Hold onto your hats because tonights musical p...,"[hold, onto, your, hats, because, tonights, mu...","[hold, onto, hats, tonights, musical, performa..."


In [12]:
# The NLTK resources are downloaded into the notebook's working directory.
NLTK_DATA_DIR = "./"

# NLTK only looks for resources in the directories listed in nltk.data.path,
# and the working directory is not one of its default search locations.
# Register it so the data downloaded below can actually be found on a fresh
# kernel instead of raising LookupError.
if NLTK_DATA_DIR not in nltk.data.path:
    nltk.data.path.append(NLTK_DATA_DIR)

nltk.download('wordnet', download_dir=NLTK_DATA_DIR)
nltk.download('averaged_perceptron_tagger', download_dir=NLTK_DATA_DIR)

from nltk.stem import WordNetLemmatizer

# Single lemmatizer instance shared by the lemmatize() helper.
lemma = WordNetLemmatizer()

def lemmatize(tokens):
    """Lemmatize every token with the shared ``lemma`` WordNet lemmatizer.

    Each token is passed to ``lemma.lemmatize`` on its own, without a
    part-of-speech argument, and the results are returned as a new list in
    the original order.

    NOTE(review): a POS tagger model is downloaded in this cell but no tag is
    supplied here -- confirm whether POS-aware lemmatization was intended.
    """
    return list(map(lemma.lemmatize, tokens))

[nltk_data] Downloading package wordnet to ./...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to ./...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [13]:
# Lemmatize the stop-word-free tokens of every row, then show the full frame.
df["lems"] = df["removed_stop"].map(lemmatize)
df

Unnamed: 0,RAW,no_punc,Tokens,removed_stop,lems
0,Step into a world of musical enchantment as th...,Step into a world of musical enchantment as th...,"[step, into, a, world, of, musical, enchantmen...","[step, world, musical, enchantment, vivacious,...","[step, world, musical, enchantment, vivacious,..."
1,Get ready to be spellbound by the dynamic Cale...,Get ready to be spellbound by the dynamic Cale...,"[get, ready, to, be, spellbound, by, the, dyna...","[get, ready, spellbound, dynamic, caleb, whose...","[get, ready, spellbound, dynamic, caleb, whose..."
2,Brace yourselves for a soulful journey led by ...,Brace yourselves for a soulful journey led by ...,"[brace, yourselves, for, a, soulful, journey, ...","[brace, soulful, journey, led, enchanting, ame...","[brace, soulful, journey, led, enchanting, ame..."
3,Join the charismatic Leo under the spotlight a...,Join the charismatic Leo under the spotlight a...,"[join, the, charismatic, leo, under, the, spot...","[join, charismatic, leo, spotlight, unleashes,...","[join, charismatic, leo, spotlight, unleashes,..."
4,"Welcome the incomparable Luna to the stage, wh...",Welcome the incomparable Luna to the stage whe...,"[welcome, the, incomparable, luna, to, the, st...","[welcome, incomparable, luna, stage, celestial...","[welcome, incomparable, luna, stage, celestial..."
...,...,...,...,...,...
503,"Alright folks, it's time to get into the groov...",Alright folks its time to get into the groove ...,"[alright, folks, its, time, to, get, into, the...","[alright, folks, time, get, groove, let, music...","[alright, folk, time, get, groove, let, music,..."
504,Get ready to turn up the heat on the dance flo...,Get ready to turn up the heat on the dance flo...,"[get, ready, to, turn, up, the, heat, on, the,...","[get, ready, turn, heat, dance, floor, show, b...","[get, ready, turn, heat, dance, floor, show, b..."
505,Welcome everyone to a night of musical magic a...,Welcome everyone to a night of musical magic a...,"[welcome, everyone, to, a, night, of, musical,...","[welcome, everyone, night, musical, magic, may...","[welcome, everyone, night, musical, magic, may..."
506,"Hold onto your hats, because tonight's musical...",Hold onto your hats because tonights musical p...,"[hold, onto, your, hats, because, tonights, mu...","[hold, onto, hats, tonights, musical, performa...","[hold, onto, hat, tonight, musical, performanc..."


In [14]:
# Write the frame (raw text plus every intermediate column) to disk without
# the index column.
# NOTE(review): the list-valued columns are presumably stored as their string
# form in the CSV, so a reader would have to parse them back into lists --
# confirm against whatever consumes this file.
df.to_csv("ProcessedMusicScripts.csv", index=False)