# Capstone: Musical Recommender

Kelly Slatery | US-DSI-10

In [1]:
# Imports 
import numpy as np
import pandas as pd
import time
import requests
from bs4 import BeautifulSoup
import regex as re
from nltk.tokenize import RegexpTokenizer
import unicodedata

In [2]:
# Set view options
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Import and Clean Data

In [3]:
# Import list of musicals
names = pd.read_csv('./data/musical_names.csv')
names.shape

(196, 1)

In [4]:
names.head()

Unnamed: 0,musical
0,Les Misérables
1,The Phantom of the Opera
2,Hamilton
3,West Side Story
4,Wicked


In [5]:
# Convert accented characters to ascii (normal) characters
names['musical'] = [unicodedata.normalize('NFKD', name).encode('ASCII', 'ignore').decode('utf-8') for name in names['musical']]
names.head()

Unnamed: 0,musical
0,Les Miserables
1,The Phantom of the Opera
2,Hamilton
3,West Side Story
4,Wicked


In [6]:
# Remove all spaces and capital letters
names['musical'] = [re.sub('[^a-zA-Z0-9]', '', name).replace("'", '').lower() for name in names['musical']]
names.head()

Unnamed: 0,musical
0,lesmiserables
1,thephantomoftheopera
2,hamilton
3,westsidestory
4,wicked


# Data Collection

In [7]:
# Set up base url
baseurl1 = 'https://www.allmusicals.com/lyrics/'
baseurl2 = '/synopsis.htm'

# f'https://www.allmusicals.com/lyrics/{}/synopsis.htm'

In [8]:
# Set up empty list for musical synopses from allmusicals.com
synopses = []
missed_musicals = []

# Loop through all musicals in the list
for name in names['musical']:
    
    # Handle titles starting with "The"
    if name[:3] == 'the':
        
        # Set up list of links to loop over
        possible_names = [name[3:], name[3:] + 'the', name, name + 'the']
        i = 0        
        url = baseurl1 + possible_names[i] + baseurl2
        res = requests.get(url)

        # Check for which version of the musical name returns a result, if any
        while res.status_code != 200:
            i += 1
            try:
                url = baseurl1 + possible_names[i] + baseurl2
            except IndexError:
                print(f'Synopsis for "{name}" not listed on allmusicals.com. Try another source like Wikipedia.')
                url = ''
                missed_musicals.append(name)
                break
            res = requests.get(url)
    
    # Access webpage for all musicals not containing "The"
    else: 
        url = baseurl1 + name + baseurl2
        res = requests.get(url)

    # Get synposis
    if res.status_code == 200:
        soup = BeautifulSoup(res.content)
        synopsis_html = soup.find('div', {'id': 'page'}).text.replace('\r', '')
        synopsis_list = [words.strip() for words in synopsis_html.split('\n')[2:-2]]
        synopsis = ' '.join(synopsis_list).strip()
        synopses.append(synopsis)
        print(f'Synopsis for "{name}" added to list. Moving on to the next musical...')
    else:
        synopses.append('')
        missed_musicals.append(name)
        print(f'Synopsis for "{name}" not listed on allmusicals.com. Try another source like Wikipedia.')
        continue

Synopsis for "lesmiserables" added to list. Moving on to the next musical...
Synopsis for "thephantomoftheopera" added to list. Moving on to the next musical...
Synopsis for "hamilton" added to list. Moving on to the next musical...
Synopsis for "westsidestory" added to list. Moving on to the next musical...
Synopsis for "wicked" added to list. Moving on to the next musical...
Synopsis for "chicago" added to list. Moving on to the next musical...
Synopsis for "rent" added to list. Moving on to the next musical...
Synopsis for "thelionking" added to list. Moving on to the next musical...
Synopsis for "thebookofmormon" added to list. Moving on to the next musical...
Synopsis for "sweeneytoddthedemonbarberoffleetstreet" added to list. Moving on to the next musical...
Synopsis for "thesoundofmusic" added to list. Moving on to the next musical...
Synopsis for "fiddlerontheroof" added to list. Moving on to the next musical...
Synopsis for "intothewoods" added to list. Moving on to the next m

Synopsis for "thefantasticks" added to list. Moving on to the next musical...
Synopsis for "once" not listed on allmusicals.com. Try another source like Wikipedia.
Synopsis for "jekyllandhyde" not listed on allmusicals.com. Try another source like Wikipedia.
Synopsis for "beautifulthecarolekingmusical" not listed on allmusicals.com. Try another source like Wikipedia.
Synopsis for "brigadoon" added to list. Moving on to the next musical...
Synopsis for "onthetown" added to list. Moving on to the next musical...
Synopsis for "thewhostommy" added to list. Moving on to the next musical...
Synopsis for "bemorechill" added to list. Moving on to the next musical...
Synopsis for "thewiz" added to list. Moving on to the next musical...
Synopsis for "funhome" added to list. Moving on to the next musical...
Synopsis for "catchmeifyoucan" added to list. Moving on to the next musical...
Synopsis for "sweetcharity" added to list. Moving on to the next musical...
Synopsis for "mame" added to list. Mo

In [9]:
# Check that length of synopses is 196
print(len(synopses))

# Look at which musicals are missing synopses
print(missed_musicals)

196
['achorusline', 'grease', 'beautyandthebeast', 'gypsyamusicalfable', 'cinderella', 'billyelliotthemusical', 'schoolofrockthemusical', 'alittlenightmusic', 'porgyandbess', 'afunnythinghappenedonthewaytotheforum', 'agentlemansguidetoloveandmurder', 'once', 'jekyllandhyde', 'beautifulthecarolekingmusical', 'sisteract', 'achristmascarol', 'shrekthemusical', 'pajamagame', 'bonnieclyde', 'beetlejuice', 'tarzanoftheapes', 'anewbrain', 'tuckeverlasting', 'drseusshowthegrinchstolechristmas', 'ohcalcutta', 'dogfight', 'suessicalthemusical', 'twisted', 'shenandoah', 'coco', 'cyrano', 'ballroom', 'themagicshow', 'themagicshow', 'dancin', 'bigdeal']


In [10]:
# How many musicals were missed?
len(missed_musicals)

36

# Export Data

In [11]:
# Add synopses to dataframe
names['synopsis'] = synopses
names.head()

Unnamed: 0,musical,synopsis
0,lesmiserables,"The musical takes place at the beginning of the XIX century. After 19 years of penal servitude, Jean Valjean is released by policeman Javert. He was arrested for theft of bread for his family. Every month he has to report to the lawyer. Jean isn't hired and despised. Once, he was sheltered by the archbishop. He treated him as brother. Jean Valjean, who still doesn't believe in love, steals all silver in the house and runs. In the morning, he is brought to the archbishop. The man is beat to semi-death. According to policemen, the man has told them that silver was presented him by the archbishop. He agrees with it and gives Jean two silver candlesticks, which Jean stores to his death. The man is so touched by care that he tears all his documents and begins new life. Jean Valjean sells all silver and becomes the mayor 8 years later. He is also an owner of the factory. Poor girl Fantine works on it. Fantine is exposed to harassment of the foreman and contempt of colleagues. She has a secret. A few years ago, the man has deceived her and has left. She became pregnant and has given birth to the daughter. The girl, Kozetta, grows at the innkeeper and his wife's family. They have their own daughter too. They cruelly manage with the girl. At the same time, the family writes to her mother and swears that she is often ill. Fantine sends them a lot of money and thinks that she rescues the daughter. Soon her secret is revealed by other workers. The mayor asks the foreman to resolve the conflict. Fantine is dismissed. Despaired woman sells her hair and teeth. She hopes to rescue her ""ill"" daughter. Then she becomes a prostitute. One night, when the client comes to her, she refuses to work. The man offends her and Fantine hits him. At this moment police officers appear, including Javert. Jean Valjean rescues Fantine from prison and carries in hospital. The woman asks him to take care of the daughter and dies. Javert understands that the mayor is Jean, and wants to put him in prison as he looked for fugitive for many years. Jean is escaped. At this time, Kozetta is sent to winter forest for water, where she is found by Valjean. He redeems the girl from innkeepers and asks the church gardener for a shelter for her. The girl lives in the monastery. Times of June revolt. In the center of a plot are the students who have started revolution. Little Gavroche, the son of impoverished innkeepers, helps them. Marius, one of the associates, notices Kozetta on the street and falls in love with her. The girl reciprocates the young man. By means of Eponine, who is also in love with Marius, he finds the house where lives Kozetta. They start to date in secret from everyone. Jeanne Valjean and Kozetta are forced to run because of Javert arrival. Gavroche brings them a note from barricades. To rescue Kozetta's beloved, the man goes to the heat of revolt. Just at this moment, students detain Javert. They allow Jean Valjean to kill him. He agrees, and doesn't kill Javert, but releases him on freedom. At this time on barricades, slaughter is started again. Gavroche and Eponina are killed during fight. When fight is resumed, everyone is in mourning. This time law enforcement authorities try to kill everyone. Jean Valjean rescues wounded Marius. During this he meets the innkeeper on the road (he has stolen a family ring from Marius). Valjean and half-dead Marius are awaited by Javert. He claims that he will arrest the man for all crimes, which Jean has made. However, he doesn't decide to shoot at the person, who has saved his life recently. The guard is exposed to remorse and commits suicide. Marius recovers. He gets married with Kozetta. Jean Valjean tells the guy his story and departs to the monastery to die. The innkeeper and his wife come to a wedding and tell Marius scurrilous things about Jean Valjean. They don't understand that they speak about the one who has rescued him. Marius and Kozetta run to the monastery, finding the last minutes of Jean's life. When he dies, he is met by Fantine, the archbishop and all dead on barricades. The last song sounds."
1,thephantomoftheopera,"At the beginning of the XX century in the Paris Opera House was selling of old props. The most mysterious lot was broken chandelier. With its fall relates the story of the mysterious Phantom, who many years ago was the music patron of this place. Chandelier again rose to the ceiling of the theater & time turned back. It was 1881. In the hall was rehearsed the evening performance. Appeared Phantom so angered diva Carlotta that she categorically refused to act. New theater owners replaced the lead actress. The young chorus girl Christine knew a whole party of diva. She perfectly performed in the evening & very surprised new owners. After a triumphant debut of Christine, she told her friend about the mysterious music angel, who taught her singing. Soon, to her came a new sponsor of the theater Viscount & recognized a girl – in a childhood, they often played together. A girl told to a young man her deceased father’s telling, which included such music angel. A girl also admitted that this mysterious patron taught her singing. Laughed at her imagination, he invited her to dine. When the young man left the dressing room, in the mirror appeared Phantom. He took her into the mysterious world, which was located deep underground. Teaching his protégé singing, the patron brought the girl to his home & when she fainted, he gently laid her down on his bed. Waking up, she went to an angel & took off his mask to see the true face of her patron. Soon theater owners received a note from the Phantom. He demanded that the main role in the new opera performed Christine. Otherwise, the theater would suffer a terrible disaster. Owners calmed down furious diva, assuring her in her irreplaceability. During her performance, Phantom made so that the singer lost her voice. Trying to save the play, owners let the ballet on the stage. But then the audience saw the corpse under the ceiling, hanging on a rope & heard a sinister laugh of Phantom. Using the general turmoil, Christine took Mr. Chagny to the roof of the theater, where they explained to each other in love. Overhearing their conversation, Phantom has vowed to avenge to them two. After 6 months, Phantom visited Masquerade. He brought the owners a new musical creation & demanded a main part for Christine. Mrs. Giry told to viscount a story of Phantom. He was originally born with a disfigured face, the boy grew up in a mobile waxworks. When he managed to escape from there, the teenager found refuge in the theater. There he realized that his calling is to compose music. Viscount decided to catch the Phantom. Christine was choosing between her beloved & teacher. Visiting her father’s grave, the girl almost fell under the spell of Phantom, but she was rescued by viscount. During the premiere, Christine realized that her partner should be Phantom. When she tore off his mask, the audience saw the mutilated face of a man & he kidnapped a girl from the stage. Viscount was in search of his beloved, when Mrs. Giry showed a way to the dungeon, where Christine was forced to wear a wedding dress. Phantom soon caught the Viscount, who found a way to Phantom’s habitat. A man said that would let go Viscount, if only a girl stays with him. Otherwise her lover would die. Realizing feelings of Phantom, Christine kissed him. Having experienced the first time in life someone else's compassion, Phantom was shocked. He told the girl that he loves her, but she forced herself to turn away from her mentor. Christine & Viscount left. Closed by pallium, Phantom started to cry. Soon, in his lair broke angry mob. But under the pallium, people only found mask of a men – Phantom disappeared."
2,hamilton,"Can we call a thing more boring than the history? Everyone can remember the long hours spent on the school bench under monotonic and gray monologue of teacher, reading paragraph by paragraph, to enter steadily and firmly in the young stupid heads the chronology from the beginning of times until today. But sometimes there is nothing more fascinating than the history. At all the pages of books, written by bored pundits, contain no drop of inspiration, not a grain of creativity. Musical «Hamilton», created by Lin-Manuel Miranda, is an outstanding example, of how the history should be revealed. Miranda spent six long years of hard and tireless work on his own brainchild, being inspired by the biography of his native country and its founding fathers, especially the notorious Alexander Hamilton. The plot plunges us into the story of the life of one of the great politicians, says about his childhood, the years of a long and hard studying, and finally, about his formation as a political figure, skillfully touching the threads of fate of great state, during the fight for its independence. The story takes us to the atmosphere of the time, and every detail is thought out so that the film captivates us entirely. This musical is as close to modern musical trends as possible: its motifs include smooth melodies of romantic and lyrical works, and after, the ricochet of hip-hop beats on our hearing. Seemingly boring talks about finances and war, usually prompting boredom on the viewer, now transformed into real musical battles, under hip-hop. Alexander Hamilton lived before in the minds of only a dry summaries of the handbooks about the history. And his face is on a ten-bill. Now he comes to life and is presented to the audience on the stage as simple, perfectly ordinary mortal person, capable of making mistakes, looking for ways to retreat and solutions, in love or naive, strict or cruel politician – this all matters. He is alive. This story takes us in his journey from the very beginning, proving that we are all humans, no matter how great we were. It captivates and attracts. So much, that tickets on this presentation fly out with incredible speed, and many famous people of our time (for example, President Barack Obama and lots of movie stars) considered it as their duty to attend the musical. And more than once!"
3,westsidestory,"The two gangs – Jets, consisting of white & Sharks, including the Puerto Ricans – fought for control in West Side district. The leader of the first ones decided on the next dance to challenge Bernardo, the leader of the second. Riff called for challenge the gang’s member who grew up & now was working at a pharmacy, Tony. At first, he refused, but then he allowed himself to be persuaded. The newly arrived Maria worked in the store along with Anita, the girlfriend of his brother Bernardo, Sharks’ leader. A girl was supposed to marry Chino, but she did not feel any senses to him. Anita helped her prepare the dress for dancing. During the party, Jets’ & Sharks’ representatives started struggled in the dance. Seeing each other, Tony & Maria stopped noticing others. Soon they began to dance. Then brother sent her home. Bernardo & Riff agreed to hold a meeting on neutral territory. Tony fell in love with Maria at a first sight. He came to her house & started singing serenades under her window. When a girl came out onto the fire escape, the young people explained each other in their feelings. During the meeting, participants of bands confirmed – only fists are allowed during the battle. Bernardo was thinking that he would have to fight Tony, but Riff stood against him his the second fighter. Maria found out in the morning from Anita about the upcoming fight. She asked Tony to stop the planned clash. The young man promised to do everything he could. The boy & the girl began to dream about the wedding. During the fight, Tony tried to convince Bernardo to stop the fight. But in response, he heard nothing but ridicule. When the Sharks leader pushed him, Riff hit Bernardo. Tony tried to stop a broke out fight. By chance, he caused the death of his friend from Bernardo’s knife. In anger, the boy killed the leader of Sharks. When there was the sound of police sirens, shocked Tony realized that now on his hands was the blood of Maria’s brother. He was hardly persuaded to flee from the crime scene. Maria was is ignorance thinking about her beloved. When Chino came home & told about Bernardo’s death by Tony’s hand, woman did not believe him. But then her beloved confirmed this to Maria. Puerto Rican girl could not start to hate him, because her love was stronger than family ties. Young people have agreed on a joint flee. Soon members of the Jets gang learned that armed with a gun Chino is looking for Tony. They also began to search for their friend. When upset Anita came to killed Bernardo’s house, she saw a young man leaving from Maria. Angry at first, she then realized that their feelings were stronger than death. Anita told her friend about Chino & his gun. She later agreed to send a message to Tony. When a girl went to look for the young man, she was attacked by Jets. They were stopped by the pharmacy owner. Angry Anita deceived members of the gang, saying that Chino has shot Maria dead. The pharmacist told Tony about her words, who dreamed of a peaceful & happy life with his beloved. The young man went to Chino, so he shot him too. At the last moment, the young man was able to see alive Maria. Beloved died at the hands of a girl. Members of both bands decided to figure out their relationship over Tony's body. Taking the gun, Maria said she would shoot any of them, because they were guilty of all the deaths occurred. But she did not have the guts to do it. Teenagers from both bands came to the body of Tony & helped Maria carry him. Their feud ended."
4,wicked,"Good Witch Glinda of Oz country reported that the Wicked Witch Elphaba is dead. They used to be friends, and the memories of their life story in Shiz University started. Elphaba from the very childhood was a rogue child, being born with green skin color, as her mother had used the green elixir. She also had a sister Nessarose in a wheelchair. When the two sisters came to Shiz, the local bosses took custody over Nessarose. Despite the sociable character of Elphaba, the green color of her skin did not allow her to make friends. Gradually, she became friends with her roommate, Glinda. She desperately desired to meet the Wizard of Oz, who allegedly was able to make her look like of a usual person. Elphaba had feelings for her classmate Fiyero, even though deep down, she knew that she would never be his chosen one. He replied sympathetically, although he also liked Glinda. But their sympathy had no development – Elphaba was sent to the Emerald City to meet the Wizard. Dreams of help with skin color were not justified at girl – the magician used sorcerer girl's abilities to take credit for her miracles. Elphaba in despair stole an ancient magical book, and ran away being dubbed for that the Wicked Witch and it was declared her hunt down. Using magical powers, the sorcerer managed to escape. As time passed, Fiyero became a captain of the guard and announced the engagement to Glinda, which was now known as the Good Witch. A guy named Bog was also secretly in love with Glinda, though he was loved by Nessarose, who became governor after the death of her father. When Bog decided to confess Glinda his feelings, jealous Nessa has imposed on him inept spell, wishing to deprive the senses to rival. Bog’s heart began to melt, and in order to save him, Elphaba turned him into the Tin Woodman. Elphaba reconciled with the Wizard. Fiyero felt again the old feelings to Elphaba, and both of them confessed feelings to each other. Meanwhile, a tornado took Dorothy’s house with her dog Toto, and Elphaba sensed trouble: a house fell on her sister Nessarose and the latter died. It turns out that this curse was designed specifically to lure the evil sorceress out and to deal with her. Nothing was left except of mourn the dead sister and conduct Dorothy and Toto in the way to Wizard of Oz. Guards found Elphaba, but when they tried to arrest her, Fiyero gave her a chance to escape, and he surrendered. As a result, the guardians tied Fiyero to the pole and carried him to the field to where Elphaba was. To save her beloved, she turned him into a scarecrow Scarecrow, who did not feel any pain. Elphaba invented to die theatrically in the guise of the Wicked Witch. She asked her friend Glinda not to do anything to restore her good name. Because otherwise, all the people shall be against Glinda. Friends parted forever, and Elphaba gave previously stolen magical book back and promised never again to make good magic, as only misfortune happened to her because of it. They retired with her beloved Fiyero and left in a secret that Elphaba has survived."


In [12]:
# Export new dataframe
names.to_csv('./data/musical_synopses.csv', index=False)

In [None]:
# Actually on it, figure it out
# A chorus line
# Gypsy
# Cinderella
# Billy elliott
# school of rock
# A little night music

...
# Tarzan


In [None]:
res = requests.get('https://www.allmusicals.com/lyrics/secretgardenthe/synopsis.htm')

In [None]:
# Set up error warnings
if res.status_code != 200:
    raise NameError('This is not a valid webpage. Try searching another link.')
        

In [None]:
for name in names['musical']:
    if name[:3] == 'the':
        print(name)

In [None]:
fnurl = baseurl1 + 'findingneverland' + baseurl2

In [None]:
res = requests.get(baseurl1 + 'thephantomoftheoperathe' + baseurl2)

In [None]:
if the title starts with the
try no the
if status_code != 200
try the at the end
try the at te beginning
try the at the beginning and end

if res.status_code != 200:
    try {
        
    }

In [None]:
res = requests.get(fnurl)
res.status_code

In [None]:
soup = BeautifulSoup(res.content)
print(soup.prettify()[:1000])

In [None]:
fn_synopsis = soup.find('div', {'id': 'page'}).text.replace('\r', '')
fn_synopsis_list = [words.strip() for words in fn_synopsis.split('\n')[2:-2]]
fn_synop = ' '.join(fn_synopsis_list).strip()
fn_synop