# **"BABY" IN LED ZEPPELIN'S SONG LYRICS**

In [1]:
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup

import ssl
# NOTE(review): this swaps the process-wide default HTTPS context factory for
# the unverified one, so TLS certificates are not checked for any urlopen()
# call made after this line. Presumably a workaround for local certificate
# errors — confirm it is still needed before reusing this notebook elsewhere.
ssl._create_default_https_context = ssl._create_unverified_context

import re
import pandas as pd

## **1. WEB PARSING**

**— Parsing Led Zeppelin's song lyrics from [GENIUS](https://genius.com).**

In [2]:
# Album titles used both as labels in the final table and to build album URLs.
albums = ['Led Zeppelin', 'Led Zeppelin II', 'Led Zeppelin III', 'Led Zeppelin IV', 
          'Houses of the Holy', 'Physical Graffiti', 'Presence', 'In Through the Out Door', 'Coda']

# Parallel lists: albums_list[i] is the album that songs_list[i] was found on.
albums_list = []
songs_list = []

for album in albums:
    
    # URL slug: lower-cased title with spaces replaced by hyphens.
    album_link = album.lower().replace(' ', '-')
    
    # A browser-like User-Agent is sent with the request
    # (presumably the site rejects urllib's default one — TODO confirm).
    request = Request(f'https://genius.com/albums/Led-zeppelin/{album_link}', headers = {'User-Agent' : 'Mozilla/5.0'})
    
    # The context manager closes the HTTP response once the body has been read,
    # instead of leaving the connection open until garbage collection.
    with urlopen(request, timeout = 10) as response:
        page = response.read()
    
    soup = BeautifulSoup(page, 'html.parser')
    
    # Every element with this class is treated as one track-title entry.
    all_songs = soup.find_all(class_ = 'chart_row-content-title')
    
    for song in all_songs:
        
        # First child holds the title text; strip whitespace and double quotes.
        song_name = song.contents[0].text.strip().replace('"', '')
        
        albums_list.append(album)
        songs_list.append(song_name)

In [3]:
# One entry per song in songs_list, in the same order ('' when no lyrics were found).
lyrics_list = []

for song in songs_list:
    
    # URL slug: 'led-zeppelin-<title>-lyrics', lower-cased, spaces -> hyphens,
    # with parentheses, square brackets and apostrophes removed.
    song_link = f'led-zeppelin-{song}-lyrics'.lower().replace(' ', '-')
    song_link = re.sub(r"[()\[\]']", '', song_link)
    
    request = Request(f'https://genius.com/{song_link}', headers = {'User-Agent' : 'Mozilla/5.0'})
    
    # The context manager closes the HTTP response once the body has been read,
    # instead of leaving the connection open until garbage collection.
    with urlopen(request, timeout = 10) as response:
        page = response.read()
    
    soup = BeautifulSoup(page, 'html.parser')
    
    # NOTE(review): 'jYfhrf' looks like an auto-generated CSS class name and may
    # change on the site — confirm. When it is not found the song keeps ''.
    lyrics = soup.find(class_ = 'jYfhrf')
    
    lyrics_text = ''
    
    if lyrics is not None:
        
        # Serialise the child nodes, replace every HTML tag with a space,
        # then collapse runs of spaces into a single space.
        lyrics_text = ' '.join(str(node) for node in lyrics.contents)
        lyrics_text = re.sub('<.*?>', ' ', lyrics_text)
        lyrics_text = re.sub(' +', ' ', lyrics_text)
        
    lyrics_list.append(lyrics_text)

**— Merging `albums_list`, `songs_list` and `lyrics_list` into a DataFrame.**

In [4]:
# One row per song: (album, song, lyrics), taken position-by-position
# from the three parallel lists.
song_rows = list(zip(albums_list, songs_list, lyrics_list))
df = pd.DataFrame(song_rows, columns = ['album', 'song', 'lyrics'])

## **2. COUNT BABIES**

**— Counting the number of "baby" and "babe" words.**

In [5]:
def count_baby(lyrics):
    """Return how many times 'baby' or 'babe' occurs in ``lyrics``.

    The text is lower-cased first, so matching is case-insensitive. Counting
    uses ``str.count`` (non-overlapping substring matches), so occurrences
    inside longer words are counted as well.
    """
    text = lyrics.lower()
    return sum(text.count(term) for term in ('baby', 'babe'))

In [6]:
# Apply count_baby to each song's lyrics and store the result as a new column.
df['baby_count'] = df['lyrics'].map(count_baby)

**— Main statistics on babies.**

In [7]:
# Songs whose lyrics contain at least one match.
songs_with_baby = int((df['baby_count'] > 0).sum())
print('Number of Songs with Baby in Lyrics:', songs_with_baby)

# Same count expressed as a percentage of all songs, rounded to one decimal.
print('Percentage of Songs with Baby in Lyrics:', round(songs_with_baby / len(df) * 100, 1))

Number of Songs with Baby in Lyrics: 47
Percentage of Songs with Baby in Lyrics: 58.0


In [8]:
# Mean (rounded to one decimal) and maximum of the per-song counts.
baby_counts = df['baby_count']
print('Avarage Baby Count:', round(baby_counts.mean(), 1))
print('Max Baby Count:', baby_counts.max())

Avarage Baby Count: 3.4
Max Baby Count: 35


**— The babiest songs.**

In [9]:
# Ten songs with the highest count, renumbered from 0.
(
    df.loc[:, ['album', 'song', 'baby_count']]
    .sort_values(by = 'baby_count', ascending = False)
    .head(10)
    .reset_index(drop = True)
)

Unnamed: 0,album,song,baby_count
0,Presence,Candy Store Rock,35
1,Led Zeppelin,Babe I'm Gonna Leave You,28
2,Led Zeppelin IV,Four Sticks,20
3,Led Zeppelin II,Ramble On,14
4,Houses of the Holy,D'yer Mak'er,12
5,Physical Graffiti,Boogie with Stu,12
6,Led Zeppelin II,The Lemon Song,9
7,Led Zeppelin II,Bring It On Home,9
8,Led Zeppelin,You Shook Me,8
9,Led Zeppelin,I Can't Quit You Baby,8


In [10]:
# Lyrics of every row whose song title is exactly 'Candy Store Rock', as a list.
df.loc[df['song'] == 'Candy Store Rock', 'lyrics'].tolist()

["[Verse 1] Well Oh baby baby, don't you want a man like me? Oh baby baby, I'm just as sweet as anybody could be Oh baby baby, I want to look to your eyes of blue Oh baby baby, it's more than anybody else could do [Verse 2] Well Oh baby baby, you know when I see you walking down the street Oh baby baby, well you looking good enough to eat Oh baby baby, I don't believe I've tasted this before Oh baby baby, I want it now and every mouthful more of you [Hook] Talk about you, talk about you, talk about you [Verse 3] Oh baby baby, you know that I wanted it more Oh baby baby, I'm about to kiss goodbye to this store Oh baby baby, it ain't the wrapping that sells the goods Oh baby baby, I got a sweet tooth when my mouth is full of you [Hook] Talk about you, talk about you, talk about you Talk about you, talk about you, talk about you [Chorus] Ooh baby, oh Baby it's alright, it's alright Ooh Baby it's alright, it's alright [Verse 4] Well Oh baby, baby, oh you sting like a bee Oh baby, baby, I l

**— The babiest albums.**

In [11]:
# Total count per album, highest first, renumbered from 0.
(
    df.groupby('album', as_index = False)['baby_count']
    .sum()
    .sort_values(by = 'baby_count', ascending = False)
    .reset_index(drop = True)
)

Unnamed: 0,album,baby_count
0,Led Zeppelin,59
1,Presence,51
2,Led Zeppelin II,41
3,Led Zeppelin IV,36
4,Physical Graffiti,31
5,In Through the Out Door,18
6,Coda,17
7,Houses of the Holy,17
8,Led Zeppelin III,8
