In [14]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Usuario\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Usuario\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Década de 1960:
The Beatles
The Rolling Stones
The Beach Boys
The Who
The Doors
Led Zeppelin
The Jimi Hendrix Experience
Pink Floyd
The Velvet Underground
The Byrds
The Kinks
The Temptations
The Supremes
The Animals



In [15]:
def obtener_letras_banda(banda_url):
    try:
        stopwords_english = set(stopwords.words('english'))

        def obtener_letra(url):
            try:
                response = requests.get(url)
                response.raise_for_status()
                soup = BeautifulSoup(response.text, 'html.parser')
                letra = ' '.join(p.get_text() for p in soup.select('.lyric-original.font.--lyrics.--size18 p'))
                return letra.strip() if letra else None
            except requests.exceptions.RequestException as e:
                print("Error al obtener letra:", e)
                return None

        def limpieza_texto(texto):
            if texto:
                tokens = word_tokenize(texto)
                tokens = [re.sub(r"[^\w\s]", "", token) for token in tokens]
                tokens = [re.sub(r"\d+", "", token) for token in tokens]
                tokens = [re.sub(r'([a-z])([A-Z])', r'\1 \2', token) for token in tokens]
                tokens = [token.lower() for token in tokens]
                tokens = [token for token in tokens if token and token not in stopwords_english]
                tokens = [token.translate(str.maketrans("áéíóú", "aeiou")) for token in tokens]
                return ' '.join(tokens)
            else:
                return ''

        response = requests.get(banda_url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        song_items = soup.select('.songList-table-row.--song.isVisible')
        canciones = [{'Nombres': item['data-name'], 'Links': item['data-shareurl']} for item in song_items]
        df = pd.DataFrame(canciones)
        df['Letra'] = df['Links'].apply(lambda url: obtener_letra(url))
        df['Letra_Limpia'] = df['Letra'].apply(lambda letra: limpieza_texto(letra))
        return df.rename(columns={"Nombres": "track"})
    except requests.exceptions.RequestException as e:
        print("Error al obtener letras de la banda:", e)
        return None   
def frecuencia_textos(df):
    # Importar stopwords en inglés
    stopwords_english = set(stopwords.words('english'))
    
    # Unir todas las letras limpias en un solo texto y eliminar stopwords
    texto_completo = " ".join(df["Letra_Limpia"])
    tokens = word_tokenize(texto_completo)
    tokens = [token for token in tokens if token.lower() not in stopwords_english]
    
    # Calcular la frecuencia de las palabras
    frecuencia_palabras = FreqDist(tokens)
    
    # Crear un DataFrame con la frecuencia de palabras
    df_frecuencia = pd.DataFrame(frecuencia_palabras.items(), columns=['palabra', 'frecuencia'])
    
    # Ordenar por frecuencia en orden descendente
    df_frecuencia = df_frecuencia.sort_values(by='frecuencia', ascending=False)
    
    return df_frecuencia

banda_url = "https://www.letras.com/the-beatles/"
df = obtener_letras_banda(banda_url)
df_beatles = frecuencia_textos(df)
df_beatles.rename(columns={"frecuencia": "The Beatles"}, inplace=True)

banda_url = "https://www.letras.com/the-rolling-stones/"
df = obtener_letras_banda(banda_url)
df_rolling = frecuencia_textos(df)
df_rolling.rename(columns={"frecuencia": "The Rolling Stones"}, inplace=True)

banda_url = "https://www.letras.com/beach-boys/"
df = obtener_letras_banda(banda_url)
df_beach_boys = frecuencia_textos(df)
df_beach_boys.rename(columns={"frecuencia": "Beach Boys"}, inplace=True)

banda_url = "https://www.letras.com/the-who/"
df = obtener_letras_banda(banda_url)
df_who = frecuencia_textos(df)
df_who.rename(columns={"frecuencia": "The Who"}, inplace=True)

banda_url = "https://www.letras.com/the-doors/"
df = obtener_letras_banda(banda_url)
df_doors = frecuencia_textos(df)
df_doors.rename(columns={"frecuencia": "The Doors"}, inplace=True)

banda_url = "https://www.letras.com/led-zeppelin/"
df = obtener_letras_banda(banda_url)
df_zeppelin = frecuencia_textos(df)
df_zeppelin.rename(columns={"frecuencia": "Led Zeppelin"}, inplace=True)

banda_url = "https://www.letras.com/jimi-hendrix/"
df = obtener_letras_banda(banda_url)
df_hendrix = frecuencia_textos(df)
df_hendrix.rename(columns={"frecuencia": "Jimi Hendrix"}, inplace=True)

banda_url = "https://www.letras.com/pink-floyd/"
df = obtener_letras_banda(banda_url)
df_floyd = frecuencia_textos(df)
df_floyd.rename(columns={"frecuencia": "Pink Floyd"}, inplace=True)

banda_url = "https://www.letras.com/velvet-underground/"
df = obtener_letras_banda(banda_url)
df_velvet = frecuencia_textos(df)
df_velvet.rename(columns={"frecuencia": "Velvet Underground"}, inplace=True)

banda_url = "https://www.letras.com/byrds/"
df = obtener_letras_banda(banda_url)
df_byrds = frecuencia_textos(df)
df_byrds.rename(columns={"frecuencia": "Byrds"}, inplace=True)

banda_url = "https://www.letras.com/the-kinks/"
df = obtener_letras_banda(banda_url)
df_kinks = frecuencia_textos(df)
df_kinks.rename(columns={"frecuencia": "The Kinks"}, inplace=True)

# Para The Temptations
banda_url = "https://www.letras.com/temptations/"
df = obtener_letras_banda(banda_url)
df_temptations = frecuencia_textos(df)
df_temptations.rename(columns={"frecuencia": "The Temptations"}, inplace=True)

# Para The Supremes
banda_url = "https://www.letras.com/the-supremes/"
df = obtener_letras_banda(banda_url)
df_supremes = frecuencia_textos(df)
df_supremes.rename(columns={"frecuencia": "The Supremes"}, inplace=True)

# Para The Animals
banda_url = "https://www.letras.com/the-animals/"
df = obtener_letras_banda(banda_url)
df_animals = frecuencia_textos(df)
df_animals.rename(columns={"frecuencia": "The Animals"}, inplace=True)

# Para The Yardbirds (que ya hemos realizado)
banda_url = "https://www.letras.com/the-yardbirds/"
df = obtener_letras_banda(banda_url)
df_yardbirds = frecuencia_textos(df)
df_yardbirds.rename(columns={"frecuencia": "The Yardbirds"}, inplace=True)

# Realizar el merge de todos los DataFrames
df_merged = pd.merge(df_beatles, df_rolling, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_beach_boys, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_who, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_doors, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_zeppelin, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_hendrix, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_floyd, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_velvet, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_byrds, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_kinks, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_temptations, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_supremes, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_animals, on="palabra", how="outer")
df_merged = pd.merge(df_merged, df_yardbirds, on="palabra", how="outer")


df_merged.fillna(0, inplace=True)
df_merged['total'] = df_merged[['The Beatles', 'The Rolling Stones', 'Beach Boys', 'The Who', 'The Doors', 'Led Zeppelin', 
                                'Jimi Hendrix', 'Pink Floyd', 'Velvet Underground', 'Byrds','The Kinks', 'The Temptations', 
                                'The Supremes', 'The Animals', 'The Yardbirds']].sum(axis=1)

df_merged = df_merged.sort_values(by='total', ascending=False) 
df_merged.to_csv("df_1960.csv", index=False)
top_1960 = pd.read_csv("./df_1960.csv")
top_1960



Unnamed: 0,palabra,The Beatles,The Rolling Stones,Beach Boys,The Who,The Doors,Led Zeppelin,Jimi Hendrix,Pink Floyd,Velvet Underground,Byrds,The Kinks,The Temptations,The Supremes,The Animals,The Yardbirds,total
0,nt,1009.0,1625.0,930.0,829.0,275.0,332.0,297.0,226.0,208.0,389.0,1434.0,734.0,957.0,459.0,250.0,9954.0
1,love,850.0,686.0,811.0,230.0,282.0,236.0,154.0,38.0,62.0,207.0,437.0,752.0,1078.0,278.0,189.0,6290.0
2,baby,442.0,1035.0,425.0,129.0,176.0,282.0,185.0,25.0,51.0,66.0,207.0,493.0,414.0,481.0,169.0,4580.0
3,oh,537.0,591.0,603.0,82.0,66.0,430.0,80.0,47.0,157.0,168.0,577.0,388.0,335.0,289.0,106.0,4456.0
4,got,358.0,722.0,426.0,337.0,167.0,154.0,150.0,134.0,90.0,219.0,846.0,223.0,223.0,173.0,132.0,4354.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19446,arithmetical,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
19447,nonpolitical,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
19448,goodtrips,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
19449,fury,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Década de 1970:

- **Queen**
- **Black Sabbath**
- **Fleetwood Mac**
- **Bee Gees**
- **AC/DC**
- **Eagles**
- **The Clash**
- **ABBA**
- **Ramones**
- **Sex Pistols**

In [16]:
banda_url = "https://www.letras.com/queen/"
df = obtener_letras_banda(banda_url)
df_queen = frecuencia_textos(df)
df_queen.rename(columns={"frecuencia": "Queen"}, inplace=True)

banda_url = "https://www.letras.com/black-sabbath/"
df = obtener_letras_banda(banda_url)
df_black_sabbath = frecuencia_textos(df)
df_black_sabbath.rename(columns={"frecuencia": "Black Sabbath"}, inplace=True)

banda_url = "https://www.letras.com/fleetwood-mac/"
df = obtener_letras_banda(banda_url)
df_fleetwood_mac = frecuencia_textos(df)
df_fleetwood_mac.rename(columns={"frecuencia": "Fleetwood Mac"}, inplace=True)

banda_url = "https://www.letras.com/bee-gees/"
df = obtener_letras_banda(banda_url)
df_bee_gees = frecuencia_textos(df)
df_bee_gees.rename(columns={"frecuencia": "Bee Gees"}, inplace=True)

banda_url = "https://www.letras.com/ac-dc/"
df = obtener_letras_banda(banda_url)
df_ac_dc = frecuencia_textos(df)
df_ac_dc.rename(columns={"frecuencia": "AC/DC"}, inplace=True)

banda_url = "https://www.letras.com/the-eagles/"
df = obtener_letras_banda(banda_url)
df_eagles = frecuencia_textos(df)
df_eagles.rename(columns={"frecuencia": "Eagles"}, inplace=True)

banda_url = "https://www.letras.com/the-clash/"
df = obtener_letras_banda(banda_url)
df_the_clash = frecuencia_textos(df)
df_the_clash.rename(columns={"frecuencia": "The Clash"}, inplace=True)

banda_url = "https://www.letras.com/abba/"
df = obtener_letras_banda(banda_url)
df_abba = frecuencia_textos(df)
df_abba.rename(columns={"frecuencia": "ABBA"}, inplace=True)

banda_url = "https://www.letras.com/ramones/"
df = obtener_letras_banda(banda_url)
df_ramones = frecuencia_textos(df)
df_ramones.rename(columns={"frecuencia": "Ramones"}, inplace=True)

banda_url = "https://www.letras.com/sex-pistols/"
df = obtener_letras_banda(banda_url)
df_sex_pistols = frecuencia_textos(df)
df_sex_pistols.rename(columns={"frecuencia": "Sex Pistols"}, inplace=True)


banda_url = "https://www.letras.com/jackson-five/"
df = obtener_letras_banda(banda_url)
df_the_jackson_5 = frecuencia_textos(df)
df_the_jackson_5.rename(columns={"frecuencia": "The Jackson 5"}, inplace=True)

banda_url = "https://www.letras.com/the-carpenters/"
df = obtener_letras_banda(banda_url)
df_the_carpenters = frecuencia_textos(df)
df_the_carpenters.rename(columns={"frecuencia": "The Carpenters"}, inplace=True)

# Para The Village People
banda_url = "https://www.letras.com/the-village-people/"
df = obtener_letras_banda(banda_url)
df_the_village_people = frecuencia_textos(df)
df_the_village_people.rename(columns={"frecuencia": "The Village People"}, inplace=True)

# Para The Stylistics
banda_url = "https://www.letras.com/the-stylistics/"
df = obtener_letras_banda(banda_url)
df_the_stylistics = frecuencia_textos(df)
df_the_stylistics.rename(columns={"frecuencia": "The Stylistics"}, inplace=True)

# Realizar el merge de todos los DataFrames
df_merged_1970 = pd.merge(df_queen, df_black_sabbath, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_fleetwood_mac, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_bee_gees, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_ac_dc, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_eagles, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_the_clash, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_abba, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_ramones, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_sex_pistols, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_the_jackson_5, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_the_carpenters, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_the_village_people, on="palabra", how="outer")
df_merged_1970 = pd.merge(df_merged_1970, df_the_stylistics, on="palabra", how="outer")

df_merged_1970.fillna(0, inplace=True)
df_merged_1970['total'] = df_merged_1970.sum(axis=1)
df_merged_1970 = df_merged_1970.sort_values(by='total', ascending=False) 
df_merged_1970.to_csv("df_1970.csv", index=False)
top_1970 = pd.read_csv("./df_1970.csv")
top_1970



  df_merged_1970['total'] = df_merged_1970.sum(axis=1)


Unnamed: 0,palabra,Queen,Black Sabbath,Fleetwood Mac,Bee Gees,AC/DC,Eagles,The Clash,ABBA,Ramones,Sex Pistols,The Jackson 5,The Carpenters,The Village People,The Stylistics,total
0,nt,785.0,588.0,1021.0,1798.0,674.0,348.0,411.0,398.0,852.0,176.0,1051.0,359.0,126.0,262.0,8849.0
1,love,625.0,238.0,685.0,1829.0,135.0,171.0,35.0,176.0,253.0,86.0,816.0,447.0,75.0,409.0,5980.0
2,know,268.0,174.0,398.0,795.0,154.0,185.0,119.0,190.0,273.0,74.0,476.0,218.0,45.0,99.0,3468.0
3,got,331.0,186.0,273.0,671.0,517.0,105.0,228.0,91.0,191.0,77.0,299.0,120.0,106.0,65.0,3260.0
4,oh,350.0,195.0,286.0,756.0,155.0,88.0,159.0,120.0,436.0,97.0,345.0,96.0,45.0,112.0,3240.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15855,allianza,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15856,resoundingly,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15857,commodity,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15858,tilts,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


Década de 1980:
U2
Michael Jackson
Madonna
Guns N' Roses
Metallica
The Police
Bon Jovi
Duran Duran
The Cure
Prince
Whitney Houston
Depeche Mode
Talking Heads
Tears for Fears
New Order

In [17]:
# Bandas de la década de 1980
banda_url = "https://www.letras.com/u2/"
df = obtener_letras_banda(banda_url)
df_u2 = frecuencia_textos(df)
df_u2.rename(columns={"frecuencia": "U2"}, inplace=True)

banda_url = "https://www.letras.com/michael-jackson/"
df = obtener_letras_banda(banda_url)
df_michael_jackson = frecuencia_textos(df)
df_michael_jackson.rename(columns={"frecuencia": "Michael Jackson"}, inplace=True)

banda_url = "https://www.letras.com/madonna/"
df = obtener_letras_banda(banda_url)
df_madonna = frecuencia_textos(df)
df_madonna.rename(columns={"frecuencia": "Madonna"}, inplace=True)

banda_url = "https://www.letras.com/guns-n-roses/"
df = obtener_letras_banda(banda_url)
df_guns_n_roses = frecuencia_textos(df)
df_guns_n_roses.rename(columns={"frecuencia": "Guns N' Roses"}, inplace=True)


banda_url = "https://www.letras.com/metallica/"
df = obtener_letras_banda(banda_url)
df_metallica = frecuencia_textos(df)
df_metallica.rename(columns={"frecuencia": "Metallica"}, inplace=True)

banda_url = "https://www.letras.com/the-police/"
df = obtener_letras_banda(banda_url)
df_the_police = frecuencia_textos(df)
df_the_police.rename(columns={"frecuencia": "The Police"}, inplace=True)

banda_url = "https://www.letras.com/bon-jovi/"
df = obtener_letras_banda(banda_url)
df_bon_jovi = frecuencia_textos(df)
df_bon_jovi.rename(columns={"frecuencia": "Bon Jovi"}, inplace=True)

banda_url = "https://www.letras.com/duran-duran/"
df = obtener_letras_banda(banda_url)
df_duran_duran = frecuencia_textos(df)
df_duran_duran.rename(columns={"frecuencia": "Duran Duran"}, inplace=True)

banda_url = "https://www.letras.com/the-cure/"
df = obtener_letras_banda(banda_url)
df_the_cure = frecuencia_textos(df)
df_the_cure.rename(columns={"frecuencia": "The Cure"}, inplace=True)


banda_url = "https://www.letras.com/prince/"
df = obtener_letras_banda(banda_url)
df_prince = frecuencia_textos(df)
df_prince.rename(columns={"frecuencia": "Prince"}, inplace=True)

# Para Whitney Houston
banda_url = "https://www.letras.com/whitney-houston/"
df = obtener_letras_banda(banda_url)
df_whitney_houston = frecuencia_textos(df)
df_whitney_houston.rename(columns={"frecuencia": "Whitney Houston"}, inplace=True)

# Para Depeche Mode
banda_url = "https://www.letras.com/depeche-mode/"
df = obtener_letras_banda(banda_url)
df_depeche_mode = frecuencia_textos(df)
df_depeche_mode.rename(columns={"frecuencia": "Depeche Mode"}, inplace=True)

# Para Talking Heads
banda_url = "https://www.letras.com/talking-heads/"
df = obtener_letras_banda(banda_url)
df_talking_heads = frecuencia_textos(df)
df_talking_heads.rename(columns={"frecuencia": "Talking Heads"}, inplace=True)

# Para Tears for Fears
banda_url = "https://www.letras.com/tears-for-fears/"
df = obtener_letras_banda(banda_url)
df_tears_for_fears = frecuencia_textos(df)
df_tears_for_fears.rename(columns={"frecuencia": "Tears for Fears"}, inplace=True)

# Para New Order
banda_url = "https://www.letras.com/new-order/"
df = obtener_letras_banda(banda_url)
df_new_order = frecuencia_textos(df)
df_new_order.rename(columns={"frecuencia": "New Order"}, inplace=True)

# Realizar el merge de todos los DataFrames
df_merged_1980 = pd.merge(df_u2, df_michael_jackson, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_madonna, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_guns_n_roses, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_metallica, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_the_police, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_bon_jovi, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_duran_duran, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_the_cure, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_prince, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_whitney_houston, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_depeche_mode, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_talking_heads, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_tears_for_fears, on="palabra", how="outer")
df_merged_1980 = pd.merge(df_merged_1980, df_new_order, on="palabra", how="outer")

# Llenar NaN con 0
df_merged_1980.fillna(0, inplace=True)

# Calcular la suma total de las frecuencias
df_merged_1980['total'] = df_merged_1980.sum(axis=1)
df_merged_1980 = df_merged_1980.sort_values(by='total', ascending=False) 

# Guardar como CSV
df_merged_1980.to_csv("df_1980.csv", index=False)

# Leer el DataFrame
top_1980 = pd.read_csv("./df_1980.csv")
top_1980

  df_merged_1980['total'] = df_merged_1980.sum(axis=1)


Unnamed: 0,palabra,U2,Michael Jackson,Madonna,Guns N' Roses,Metallica,The Police,Bon Jovi,Duran Duran,The Cure,Prince,Whitney Houston,Depeche Mode,Talking Heads,Tears for Fears,New Order,total
0,nt,1173.0,2217.0,5264.0,956.0,586.0,380.0,2149.0,623.0,743.0,3891.0,709.0,403.0,291.0,243.0,512.0,20140.0
1,love,866.0,1155.0,4050.0,180.0,81.0,106.0,935.0,150.0,304.0,2064.0,738.0,239.0,164.0,166.0,230.0,11428.0
2,u,30.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8636.0,0.0,0.0,3.0,0.0,0.0,8677.0
3,na,340.0,781.0,2554.0,212.0,101.0,77.0,694.0,279.0,4.0,2246.0,266.0,34.0,252.0,31.0,103.0,7974.0
4,like,444.0,503.0,2597.0,190.0,153.0,97.0,681.0,196.0,414.0,1380.0,143.0,193.0,125.0,125.0,215.0,7456.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23163,feature,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
23164,suchknowing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
23165,touchso,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
23166,diebut,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


Década de 1990:
Nirvana
Pearl Jam
Red Hot Chili Peppers
Oasis
Radiohead
Green Day
R.E.M.
Foo Fighters
Beastie Boys
Nine Inch Nails
Blur
Weezer
No Doubt
Smashing Pumpkins
Soundgarden

In [18]:
# Bandas de la década de 1990
banda_url = "https://www.letras.com/nirvana/"
df = obtener_letras_banda(banda_url)
df_nirvana = frecuencia_textos(df)
df_nirvana.rename(columns={"frecuencia": "Nirvana"}, inplace=True)

banda_url = "https://www.letras.com/pearl-jam/"
df = obtener_letras_banda(banda_url)
df_pearl_jam = frecuencia_textos(df)
df_pearl_jam.rename(columns={"frecuencia": "Pearl Jam"}, inplace=True)

banda_url = "https://www.letras.com/red-hot-chili-peppers/"
df = obtener_letras_banda(banda_url)
df_red_hot_chili_peppers = frecuencia_textos(df)
df_red_hot_chili_peppers.rename(columns={"frecuencia": "Red Hot Chili Peppers"}, inplace=True)

banda_url = "https://www.letras.com/oasis/"
df = obtener_letras_banda(banda_url)
df_oasis = frecuencia_textos(df)
df_oasis.rename(columns={"frecuencia": "Oasis"}, inplace=True)

banda_url = "https://www.letras.com/radiohead/"
df = obtener_letras_banda(banda_url)
df_radiohead = frecuencia_textos(df)
df_radiohead.rename(columns={"frecuencia": "Radiohead"}, inplace=True)

banda_url = "https://www.letras.com/green-day/"
df = obtener_letras_banda(banda_url)
df_green_day = frecuencia_textos(df)
df_green_day.rename(columns={"frecuencia": "Green Day"}, inplace=True)

banda_url = "https://www.letras.com/rem/"
df = obtener_letras_banda(banda_url)
df_rem = frecuencia_textos(df)
df_rem.rename(columns={"frecuencia": "R.E.M."}, inplace=True)

banda_url = "https://www.letras.com/foo-fighters/"
df = obtener_letras_banda(banda_url)
df_foo_fighters = frecuencia_textos(df)
df_foo_fighters.rename(columns={"frecuencia": "Foo Fighters"}, inplace=True)

banda_url = "https://www.letras.com/beastie-boys/"
df = obtener_letras_banda(banda_url)
df_beastie_boys = frecuencia_textos(df)
df_beastie_boys.rename(columns={"frecuencia": "Beastie Boys"}, inplace=True)

banda_url = "https://www.letras.com/nine-inch-nails/"
df = obtener_letras_banda(banda_url)
df_nine_inch_nails = frecuencia_textos(df)
df_nine_inch_nails.rename(columns={"frecuencia": "Nine Inch Nails"}, inplace=True)

# Para Blur
banda_url = "https://www.letras.com/blur/"
df = obtener_letras_banda(banda_url)
df_blur = frecuencia_textos(df)
df_blur.rename(columns={"frecuencia": "Blur"}, inplace=True)

# Para Weezer
banda_url = "https://www.letras.com/weezer/"
df = obtener_letras_banda(banda_url)
df_weezer = frecuencia_textos(df)
df_weezer.rename(columns={"frecuencia": "Weezer"}, inplace=True)

# Para No Doubt
banda_url = "https://www.letras.com/no-doubt/"
df = obtener_letras_banda(banda_url)
df_no_doubt = frecuencia_textos(df)
df_no_doubt.rename(columns={"frecuencia": "No Doubt"}, inplace=True)

# Para Smashing Pumpkins
banda_url = "https://www.letras.com/smashing-pumpkins/"
df = obtener_letras_banda(banda_url)
df_smashing_pumpkins = frecuencia_textos(df)
df_smashing_pumpkins.rename(columns={"frecuencia": "Smashing Pumpkins"}, inplace=True)

# Para Soundgarden
banda_url = "https://www.letras.com/soundgarden/"
df = obtener_letras_banda(banda_url)
df_soundgarden = frecuencia_textos(df)
df_soundgarden.rename(columns={"frecuencia": "Soundgarden"}, inplace=True)

# Realizar el merge de todos los DataFrames
df_merged_1990 = pd.merge(df_nirvana, df_pearl_jam, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_red_hot_chili_peppers, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_oasis, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_radiohead, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_green_day, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_rem, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_foo_fighters, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_beastie_boys, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_nine_inch_nails, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_blur, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_weezer, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_no_doubt, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_smashing_pumpkins, on="palabra", how="outer")
df_merged_1990 = pd.merge(df_merged_1990, df_soundgarden, on="palabra", how="outer")

# Llenar NaN con 0
df_merged_1990.fillna(0, inplace=True)

# Calcular la suma total de las frecuencias
df_merged_1990['total'] = df_merged_1990.sum(axis=1)
df_merged_1990 = df_merged_1990.sort_values(by='total', ascending=False) 
# Guardar como CSV
df_merged_1990.to_csv("df_1990.csv", index=False)

# Leer el DataFrame
top_1990 = pd.read_csv("./df_1990.csv")

  df_merged_1990['total'] = df_merged_1990.sum(axis=1)


In [19]:
top_1990

Unnamed: 0,palabra,Nirvana,Pearl Jam,Red Hot Chili Peppers,Oasis,Radiohead,Green Day,R.E.M.,Foo Fighters,Beastie Boys,Nine Inch Nails,Blur,Weezer,No Doubt,Smashing Pumpkins,Soundgarden,total
0,nt,393.0,839.0,754.0,573.0,329.0,843.0,608.0,543.0,721.0,335.0,395.0,1120.0,404.0,733.0,365.0,8955.0
1,know,157.0,226.0,468.0,316.0,116.0,266.0,310.0,192.0,295.0,171.0,146.0,433.0,148.0,333.0,147.0,3724.0
2,got,94.0,339.0,462.0,210.0,104.0,243.0,151.0,165.0,661.0,106.0,160.0,448.0,115.0,148.0,141.0,3547.0
3,like,81.0,248.0,429.0,137.0,94.0,317.0,185.0,213.0,628.0,108.0,148.0,364.0,128.0,167.0,189.0,3436.0
4,love,87.0,264.0,564.0,173.0,45.0,178.0,133.0,165.0,51.0,39.0,124.0,406.0,139.0,722.0,102.0,3192.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22580,compares,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
22581,payed,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
22582,evermore,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
22583,titillations,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Década de 2000:

- **Coldplay**
- **Linkin Park**
- **Eminem**
- **The White Stripes**
- **The Strokes**
- **Arctic Monkeys**
- **Muse**
- **System of a Down**
- **OutKast**
- **The Killers**

In [20]:
# Bandas de la década de 2000
banda_url = "https://www.letras.com/coldplay/"
df = obtener_letras_banda(banda_url)
df_coldplay = frecuencia_textos(df)
df_coldplay.rename(columns={"frecuencia": "Coldplay"}, inplace=True)

banda_url = "https://www.letras.com/linkin-park/"
df = obtener_letras_banda(banda_url)
df_linkin_park = frecuencia_textos(df)
df_linkin_park.rename(columns={"frecuencia": "Linkin Park"}, inplace=True)

banda_url = "https://www.letras.com/eminem/"
df = obtener_letras_banda(banda_url)
df_eminem = frecuencia_textos(df)
df_eminem.rename(columns={"frecuencia": "Eminem"}, inplace=True)

banda_url = "https://www.letras.com/the-white-stripes/"
df = obtener_letras_banda(banda_url)
df_white_stripes = frecuencia_textos(df)
df_white_stripes.rename(columns={"frecuencia": "The White Stripes"}, inplace=True)

banda_url = "https://www.letras.com/the-strokes/"
df = obtener_letras_banda(banda_url)
df_strokes = frecuencia_textos(df)
df_strokes.rename(columns={"frecuencia": "The Strokes"}, inplace=True)

banda_url = "https://www.letras.com/arctic-monkeys/"
df = obtener_letras_banda(banda_url)
df_arctic_monkeys = frecuencia_textos(df)
df_arctic_monkeys.rename(columns={"frecuencia": "Arctic Monkeys"}, inplace=True)

banda_url = "https://www.letras.com/muse/"
df = obtener_letras_banda(banda_url)
df_muse = frecuencia_textos(df)
df_muse.rename(columns={"frecuencia": "Muse"}, inplace=True)

banda_url = "https://www.letras.com/system-of-a-down/"
df = obtener_letras_banda(banda_url)
df_soad = frecuencia_textos(df)
df_soad.rename(columns={"frecuencia": "System of a Down"}, inplace=True)

banda_url = "https://www.letras.com/outkast/"
df = obtener_letras_banda(banda_url)
df_outkast = frecuencia_textos(df)
df_outkast.rename(columns={"frecuencia": "OutKast"}, inplace=True)

banda_url = "https://www.letras.com/the-killers/"
df = obtener_letras_banda(banda_url)
df_killers = frecuencia_textos(df)
df_killers.rename(columns={"frecuencia": "The Killers"}, inplace=True)

# Para Beyoncé
banda_url = "https://www.letras.com/beyonce/"
df = obtener_letras_banda(banda_url)
df_beyonce = frecuencia_textos(df)
df_beyonce.rename(columns={"frecuencia": "Beyoncé"}, inplace=True)

# Para Justin Timberlake
banda_url = "https://www.letras.com/justin-timberlake/"
df = obtener_letras_banda(banda_url)
df_timberlake = frecuencia_textos(df)
df_timberlake.rename(columns={"frecuencia": "Justin Timberlake"}, inplace=True)

# Para 50 Cent
banda_url = "https://www.letras.com/50-cent/"
df = obtener_letras_banda(banda_url)
df_50_cent = frecuencia_textos(df)
df_50_cent.rename(columns={"frecuencia": "50 Cent"}, inplace=True)

# Para Kanye West
banda_url = "https://www.letras.com/kanye-west/"
df = obtener_letras_banda(banda_url)
df_kanye_west = frecuencia_textos(df)
df_kanye_west.rename(columns={"frecuencia": "Kanye West"}, inplace=True)

# Sustituir Foo Fighters por otra banda icónica
banda_url = "https://www.letras.com/amy-winehouse/"
df = obtener_letras_banda(banda_url)
df_amy_winehouse = frecuencia_textos(df)
df_amy_winehouse.rename(columns={"frecuencia": "Amy Winehouse"}, inplace=True)

# Realizar el merge de todos los DataFrames
df_merged_2000 = pd.merge(df_coldplay, df_linkin_park, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_eminem, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_white_stripes, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_strokes, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_arctic_monkeys, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_muse, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_soad, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_outkast, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_killers, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_beyonce, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_timberlake, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_50_cent, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_kanye_west, on="palabra", how="outer")
df_merged_2000 = pd.merge(df_merged_2000, df_amy_winehouse, on="palabra", how="outer")




df_merged_2000.fillna(0, inplace=True)

# Calcular la suma total de las frecuencias
df_merged_2000['total'] = df_merged_2000.sum(axis=1)
df_merged_2000 = df_merged_2000.sort_values(by='total', ascending=False) 
# Guardar como CSV
df_merged_2000.to_csv("df_2000.csv", index=False)

#

  df_merged_2000['total'] = df_merged_2000.sum(axis=1)


In [24]:
df_merged_2000

Unnamed: 0,palabra,Coldplay,Linkin Park,Eminem,The White Stripes,The Strokes,Arctic Monkeys,Muse,System of a Down,OutKast,The Killers,Beyoncé,Justin Timberlake,50 Cent,Kanye West,Amy Winehouse,total
1,nt,537.0,1311.0,5835.0,357.0,496.0,425.0,376.0,381.0,833.0,612.0,2474.0,1838.0,4310.0,3245.0,300.0,23330.0
7,like,257.0,569.0,3837.0,89.0,92.0,173.0,100.0,96.0,642.0,207.0,1302.0,784.0,2676.0,1906.0,95.0,12825.0
12,got,177.0,288.0,2351.0,102.0,96.0,137.0,53.0,55.0,412.0,238.0,1150.0,904.0,2736.0,1448.0,69.0,10216.0
6,get,259.0,246.0,2784.0,100.0,101.0,122.0,51.0,76.0,339.0,114.0,660.0,598.0,3139.0,1468.0,82.0,10139.0
2,know,314.0,429.0,1678.0,145.0,143.0,162.0,116.0,111.0,250.0,194.0,1062.0,790.0,2040.0,1537.0,152.0,9123.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24634,carrotz,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
24633,defender,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
24631,twirler,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
24630,leadaz,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Década de 2010:

- **Arctic Monkeys**
- **Imagine Dragons**
- **Adele**
- **Twenty One Pilots**
- **Ed Sheeran**
- **Mumford & Sons**
- **The Black Keys**
- **Tame Impala**
- **Florence + The Machine**
- **Kendrick Lamar**

In [21]:
# Bandas de la década de 2010
banda_url = "https://www.letras.com/arctic-monkeys/"
df = obtener_letras_banda(banda_url)
df_arctic_monkeys_10s = frecuencia_textos(df)
df_arctic_monkeys_10s.rename(columns={"frecuencia": "Arctic Monkeys"}, inplace=True)

banda_url = "https://www.letras.com/imagine-dragons/"
df = obtener_letras_banda(banda_url)
df_imagine_dragons = frecuencia_textos(df)
df_imagine_dragons.rename(columns={"frecuencia": "Imagine Dragons"}, inplace=True)

banda_url = "https://www.letras.com/adele/"
df = obtener_letras_banda(banda_url)
df_adele = frecuencia_textos(df)
df_adele.rename(columns={"frecuencia": "Adele"}, inplace=True)

banda_url = "https://www.letras.com/twenty-one-pilots/"
df = obtener_letras_banda(banda_url)
df_twenty_one_pilots = frecuencia_textos(df)
df_twenty_one_pilots.rename(columns={"frecuencia": "Twenty One Pilots"}, inplace=True)

banda_url = "https://www.letras.com/ed-sheeran/"
df = obtener_letras_banda(banda_url)
df_ed_sheeran = frecuencia_textos(df)
df_ed_sheeran.rename(columns={"frecuencia": "Ed Sheeran"}, inplace=True)

banda_url = "https://www.letras.com/mumford-sons/"
df = obtener_letras_banda(banda_url)
df_mumford_sons = frecuencia_textos(df)
df_mumford_sons.rename(columns={"frecuencia": "Mumford & Sons"}, inplace=True)

banda_url = "https://www.letras.com/the-black-keys/"
df = obtener_letras_banda(banda_url)
df_black_keys = frecuencia_textos(df)
df_black_keys.rename(columns={"frecuencia": "The Black Keys"}, inplace=True)

banda_url = "https://www.letras.com/tame-impala/"
df = obtener_letras_banda(banda_url)
df_tame_impala = frecuencia_textos(df)
df_tame_impala.rename(columns={"frecuencia": "Tame Impala"}, inplace=True)

banda_url = "https://www.letras.com/florence-and-the-machine/"
df = obtener_letras_banda(banda_url)
df_florence_machine = frecuencia_textos(df)
df_florence_machine.rename(columns={"frecuencia": "Florence And The Machine"}, inplace=True)

banda_url = "https://www.letras.com/kendrick-lamar/"
df = obtener_letras_banda(banda_url)
df_kendrick_lamar = frecuencia_textos(df)
df_kendrick_lamar.rename(columns={"frecuencia": "Kendrick Lamar"}, inplace=True)

# Para Lorde
banda_url = "https://www.letras.com/lorde/"
df = obtener_letras_banda(banda_url)
df_lorde = frecuencia_textos(df)
df_lorde.rename(columns={"frecuencia": "Lorde"}, inplace=True)

# Para Bruno Mars
banda_url = "https://www.letras.com/bruno-mars/"
df = obtener_letras_banda(banda_url)
df_bruno_mars = frecuencia_textos(df)
df_bruno_mars.rename(columns={"frecuencia": "Bruno Mars"}, inplace=True)

# Para Drake
banda_url = "https://www.letras.com/drake/"
df = obtener_letras_banda(banda_url)
df_drake = frecuencia_textos(df)
df_drake.rename(columns={"frecuencia": "Drake"}, inplace=True)

# Para The Weeknd
banda_url = "https://www.letras.com/the-weeknd/"
df = obtener_letras_banda(banda_url)
df_weeknd = frecuencia_textos(df)
df_weeknd.rename(columns={"frecuencia": "The Weeknd"}, inplace=True)

# Para Lady Gaga
banda_url = "https://www.letras.com/lady-gaga/"
df = obtener_letras_banda(banda_url)
df_lady_gaga = frecuencia_textos(df)
df_lady_gaga.rename(columns={"frecuencia": "Lady Gaga"}, inplace=True)

# Realizar el merge de todos los DataFrames
df_merged_2010 = pd.merge(df_arctic_monkeys, df_imagine_dragons, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_adele, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_twenty_one_pilots, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_ed_sheeran, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_mumford_sons, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_black_keys, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_tame_impala, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_florence_machine, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_kendrick_lamar, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_lorde, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_bruno_mars, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_drake, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_weeknd, on="palabra", how="outer")
df_merged_2010 = pd.merge(df_merged_2010, df_lady_gaga, on="palabra", how="outer")

# Llenar NaN con 0
df_merged_2010.fillna(0, inplace=True)

# Calcular la suma total de las frecuencias
df_merged_2010['total'] = df_merged_2010.sum(axis=1)
df_merged_2010 = df_merged_2010.sort_values(by='total', ascending=False) 
# Guardar como CSV
df_merged_2010.to_csv("df_2010.csv", index=False)


  df_merged_2010['total'] = df_merged_2010.sum(axis=1)


In [25]:
df_merged_2010

Unnamed: 0,palabra,Arctic Monkeys,Imagine Dragons,Adele,Twenty One Pilots,Ed Sheeran,Mumford & Sons,The Black Keys,Tame Impala,Florence And The Machine,Kendrick Lamar,Lorde,Bruno Mars,Drake,The Weeknd,Lady Gaga,total
0,nt,425.0,561.0,488.0,569.0,1122.0,258.0,416.0,286.0,400.0,1558.0,206.0,757.0,3592.0,1883.0,1646.0,14167.0
2,know,162.0,274.0,134.0,399.0,821.0,153.0,184.0,172.0,140.0,639.0,97.0,417.0,2119.0,1146.0,535.0,7392.0
1,like,173.0,159.0,154.0,126.0,664.0,55.0,118.0,116.0,191.0,835.0,113.0,457.0,2462.0,603.0,643.0,6869.0
4,oh,135.0,847.0,136.0,182.0,624.0,152.0,240.0,63.0,346.0,168.0,96.0,578.0,570.0,1045.0,1291.0,6473.0
13,love,83.0,238.0,290.0,46.0,1031.0,213.0,222.0,63.0,319.0,460.0,93.0,278.0,869.0,1029.0,1032.0,6266.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15135,demonstrations,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2697,hack,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15138,adjourning,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
15139,paycheck,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


## Década de 2020:

- **Billie Eilish**
- **Halsey**
- **The Weeknd**
- **Harry Styles**
- **Lana Del Rey**
- **Dua Lipa**
- **Post Malone**
- **Megan Thee Stallion**
- **HAIM**
- **Taylor Swift**

In [22]:
# Bandas de la década actual (2020s)
banda_url = "https://www.letras.com/billie-eilish/"
df = obtener_letras_banda(banda_url)
df_billie_eilish = frecuencia_textos(df)
df_billie_eilish.rename(columns={"frecuencia": "Billie Eilish"}, inplace=True)

banda_url = "https://www.letras.com/halsey/"
df = obtener_letras_banda(banda_url)
df_halsey = frecuencia_textos(df)
df_halsey.rename(columns={"frecuencia": "Halsey"}, inplace=True)

banda_url = "https://www.letras.com/the-weeknd/"
df = obtener_letras_banda(banda_url)
df_the_weeknd = frecuencia_textos(df)
df_the_weeknd.rename(columns={"frecuencia": "The Weeknd"}, inplace=True)

banda_url = "https://www.letras.com/harry-styles/"
df = obtener_letras_banda(banda_url)
df_harry_styles = frecuencia_textos(df)
df_harry_styles.rename(columns={"frecuencia": "Harry Styles"}, inplace=True)

banda_url = "https://www.letras.com/lana-del-rey/"
df = obtener_letras_banda(banda_url)
df_lana_del_rey = frecuencia_textos(df)
df_lana_del_rey.rename(columns={"frecuencia": "Lana Del Rey"}, inplace=True)

banda_url = "https://www.letras.com/dua-lipa/"
df = obtener_letras_banda(banda_url)
df_dua_lipa = frecuencia_textos(df)
df_dua_lipa.rename(columns={"frecuencia": "Dua Lipa"}, inplace=True)

banda_url = "https://www.letras.com/post-malone/"
df = obtener_letras_banda(banda_url)
df_post_malone = frecuencia_textos(df)
df_post_malone.rename(columns={"frecuencia": "Post Malone"}, inplace=True)

banda_url = "https://www.letras.com/megan-thee-stallion/"
df = obtener_letras_banda(banda_url)
df_megan_thee_stallion = frecuencia_textos(df)
df_megan_thee_stallion.rename(columns={"frecuencia": "Megan Thee Stallion"}, inplace=True)

banda_url = "https://www.letras.com/haim/"
df = obtener_letras_banda(banda_url)
df_haim = frecuencia_textos(df)
df_haim.rename(columns={"frecuencia": "HAIM"}, inplace=True)

banda_url = "https://www.letras.com/taylor-swift/"
df = obtener_letras_banda(banda_url)
df_taylor = frecuencia_textos(df)
df_taylor.rename(columns={"frecuencia": "Taylor Swift"}, inplace=True)

# Para Bad Bunny
banda_url = "https://www.letras.com/bad-bunny/"
df = obtener_letras_banda(banda_url)
df_bad_bunny = frecuencia_textos(df)
df_bad_bunny.rename(columns={"frecuencia": "Bad Bunny"}, inplace=True)

# Para Ariana Grande
banda_url = "https://www.letras.com/ariana-grande/"
df = obtener_letras_banda(banda_url)
df_ariana_grande = frecuencia_textos(df)
df_ariana_grande.rename(columns={"frecuencia": "Ariana Grande"}, inplace=True)

# Para Travis Scott
banda_url = "https://www.letras.com/travis-scott/"
df = obtener_letras_banda(banda_url)
df_travis_scott = frecuencia_textos(df)
df_travis_scott.rename(columns={"frecuencia": "Travis Scott"}, inplace=True)

# Para Doja Cat
banda_url = "https://www.letras.com/doja-cat/"
df = obtener_letras_banda(banda_url)
df_doja_cat = frecuencia_textos(df)
df_doja_cat.rename(columns={"frecuencia": "Doja Cat"}, inplace=True)

# Reemplazar BTS por otra banda icónica
banda_url = "https://www.letras.com/blackpink/"
df = obtener_letras_banda(banda_url)
df_blackpink = frecuencia_textos(df)
df_blackpink.rename(columns={"frecuencia": "Blackpink"}, inplace=True)

# Realizar el merge de todos los DataFrames
df_merged_2020 = pd.merge(df_billie_eilish, df_halsey, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_harry_styles, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_lana_del_rey, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_dua_lipa, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_post_malone, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_megan_thee_stallion, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_haim, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_taylor, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_bad_bunny, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_ariana_grande, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_travis_scott, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_doja_cat, on="palabra", how="outer")
df_merged_2020 = pd.merge(df_merged_2020, df_blackpink, on="palabra", how="outer")

# Llenar NaN con 0
df_merged_2020.fillna(0, inplace=True)

# Calcular la suma total de las frecuencias
df_merged_2020['total'] = df_merged_2020.sum(axis=1)
df_merged_2020 = df_merged_2020.sort_values(by='total', ascending=False) 
# Guardar como CSV
df_merged_2020.to_csv("df_2020.csv", index=False)


  df_merged_2020['total'] = df_merged_2020.sum(axis=1)


In [26]:
df_merged_2020

Unnamed: 0,palabra,Billie Eilish,Halsey,Harry Styles,Lana Del Rey,Dua Lipa,Post Malone,Megan Thee Stallion,HAIM,Taylor Swift,Bad Bunny,Ariana Grande,Travis Scott,Doja Cat,Blackpink,total
0,nt,619.0,595.0,302.0,1229.0,1095.0,956.0,1265.0,390.0,2058.0,15.0,2023.0,1050.0,1255.0,200.0,13052.0
2,like,199.0,223.0,81.0,1398.0,412.0,466.0,804.0,54.0,1044.0,19.0,928.0,645.0,981.0,341.0,7595.0
54,yeah,35.0,144.0,42.0,337.0,364.0,642.0,871.0,53.0,308.0,216.0,1319.0,1727.0,760.0,348.0,7166.0
1,know,204.0,368.0,195.0,762.0,670.0,392.0,484.0,268.0,943.0,52.0,1054.0,563.0,394.0,225.0,6574.0
10,got,103.0,204.0,130.0,570.0,592.0,418.0,559.0,48.0,558.0,9.0,955.0,662.0,544.0,125.0,5477.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17916,llegochile,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
17917,segunda,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
17918,tazer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
17919,tirarlo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [23]:
eliminar = ["nt","oh","na","ca","ai","ta","wo","da","doo","la","gon","wan","pa","ah","n","r","en","är","som","att",
            "lo","el","du","se","por","gaj","jag","al","tu","ahha","vi","lip","ooh","de","chorus"]

top_1960 = top_1960[~top_1960['palabra'].isin(eliminar)]
top_1970 = top_1970[~top_1970['palabra'].isin(eliminar)]
top_1980 = top_1980[~top_1980['palabra'].isin(eliminar)]
top_1990 = top_1990[~top_1990['palabra'].isin(eliminar)]
top_2000 = top_2000[~top_2000['palabra'].isin(eliminar)]
top_2010 = top_2010[~top_2010['palabra'].isin(eliminar)]
top_2020 = top_2020[~top_2020['palabra'].isin(eliminar)]

NameError: name 'top_2000' is not defined

In [None]:
top_1960 = top_1960.sort_values(by='total', ascending=False) 
top_1960['total'] = top_1960[['The Beatles', 'The Rolling Stones', 'Beach Boys', 'The Who', 'The Doors', 
                        'Led Zeppelin', 'Jimi Hendrix', 'Pink Floyd', 'Velvet Underground', 'Byrds']].sum(axis=1)
top_1960.head(60)

Unnamed: 0,palabra,The Beatles,The Rolling Stones,Beach Boys,The Who,The Doors,Led Zeppelin,Jimi Hendrix,Pink Floyd,Velvet Underground,Byrds,total
1,love,850.0,686.0,811.0,230.0,282.0,236.0,154.0,38.0,62.0,207.0,3556.0
5,baby,442.0,1035.0,425.0,129.0,176.0,282.0,185.0,25.0,51.0,66.0,2816.0
7,got,358.0,722.0,426.0,337.0,167.0,154.0,150.0,134.0,90.0,219.0,2757.0
2,know,593.0,435.0,458.0,303.0,162.0,185.0,103.0,72.0,166.0,202.0,2679.0
4,yeah,469.0,979.0,312.0,198.0,168.0,215.0,229.0,9.0,24.0,47.0,2650.0
13,get,266.0,611.0,286.0,286.0,124.0,60.0,62.0,90.0,37.0,133.0,1955.0
9,come,302.0,410.0,466.0,167.0,107.0,80.0,72.0,49.0,49.0,121.0,1823.0
10,like,276.0,362.0,314.0,244.0,96.0,56.0,94.0,100.0,80.0,113.0,1735.0
17,go,244.0,427.0,412.0,173.0,57.0,88.0,92.0,68.0,40.0,125.0,1726.0
20,time,225.0,308.0,344.0,138.0,101.0,99.0,76.0,95.0,48.0,212.0,1646.0


In [None]:
top_1970 = top_1970.sort_values(by='total', ascending=False) 
top_1970['total'] = top_1970[['Queen', 'Black Sabbath', 'Fleetwood Mac', 'Bee Gees', 'AC/DC', 
                        'Eagles', 'The Clash', 'ABBA', 'Ramones', 'Sex Pistols']].sum(axis=1)
top_1970.head(60)

Unnamed: 0,palabra,Queen,Black Sabbath,Fleetwood Mac,Bee Gees,AC/DC,Eagles,The Clash,ABBA,Ramones,Sex Pistols,total
1,love,625.0,238.0,685.0,1829.0,135.0,171.0,35.0,176.0,253.0,86.0,4233.0
4,got,331.0,186.0,273.0,671.0,517.0,105.0,228.0,91.0,191.0,77.0,2670.0
5,know,268.0,174.0,398.0,795.0,154.0,185.0,119.0,190.0,273.0,74.0,2630.0
10,baby,233.0,30.0,528.0,427.0,129.0,62.0,32.0,97.0,323.0,38.0,1899.0
12,go,190.0,97.0,212.0,445.0,208.0,68.0,146.0,120.0,306.0,89.0,1881.0
8,one,246.0,122.0,220.0,494.0,95.0,86.0,121.0,125.0,114.0,37.0,1660.0
27,like,148.0,77.0,260.0,428.0,166.0,98.0,87.0,201.0,144.0,26.0,1635.0
9,get,244.0,136.0,129.0,331.0,274.0,115.0,128.0,103.0,114.0,25.0,1599.0
6,time,263.0,153.0,240.0,343.0,125.0,92.0,98.0,83.0,128.0,53.0,1578.0
35,never,106.0,143.0,212.0,648.0,71.0,86.0,62.0,97.0,118.0,18.0,1561.0


In [None]:
top_1980['total'] = top_1980[['U2', 'Michael Jackson', 'Madonna', 'Guns N\' Roses', 'Metallica', 
                            'The Police', 'Bon Jovi', 'Duran Duran', 'The Cure']].sum(axis=1)
top_1980 = top_1980.sort_values(by="total", ascending=False) 
top_1980.head(60)


Unnamed: 0,palabra,U2,Michael Jackson,Madonna,Guns N' Roses,Metallica,The Police,Bon Jovi,Duran Duran,The Cure,total
1,love,866.0,1155.0,4050.0,180.0,81.0,106.0,935.0,150.0,304.0,7827.0
4,like,444.0,503.0,2597.0,190.0,153.0,97.0,681.0,196.0,414.0,5275.0
3,know,454.0,685.0,2138.0,328.0,125.0,103.0,612.0,236.0,264.0,4945.0
20,get,228.0,543.0,2695.0,159.0,92.0,36.0,370.0,165.0,163.0,4451.0
8,got,300.0,745.0,1617.0,327.0,156.0,81.0,718.0,180.0,128.0,4252.0
24,yeah,206.0,676.0,1375.0,228.0,329.0,30.0,378.0,79.0,178.0,3479.0
14,let,248.0,446.0,1712.0,99.0,161.0,32.0,362.0,99.0,127.0,3286.0
16,baby,245.0,785.0,1258.0,107.0,53.0,64.0,566.0,151.0,31.0,3260.0
9,one,294.0,416.0,1253.0,165.0,169.0,91.0,541.0,129.0,181.0,3239.0
12,time,255.0,491.0,1229.0,178.0,175.0,75.0,376.0,204.0,214.0,3197.0


In [None]:
top_1990['total'] = top_1990[['Nirvana', 'Pearl Jam', 'Red Hot Chili Peppers', 'Oasis', 'Radiohead', 
                            'Green Day', 'R.E.M.', 'Foo Fighters', 'Beastie Boys', 'Nine Inch Nails']].sum(axis=1)
top_1990 = top_1990.sort_values(by="total", ascending=False)
top_1990.head(60)




Unnamed: 0,palabra,Nirvana,Pearl Jam,Red Hot Chili Peppers,Oasis,Radiohead,Green Day,R.E.M.,Foo Fighters,Beastie Boys,Nine Inch Nails,total
4,got,94.0,339.0,462.0,210.0,104.0,243.0,151.0,165.0,661.0,106.0,2535.0
1,know,157.0,226.0,468.0,316.0,116.0,266.0,310.0,192.0,295.0,171.0,2517.0
9,like,81.0,248.0,429.0,137.0,94.0,317.0,185.0,213.0,628.0,108.0,2440.0
33,get,53.0,172.0,451.0,161.0,123.0,215.0,120.0,141.0,381.0,133.0,1950.0
42,time,42.0,179.0,322.0,197.0,43.0,214.0,188.0,254.0,203.0,77.0,1719.0
6,love,87.0,264.0,564.0,173.0,45.0,178.0,133.0,165.0,51.0,39.0,1699.0
12,go,73.0,193.0,243.0,119.0,82.0,318.0,172.0,233.0,160.0,89.0,1682.0
16,one,67.0,193.0,212.0,118.0,71.0,232.0,161.0,365.0,209.0,52.0,1680.0
2,yeah,107.0,398.0,269.0,128.0,33.0,98.0,169.0,135.0,62.0,26.0,1425.0
17,want,67.0,138.0,302.0,61.0,82.0,228.0,179.0,112.0,97.0,140.0,1406.0


In [None]:
top_2000['total'] = top_2000[['Coldplay', 'Linkin Park', 'Eminem', 'The White Stripes', 'The Strokes', 
                            'Arctic Monkeys', 'Muse', 'System of a Down', 'OutKast', 'The Killers']].sum(axis=1)
top_2000 = top_2000.sort_values(by="total", ascending=False)
top_2000.head(60)



Unnamed: 0,palabra,Coldplay,Linkin Park,Eminem,The White Stripes,The Strokes,Arctic Monkeys,Muse,System of a Down,OutKast,The Killers,total
7,like,257.0,569.0,3837.0,89.0,92.0,173.0,100.0,96.0,642.0,207.0,6062.0
6,get,259.0,246.0,2784.0,100.0,101.0,122.0,51.0,76.0,339.0,114.0,4192.0
12,got,177.0,288.0,2351.0,102.0,96.0,137.0,53.0,55.0,412.0,238.0,3909.0
2,know,314.0,429.0,1678.0,145.0,143.0,162.0,116.0,111.0,250.0,194.0,3542.0
34,back,90.0,420.0,1583.0,40.0,35.0,78.0,58.0,47.0,190.0,116.0,2657.0
4,go,262.0,368.0,1301.0,76.0,85.0,117.0,38.0,82.0,138.0,143.0,2610.0
10,see,205.0,362.0,1199.0,55.0,85.0,71.0,70.0,101.0,185.0,92.0,2425.0
30,cause,102.0,175.0,1633.0,44.0,52.0,74.0,50.0,19.0,177.0,65.0,2391.0
21,one,132.0,280.0,1159.0,95.0,60.0,95.0,68.0,89.0,174.0,92.0,2244.0
15,never,162.0,330.0,1015.0,74.0,96.0,72.0,107.0,123.0,124.0,93.0,2196.0


In [None]:
top_2010['total'] = top_2010[['Arctic Monkeys', 'Imagine Dragons', 'Adele', 'Twenty One Pilots', 'Ed Sheeran', 
                            'Mumford & Sons', 'The Black Keys', 'Tame Impala', 'Florence And The Machine', 
                            'Kendrick Lamar']].sum(axis=1)
top_2010 = top_2010.sort_values(by="total", ascending=False)
top_2010.head(60)


Unnamed: 0,palabra,Arctic Monkeys,Imagine Dragons,Adele,Twenty One Pilots,Ed Sheeran,Mumford & Sons,The Black Keys,Tame Impala,Florence And The Machine,Kendrick Lamar,total
2,know,162.0,274.0,134.0,399.0,821.0,153.0,184.0,172.0,140.0,639.0,3078.0
13,love,83.0,238.0,290.0,46.0,1031.0,213.0,222.0,63.0,319.0,460.0,2965.0
1,like,173.0,159.0,154.0,126.0,664.0,55.0,118.0,116.0,191.0,835.0,2591.0
3,got,137.0,254.0,45.0,85.0,398.0,28.0,193.0,82.0,99.0,685.0,2006.0
6,go,117.0,222.0,127.0,128.0,420.0,62.0,146.0,104.0,101.0,312.0,1739.0
5,get,122.0,197.0,68.0,121.0,258.0,56.0,101.0,44.0,128.0,536.0,1631.0
20,never,72.0,199.0,133.0,88.0,436.0,56.0,97.0,58.0,203.0,277.0,1619.0
23,time,70.0,195.0,89.0,165.0,389.0,124.0,100.0,107.0,80.0,249.0,1568.0
27,let,61.0,232.0,148.0,117.0,324.0,66.0,111.0,58.0,182.0,239.0,1538.0
9,one,95.0,252.0,68.0,140.0,311.0,54.0,116.0,117.0,117.0,244.0,1514.0


In [None]:
top_2020['total'] = top_2020[['Billie Eilish', 'Halsey', 'The Weeknd', 'Harry Styles', 'Lana Del Rey', 
                            'Dua Lipa', 'Post Malone', 'Megan Thee Stallion', 'HAIM', 'Taylor Swift']].sum(axis=1)
top_2020 = top_2020.sort_values(by="total", ascending=False)
top_2020.head(60)

Unnamed: 0,palabra,Billie Eilish,Halsey,The Weeknd,Harry Styles,Lana Del Rey,Dua Lipa,Post Malone,Megan Thee Stallion,HAIM,Taylor Swift,total
1,know,204.0,368.0,1146.0,195.0,762.0,670.0,392.0,484.0,268.0,943.0,5432.0
2,like,199.0,223.0,603.0,81.0,1398.0,412.0,466.0,804.0,54.0,1044.0,5284.0
4,love,155.0,271.0,1029.0,88.0,950.0,451.0,188.0,139.0,137.0,807.0,4215.0
10,got,103.0,204.0,718.0,130.0,570.0,592.0,418.0,559.0,48.0,558.0,3900.0
54,yeah,35.0,144.0,976.0,42.0,337.0,364.0,642.0,871.0,53.0,308.0,3772.0
41,baby,42.0,95.0,697.0,85.0,978.0,380.0,192.0,132.0,109.0,558.0,3268.0
28,get,61.0,84.0,350.0,115.0,565.0,265.0,182.0,547.0,105.0,336.0,2610.0
20,cause,70.0,173.0,472.0,69.0,381.0,370.0,147.0,260.0,89.0,573.0,2604.0
9,never,107.0,142.0,335.0,108.0,302.0,176.0,255.0,114.0,188.0,694.0,2421.0
11,go,92.0,109.0,334.0,103.0,369.0,372.0,238.0,177.0,125.0,491.0,2410.0
