**Web Scraping Artist Data from the Current Billboard Artist 100 Chart**

In [75]:
# Importing necessary libraries
# BeautifulSoup for HTML parsing
# requests for HTTP requests
# re for regular expressions
# pandas for data manipulation and analysis
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd

Parsing HTML Content from the Billboard Artist 100 Webpage

In [77]:
# Specifying the URL of the Billboard Artist 100 chart
url = 'https://www.billboard.com/charts/artist-100/'

# Sending an HTTP GET request to the specified URL and storing the response.
# requests applies no timeout by default, so an explicit one keeps this cell
# from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)

# Failing fast on 4xx/5xx responses so that an error page is never parsed
# as if it were the chart (which would silently yield an empty result).
response.raise_for_status()

# Parsing the HTML content of the response using BeautifulSoup
soup = BeautifulSoup(response.text, "lxml")

Scraping Billboard Artist 100 Chart Data and Organizing into a Structured List

In [79]:
# Badge texts that can sit between the ranking and the artist name.
# They are compared after removing all whitespace and upper-casing, so
# 'RE-\nENTRY', 'RE- ENTRY', 'Re-Entry', 'New', ... are all recognised.
skip_labels = {'NEW', 'RE-ENTRY'}

# Initializing the list where the data will be stored
data = []

# Iterating through each chart entry in the top 100
for row in soup.find_all(attrs={'class': 'o-chart-results-list-row-container'}):
    name = row.h3.get_text(strip=True)

    # Walking backwards from the artist heading to the ranking span,
    # skipping any "New" / "Re-Entry" badge spans found on the way
    ranking_span = row.h3.find_previous('span')
    while (ranking_span is not None
           and ''.join(ranking_span.get_text().split()).upper() in skip_labels):
        ranking_span = ranking_span.find_previous('span')

    # find_previous returns None once there are no earlier spans left;
    # report that explicitly instead of failing with an AttributeError
    if ranking_span is None:
        raise ValueError(f'No ranking span found before artist heading {name!r}')

    ranking = int(ranking_span.get_text(strip=True))

    # The weeks-on-chart value is read from the third <span> after the artist heading
    weeks_span = row.h3.find_next('span').find_next('span').find_next('span')
    weeks = int(weeks_span.get_text(strip=True))

    # Extracting and storing the ranking, artist name, and weeks on chart for each entry
    data.append({
        'Ranking': ranking,
        'Artist Name': name,
        'Weeks on Artist 100 Chart': weeks
    })

# Displaying the extracted data
data

[{'Ranking': 1,
  'Artist Name': 'Taylor Swift',
  'Weeks on Artist 100 Chart': 547},
 {'Ranking': 2, 'Artist Name': 'Bad Bunny', 'Weeks on Artist 100 Chart': 330},
 {'Ranking': 3, 'Artist Name': 'SZA', 'Weeks on Artist 100 Chart': 281},
 {'Ranking': 4,
  'Artist Name': 'Kendrick Lamar',
  'Weeks on Artist 100 Chart': 472},
 {'Ranking': 5,
  'Artist Name': 'Morgan Wallen',
  'Weeks on Artist 100 Chart': 306},
 {'Ranking': 6,
  'Artist Name': 'Sabrina Carpenter',
  'Weeks on Artist 100 Chart': 59},
 {'Ranking': 7,
  'Artist Name': 'Billie Eilish',
  'Weeks on Artist 100 Chart': 326},
 {'Ranking': 8, 'Artist Name': 'Bruno Mars', 'Weeks on Artist 100 Chart': 529},
 {'Ranking': 9,
  'Artist Name': 'Chappell Roan',
  'Weeks on Artist 100 Chart': 40},
 {'Ranking': 10,
  'Artist Name': 'Teddy Swims',
  'Weeks on Artist 100 Chart': 71},
 {'Ranking': 11,
  'Artist Name': 'Tyler, The Creator',
  'Weeks on Artist 100 Chart': 187},
 {'Ranking': 12,
  'Artist Name': 'Jelly Roll',
  'Weeks on Artist

Transforming the list into a DataFrame

In [81]:
# Building one DataFrame row per scraped chart entry; the dictionary keys
# of each entry become the column names
df = pd.DataFrame(data=data)

# Leaving the frame as the last expression so the notebook renders it
df

Unnamed: 0,Ranking,Artist Name,Weeks on Artist 100 Chart
0,1,Taylor Swift,547
1,2,Bad Bunny,330
2,3,SZA,281
3,4,Kendrick Lamar,472
4,5,Morgan Wallen,306
...,...,...,...
95,96,Blake Shelton,353
96,97,Journey,125
97,98,Melanie Martinez,81
98,99,Alex Warren,4


Exporting the data table to a CSV file

In [83]:
# Writing the table to disk; index=False leaves the row index out of the file
df.to_csv(path_or_buf='top100_artists_billboard.csv', index=False)