In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

def blade_scraper(timeout=30):
    """Scrape the archived Social Blade "top 100 YouTube" page into a DataFrame.

    The page is fetched from one fixed Wayback Machine snapshot, and every
    column is located through inline ``style`` attributes and positional
    slices. Those hard-coded positions are tied to this snapshot's markup.

    Parameters
    ----------
    timeout : float or tuple, optional
        Passed to ``requests.get`` so an unresponsive archive server cannot
        hang the notebook indefinitely. Defaults to 30 seconds.

    Returns
    -------
    pandas.DataFrame
        Column names are read from the page's header row; the columns
        'Rank', 'Grade', 'Username', 'Uploads', 'Subs' and 'Video Views'
        are filled with the scraped text (all values are strings).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or when ``timeout`` expires.
    """
    url = 'https://web.archive.org/web/20240701200008/https://socialblade.com/youtube/top/100'
    page = requests.get(url, timeout=timeout)
    # Fail loudly rather than parsing an error page into a malformed table.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'lxml')

    # These two lookups feed several columns, so run them only once.
    wide_divs = soup.find_all('div', style='float: left; width: 150px;')
    grey_spans = soup.find_all('span', style="color:#555;")

    # Extracting column titles: the first div of each width is a header cell;
    # the second 150px div is used as the last header.
    header_widths = ['50px', '70px', '350px', '80px', '150px']
    header_divs = [soup.find('div', style=f'float: left; width: {width};') for width in header_widths]
    header_divs.append(wide_divs[1])
    titles = [div.get_text(strip=True) for div in header_divs]

    # Creating DataFrame
    df = pd.DataFrame(columns=titles)

    # Extracting rank
    rank_divs = soup.find_all('div', style="float: left; width: 50px; color:#888;")
    df['Rank'] = [div.get_text(strip=True) for div in rank_divs]

    # Extracting grades. Spans are collected colour by colour, so the result
    # follows page order only if grades are grouped by colour down the page.
    # NOTE(review): presumably true for this snapshot -- confirm.
    grade_colours = ['#00bee7', '#69ce10', '#5ac10e']
    grades = []
    for colour in grade_colours:
        for span in soup.find_all('span', style=f"font-weight: bold; color:{colour};"):
            grades.append(span.get_text(strip=True))
    # One grade is hard-coded; presumably its span uses a colour that is not
    # listed above -- TODO confirm against the snapshot.
    grades.append('A')
    df['Grade'] = grades

    # Extracting usernames: anchors 177..276 of the page, a position that is
    # specific to this snapshot.
    df['Username'] = [link.get_text(strip=True) for link in soup.find_all('a')[177:277]]

    # Extracting uploads: even-indexed grey spans up to 154, then odd-indexed
    # ones from 157, plus the text of the first '#ccc' span at position 80.
    # NOTE(review): the slices change parity at index 155/156 while the '#ccc'
    # value is inserted at position 80 -- verify that rows 78-80 line up with
    # the page.
    uploads = [span.get_text(strip=True) for span in grey_spans[0:155:2]] + \
              [span.get_text(strip=True) for span in grey_spans[157:200:2]]
    uploads.insert(80, soup.find('span', style="color:#ccc;").get_text())
    df['Uploads'] = uploads

    # Extracting subscribers: every second 150px div, skipping the two
    # leading entries.
    df['Subs'] = [div.get_text(strip=True) for div in wide_divs[2::2]]

    # Extracting video views: odd-indexed grey spans up to 155, then
    # even-indexed ones from 156 onwards.
    views = [span.get_text(strip=True) for span in grey_spans[1:156:2]] + \
            [span.get_text(strip=True) for span in grey_spans[156::2]]
    df['Video Views'] = views
    return df


In [4]:
# Scrape once and keep the result; a second bare call would only repeat the
# network request and throw its DataFrame away.
df = blade_scraper()
df

Unnamed: 0,Rank,Grade,Username,Uploads,Subs,Video Views
0,1st,A++,Zadruga,163558,2.36M,13228813474
1,2nd,A++,Zee Bangla,141986,16.8M,10995846052
2,3rd,A++,Луномосик,849,15.3M,8362822768
3,4th,A++,Anaya Kandhal,1426,35.2M,30464888606
4,5th,A++,MrBeast,804,294M,53334114736
...,...,...,...,...,...,...
95,96th,A,Sita gurjar official,771,9.33M,5855567071
96,97th,A,Crafts people,10422,12.1M,16718803505
97,98th,A,Dushyant kukreja,1208,42.2M,25694128353
98,99th,A,Bahauddin Dije,1048,2.21M,1286775633


In [5]:
# Show every row of the frame instead of pandas' truncated preview.
# Use the canonical option name: 'display.max.rows' only worked because
# pandas treats option names as regex patterns, where '.' matches '_'.
pd.set_option('display.max_rows', 101)
df

Unnamed: 0,Rank,Grade,Username,Uploads,Subs,Video Views
0,1st,A++,Zadruga,163558,2.36M,13228813474
1,2nd,A++,Zee Bangla,141986,16.8M,10995846052
2,3rd,A++,Луномосик,849,15.3M,8362822768
3,4th,A++,Anaya Kandhal,1426,35.2M,30464888606
4,5th,A++,MrBeast,804,294M,53334114736
5,6th,A++,ToRung,287,24.8M,15831418434
6,7th,A++,T-Series,21292,268M,259452413707
7,8th,A++,Zadruga Official,28160,1.3M,2721796137
8,9th,A++,★ Hadassa Magic,787,3.77M,2741151130
9,10th,A++,KL BRO Biju Rithvik,2576,51.6M,41473531074


In [6]:
def convert_abbreviated_number(number):
    """Convert an abbreviated count such as ``'2.36M'`` or ``'12K'`` to an int.

    Parameters
    ----------
    number : str or number
        Text with an optional ``K`` (thousand), ``M`` (million) or ``B``
        (billion) suffix, in either case. Surrounding whitespace and comma
        thousands separators are ignored. Non-string input is treated as
        already numeric and passed through ``int()``, so applying the
        conversion to an already-converted column does not fail.

    Returns
    -------
    int
        The expanded value.

    Raises
    ------
    ValueError
        If the text is not a valid number once the suffix is removed.
    """
    if not isinstance(number, str):
        return int(number)

    text = number.strip().replace(',', '')
    multipliers = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
    suffix = text[-1:].upper()
    if suffix in multipliers:
        # round() instead of int(): float('2.01') * 1_000_000 evaluates to
        # 2009999.9999999998, which int() would truncate to 2009999.
        return round(float(text[:-1]) * multipliers[suffix])
    return int(text)

# Expand the abbreviated subscriber counts element by element.
df['Subs'] = df['Subs'].map(convert_abbreviated_number)

In [7]:
# Display the frame after the 'Subs' conversion.
df

Unnamed: 0,Rank,Grade,Username,Uploads,Subs,Video Views
0,1st,A++,Zadruga,163558,2360000,13228813474
1,2nd,A++,Zee Bangla,141986,16800000,10995846052
2,3rd,A++,Луномосик,849,15300000,8362822768
3,4th,A++,Anaya Kandhal,1426,35200000,30464888606
4,5th,A++,MrBeast,804,294000000,53334114736
5,6th,A++,ToRung,287,24800000,15831418434
6,7th,A++,T-Series,21292,268000000,259452413707
7,8th,A++,Zadruga Official,28160,1300000,2721796137
8,9th,A++,★ Hadassa Magic,787,3770000,2741151130
9,10th,A++,KL BRO Biju Rithvik,2576,51600000,41473531074


In [11]:
# Remove comma separators from both count columns in one pass. regex=True
# makes replace() act on substrings rather than on whole cell values. The
# cells remain strings afterwards.
df[['Video Views', 'Uploads']] = (
    df[['Video Views', 'Uploads']].replace(',', '', regex=True)
)
df

Unnamed: 0,Rank,Grade,Username,Uploads,Subs,Video Views
0,1st,A++,Zadruga,163558,2360000,13228813474
1,2nd,A++,Zee Bangla,141986,16800000,10995846052
2,3rd,A++,Луномосик,849,15300000,8362822768
3,4th,A++,Anaya Kandhal,1426,35200000,30464888606
4,5th,A++,MrBeast,804,294000000,53334114736
5,6th,A++,ToRung,287,24800000,15831418434
6,7th,A++,T-Series,21292,268000000,259452413707
7,8th,A++,Zadruga Official,28160,1300000,2721796137
8,9th,A++,★ Hadassa Magic,787,3770000,2741151130
9,10th,A++,KL BRO Biju Rithvik,2576,51600000,41473531074
