In [1]:
import pandas as pd
import re
import requests
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

In [2]:
# Launch headless Chrome, load the Streams Charts peak-viewers ranking and
# capture the rendered HTML so it can be parsed without a live browser.
chrome_options = Options()
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(options=chrome_options)
try:
    # An implicit wait only applies to element lookups, so it has to be
    # configured before the lookup below; on its own it does not delay
    # page_source.
    driver.implicitly_wait(10)
    driver.get("https://streamscharts.com/top-channels?sortBy=peak_viewers")
    # Block (for up to 10 s) until the ranking table is present in the DOM,
    # so page_source is not captured before the table exists. Raises
    # NoSuchElementException if the table never appears.
    driver.find_element("css selector", 'table[x-ref="table"]')
    html = driver.page_source
finally:
    # Always shut the browser down, even when loading or the lookup fails.
    driver.quit()

In [3]:

# Parse the captured page source with the built-in HTML parser and print the
# indented markup for inspection.
webpage = bs(html, features='html.parser')
print(webpage.prettify())


<html class="overflow-x-hidden dark filter-right smooth_scroll" lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <meta content="fiLIDRu8T6Rw4IAPLH3iARW4YKjrCyhD96mudJkC" name="csrf-token"/>
  <link href="https://static.streamscharts.com/favicon/sc-favicon.png" rel="apple-touch-icon" sizes="128x128"/>
  <link href="https://static.streamscharts.com/favicon/sc-favicon.png" rel="icon" sizes="128x128" type="image/png"/>
  <meta content="#050509" name="theme-color"/>
  <title>
   Streamers With the Highest Viewers · Streams Charts
  </title>
  <meta content="Learn statistics about Twitch, Trovo, NimoTV, Bigo LIVE, Nonolive, AfreecaTV, Mildom, Kick influencers with most concurrent viewers ➥ Detail stats about streamers with the highest record number of viewers - Viewers, Followers and more!" name="description"/>
  <meta content="Streamers With the Highest Viewers · Streams Charts" property="og:title"/>
  <meta content="Lear

In [4]:
# Locate the ranking table through its x-ref attribute and print its markup.
table = webpage.find("table", attrs={"x-ref": "table"})
print(table.prettify())

<table class="md-max:min-w-max w-full bg-card text-secondary screen-content" wire:key="1700529649" x-ref="table">
 <thead class="relative text-xs font-bold leading-4 text-left capitalize -z-1">
  <tr class="">
   <th class="border-b border-gray-dark th-cell pl-6 pr-0 w-12 cell_export h-14" data-filterid="sorting-table-th" data-value='&lt;svg class="fill-current w-4 h-4 flex-shrink-0 w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16"&gt;&lt;path d="M5.333 2.667H14V4H5.333V2.667zM2 2.333h2v2H2v-2zM2 7h2v2H2V7zm0 4.667h2v2H2v-2zm3.333-4.334H14v1.334H5.333V7.333zm0 4.667H14v1.333H5.333V12z"/&gt;&lt;/svg&gt;'>
    <div class="opacity-75 justify-start flex flex-1 items-center">
     <svg class="fill-current w-4 h-4 flex-shrink-0 w-4 h-4" height="16" viewbox="0 0 16 16" width="16" xmlns="http://www.w3.org/2000/svg">
      <path d="M5.333 2.667H14V4H5.333V2.667zM2 2.333h2v2H2v-2zM2 7h2v2H2V7zm0 4.667h2v2H2v-2zm3.333-4.334H14v1.334H5.333V7.333zm0 4.667H14v1.3

In [5]:
# Column labels applied, in order, to the cells scraped from each table row.
column_names = [
    "Rank",
    "Platform",
    "Channel Name",
    "Partnered",
    "Gender",
    "Country",
    "Region",
    "My Lists",
    "Sort",
    "Followers",
    "Peak Viewers",
    "Average Viewers",
    "Popular Categories",
]
# Collect every <tr> inside the table body.
table_body = table.find("tbody")
table_rows = table_body.find_all("tr")

In [6]:
# Turn every table row into a list holding the stripped text of its <td>
# cells, then load the rows into a DataFrame labelled with column_names.
l = [
    [str(cell.get_text()).strip() for cell in table_row.find_all("td")]
    for table_row in table_rows
]

df = pd.DataFrame(l, columns=column_names)
df.head()

Unnamed: 0,Rank,Platform,Channel Name,Partnered,Gender,Country,Region,My Lists,Sort,Followers,Peak Viewers,Average Viewers,Popular Categories
0,1,,ibai\n\n\nES,,,,es,,,15 308 628,"Jul 01, 2023\n\n3 442 745",55 651,3.0
1,2,,NIMO GALA 2023\n\n\nVI,,,,vi,,,0,"Apr 08, 2023\n\n3 043 120",37 218,
2,3,,TheGrefg\n\n\nES,,,,es,,,11 688 018,"Jan 11, 2021\n\n2 470 347",24 613,3.0
3,4,,MPL ID\n\n\nID,,,,id,,,0,"Apr 09, 2022\n\n1 503 628",42 080,
4,5,,Squeezie\n\n\nFR,,,,fr,,,4 832 764,"Sep 09, 2023\n\n1 340 960",23 069,3.0


In [7]:
# Discard the last five scraped rows.
# NOTE(review): presumably these trailing rows are not channel data - confirm.
df = df.drop(index=df.tail(5).index)

In [8]:
# Remove the columns that are not kept in the final dataset.
drop_col = [
    "Partnered",
    "Gender",
    "Country",
    "My Lists",
    "Sort",
]
df = df.drop(labels=drop_col, axis="columns")

In [9]:
# Hand-entered follower counts, keyed by row label. The scrape output shows
# rows 1 and 3 with a follower count of 0.
# NOTE(review): these labels are tied to the ranking at scrape time - verify
# before re-running against fresh data.
follower_overrides = {
    1: "52,009",
    3: "1,421,167",
    11: "2,331,509",
}
for row_label, follower_count in follower_overrides.items():
    df.loc[row_label, "Followers"] = follower_count

In [10]:
# Channel Name: remove newlines, then cut the last two characters (the scraped
# text ends with a two-letter code, e.g. "ibai\n\n\nES").
df["Channel Name"] = df["Channel Name"].str.replace("\n", "", regex=False).str[:-2]
# Region: upper-case the code.
df["Region"] = df["Region"].str.upper()
# Peak Viewers: skip the first 14 characters (date plus two newlines, e.g.
# "Jul 01, 2023\n\n") and turn the space-separated digit groups into
# comma-separated ones.
df["Peak Viewers"] = df["Peak Viewers"].str[14:].str.replace(" ", ",", regex=False)
# Average Viewers / Followers: same space-to-comma normalisation.
df["Average Viewers"] = df["Average Viewers"].str.replace(" ", ",", regex=False)
df["Followers"] = df["Followers"].str.replace(" ", ",", regex=False)

In [11]:
# For every <svg> in the table, record the fill attribute of the first <path>
# that has one; svgs whose paths carry no fill contribute nothing.
first_fill_values = []
svg_elements = table.find_all("svg")

for svg_icon in svg_elements:
    path_fills = (path_tag.get("fill") for path_tag in svg_icon.find_all("path"))
    first_fill = next((fill for fill in path_fills if fill is not None), None)
    if first_fill is not None:
        first_fill_values.append(first_fill)

In [12]:
# Display the fill colours collected from the table's svg icons.
first_fill_values

['#9147FF',
 '#622DF7',
 '#9147FF',
 '#622DF7',
 '#9147FF',
 '#9147FF',
 '#9147FF',
 '#9147FF',
 '#9147FF',
 '#9147FF',
 '#9147FF',
 '#622DF7',
 '#9147FF',
 '#050509',
 '#9147FF',
 '#9147FF',
 '#9147FF',
 '#9147FF',
 '#9147FF',
 '#9147FF']

In [13]:
# Lookup from svg path fill colour to the platform name it is taken to
# represent.
platform_dict = {
    "#9147FF": "Twitch",
    "#622DF7": "Nimo TV",
    "#050509": "Kick",
}

In [14]:
# Translate each collected fill colour into its platform name.
# platform_dict only covers the colours seen so far, so fail with a readable
# message (still a KeyError) when the scrape contains a colour with no mapping,
# instead of surfacing a bare KeyError from inside the comprehension.
unmapped_fills = sorted(set(first_fill_values) - set(platform_dict))
if unmapped_fills:
    raise KeyError(f"No platform mapping for fill colour(s): {unmapped_fills}")

platform_list = [platform_dict[key] for key in first_fill_values]
platform_list

['Twitch',
 'Nimo TV',
 'Twitch',
 'Nimo TV',
 'Twitch',
 'Twitch',
 'Twitch',
 'Twitch',
 'Twitch',
 'Twitch',
 'Twitch',
 'Nimo TV',
 'Twitch',
 'Kick',
 'Twitch',
 'Twitch',
 'Twitch',
 'Twitch',
 'Twitch',
 'Twitch']

In [16]:
# Overwrite the Platform column with the names derived from the icon colours.
df = df.assign(Platform=platform_list)

In [17]:
# Select the <td> cells whose class attribute matches the pattern below; the
# bg-(card|row) alternation accepts either background variant.
pattern = re.compile(r'ticker-hover px-2 t-cell bg-(card|row) pr-6 cell_export_hidden')
td_elements = table.find_all("td", class_=pattern)

In [18]:
# For each selected cell, gather the data-tippy-content tooltip text of its
# avatar divs first, followed by that of the grey overflow divs, skipping any
# div that has no such attribute. One list is produced per cell.
data_tippy_content = []

for cell in td_elements:
    avatar_divs = cell.find_all("div", {"class":"x-img t_i_game-avatar"})
    overflow_divs = cell.find_all("div", {"class":"t_i_game-avatar bg-gray-dark rounded flex items-center justify-center text-xs text-secondary"})
    tooltips = [
        tooltip_div.get("data-tippy-content")
        for tooltip_div in list(avatar_divs) + list(overflow_divs)
    ]
    data_tippy_content.append([text for text in tooltips if text is not None])

In [19]:
# Display the tooltip texts gathered per table cell.
data_tippy_content

[['Just Chatting', 'League of Legends', 'Sports, Minecraft, Among Us'],
 ['GTA5'],
 ['Just Chatting', 'Fortnite', 'Minecraft, Clash Royale, Valorant'],
 ['Mobile Legends'],
 ['Just Chatting', 'League of Legends', 'Valorant, Among Us, Minecraft'],
 ['Just Chatting', 'World of Warcraft', 'League of Legends, Rust, '],
 ['Just Chatting',
  'Grand Theft Auto V',
  'League of Legends, Fall Guys: Ultimate Knockout, FIFA 21'],
 ['Magic: The Gathering',
  'Rocket League',
  'Just Chatting, Mortal Kombat 11, FIFA 21'],
 ['Sports', 'Special Events'],
 ['Mortal Kombat 11',
  'Tekken 7',
  'Apex Legends, Soulcalibur VI, Guilty Gear -Strive-'],
 ['League of Legends',
  'Teamfight Tactics',
  'League of Legends: Wild Rift, Valorant, Legends of Runeterra'],
 ['GTA5', 'PUBG'],
 ['Dota 2'],
 ['Just Chatting',
  'Call of Duty: Warzone 2.0, Special Events, Call of Duty: Warzone'],
 ['Unknown'],
 ['Valorant',
  'Just Chatting',
  'World of Warcraft, TrackMania, Special Events'],
 ['Fortnite'],
 ['Minecraft

In [20]:
# Store the per-row tooltip lists in the Popular Categories column.
df = df.assign(**{"Popular Categories": data_tippy_content})

In [21]:
# Strip the thousands separators from the count columns and cast them to int.
columns_to_int = ['Followers', 'Peak Viewers', 'Average Viewers']
for count_column in columns_to_int:
    df[count_column] = df[count_column].str.replace(',', '').astype(int)

In [22]:
def split_categories(list_input):
    """Flatten a row's category entries into individual category names.

    Each entry may itself hold several names joined by ", " (for example
    "Sports, Minecraft, Among Us"); every such entry is split so the result
    contains one name per item.

    The names are taken from the entries directly rather than from the
    string representation of the whole list, so apostrophes, quotes and
    square brackets inside a name are preserved. Empty names, such as the one
    left by a trailing ", ", are dropped.

    Parameters
    ----------
    list_input : list, tuple, set or scalar
        The category entries of one row. A value that is not a list, tuple or
        set is treated as a single entry and converted with ``str``.

    Returns
    -------
    list of str
        The individual category names, in their original order.
    """
    if isinstance(list_input, (list, tuple, set)):
        entries = list_input
    else:
        # A bare string (or any other scalar) counts as one entry.
        entries = [list_input]

    categories = []
    for entry in entries:
        for name in str(entry).split(", "):
            name = name.strip()
            if name:
                categories.append(name)
    return categories

# Replace each row's raw tooltip list with its flattened category names.
df["Popular Categories"] = df["Popular Categories"].apply(split_categories)

In [27]:
# Remove empty category names from every row's list.
# The filter has to run inside each list: comparing a whole list against ''
# is always unequal, so a row-level filter would leave the empty names in.
df["Popular Categories"] = df["Popular Categories"].apply(
    lambda categories: [category for category in categories if category != '']
)
df["Popular Categories"]

0     [Just Chatting, League of Legends, Sports, Min...
1                                                [GTA5]
2     [Just Chatting, Fortnite, Minecraft, Clash Roy...
3                                      [Mobile Legends]
4     [Just Chatting, League of Legends, Valorant, A...
5     [Just Chatting, World of Warcraft, League of L...
6     [Just Chatting, Grand Theft Auto V, League of ...
7     [Magic: The Gathering, Rocket League, Just Cha...
8                              [Sports, Special Events]
9     [Mortal Kombat 11, Tekken 7, Apex Legends, Sou...
10    [League of Legends, Teamfight Tactics, League ...
11                                         [GTA5, PUBG]
12                                             [Dota 2]
13    [Just Chatting, Call of Duty: Warzone 2.0, Spe...
14                                            [Unknown]
15    [Valorant, Just Chatting, World of Warcraft, T...
16                                           [Fortnite]
17    [Minecraft, Just Chatting, Fall Guys: Ulti

In [47]:
# Display the cleaned DataFrame.
df

Unnamed: 0,Rank,Platform,Channel Name,Region,Followers,Peak Viewers,Average Viewers,Popular Categories
0,1,Twitch,ibai,ES,15308628,3442745,55651,"[Just Chatting, League of Legends, Sports, Min..."
1,2,Nimo TV,NIMO GALA 2023,VI,52009,3043120,37218,[GTA5]
2,3,Twitch,TheGrefg,ES,11688018,2470347,24613,"[Just Chatting, Fortnite, Minecraft, Clash Roy..."
3,4,Nimo TV,MPL ID,ID,1421167,1503628,42080,[Mobile Legends]
4,5,Twitch,Squeezie,FR,4832764,1340960,23069,"[Just Chatting, League of Legends, Valorant, A..."
5,6,Twitch,elxokas,ES,3632399,1208144,11904,"[Just Chatting, World of Warcraft, League of L..."
6,7,Twitch,aminematue,FR,2369866,1155060,16382,"[Just Chatting, Grand Theft Auto V, League of ..."
7,8,Twitch,ELEAGUE TV,EN,1602862,1130760,7069,"[Magic: The Gathering, Rocket League, Just Cha..."
8,9,Twitch,kingsleague,ES,3001424,1108295,122083,"[Sports, Special Events]"
9,10,Twitch,PlayStation,EN,867020,1016962,3986,"[Mortal Kombat 11, Tekken 7, Apex Legends, Sou..."


In [48]:
# Write the cleaned table to a CSV file in the working directory, leaving the
# row index out.
output_path = "peak_view_streamers.csv"
df.to_csv(output_path, index=False)