In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import os
from tqdm import tqdm
import json

# Rank labels that are recognised when reading a player's profile page
keywords = [
    'Premium+',
    'Premium',
    'Bauteam',
    'Spieler',
    'Entwickler',
    'VIP',
    'Content',
    'Supporter',
    'Owner',
    'Moderator',
]

# Result containers: per-player records, and player names grouped by rank label
player_data = dict()
player_categories = dict((label, list()) for label in keywords)

# Create the Chrome WebDriver session used for every page fetch,
# started with the '--headless' and '--disable-gpu' command-line switches
chrome_options = Options()
for _flag in ('--headless', '--disable-gpu'):
    chrome_options.add_argument(_flag)
driver = webdriver.Chrome(options=chrome_options)

# Resolve the "in-new" folder inside the current working directory.
# os.path.join inserts the separator that is correct for the running
# platform, so no per-OS branching is required and `directory` can never be
# None (os.listdir(None) would silently list the working directory instead).
userOS = os.name  # kept so that later code inspecting the platform still works
directory = os.path.join(os.getcwd(), "in-new")

# Get player list: every "<name>.txt" file in `directory` contributes <name>;
# files whose name contains "log." are skipped.
file_names = [
    file[:-4]  # strip the ".txt" suffix
    for file in os.listdir(directory)
    if file.endswith(".txt") and "log." not in file
]

playerList = sorted(file_names)

# Names that must not be scraped (presumably outdated account names -- confirm).
renamed = ["BacardiSprite", "187leonbande", "Bannbaar", "BearchenGott", "H1ghsay", "Liichtenstein", "Kosmaxnaut", "Livgo", "Lubotter2009", 'JustK1lling', 'schmaarek', "ymq_", "zTotito", "Radiokopf", "toxicplace", "KMaxN"]
for old_name in renamed:
    try:
        playerList.remove(old_name)
    except ValueError:
        # A missing name is harmless, but report it instead of silently
        # discarding the information.
        print(f"{old_name} not in list.")

# Names added by hand on top of what was found on disk.
newPlayers = ["RasenLP", "FluffyDragon2007", "Tim_Voltia", "BunnyKiko", "BauHD", "Joe3346", "rotmann2", "TAMISH009", "Muniix", "redbrll"]

playerList.extend(newPlayers)

# NOTE(review): built before de-duplication and not used anywhere in this cell;
# kept unchanged in case another cell relies on it.
playerLower = [i.lower() for i in playerList]

# Drop duplicates (a manually added name may also exist on disk) and sort once.
playerList = sorted(set(playerList))

# Logging is used by the scraping function defined below
import logging

# Set up logging: INFO level and above, each record prefixed with timestamp and severity
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def scrape_player_data(player_name, log=False, page_load_delay=3.2):
    """Fetch one player's stats page and record rank and Minecraft Party stats.

    The page is loaded through the module-level ``driver``, parsed with
    BeautifulSoup, and the result is stored in the module-level
    ``player_data`` dict under ``player_name``. If a recognised rank is found,
    the player is also added to the matching list in ``player_categories``.

    Args:
        player_name: Name inserted into the stats URL and used as the key in
            ``player_data``.
        log: When true, progress details are logged at INFO level.
        page_load_delay: Seconds to wait after requesting the page before its
            source is read. Defaults to the previously hard-coded 3.2.

    Returns:
        The rank text found on the page, or the string ``"None"`` when no
        element with a rank listed in ``keywords`` was found.
    """
    url = f"https://stats.cytooxien.de/player/{player_name}"
    driver.get(url)
    time.sleep(page_load_delay)  # fixed wait before reading the page source
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    if log:
        logging.info("Scraping data for player: %s", player_name)

    record = {
        "name": player_name,
        "rank": "None",
        "minecraft_party": {}
    }
    player_data[player_name] = record

    # Find player rank: the first rank element whose text is a known keyword wins
    for rank in soup.find_all('span', class_='player-rank'):
        rank_text = rank.get_text(strip=True)
        if rank_text in keywords:
            record["rank"] = rank_text
            # Guard against duplicates when the same player is scraped again
            # (e.g. the cell is re-run in the same kernel).
            if player_name not in player_categories[rank_text]:
                player_categories[rank_text].append(player_name)
            if log:
                logging.info("Player rank: %s", rank_text)
            break

    # Find Minecraft Party game mode
    for mode in soup.find_all('div', class_='stats-gamemode'):
        heading = mode.find('h3')
        # find() returns None when the tag is absent; skip such sections
        # instead of failing the whole run with an AttributeError.
        if heading is None or heading.text.strip() != "Minecraft Party":
            continue

        if log:
            logging.info("Found Minecraft Party data")

        for stat in mode.find_all('div', class_='stats-content'):
            name_cell = stat.find('div', class_='col-mid')
            value_cell = stat.find('div', class_='col-right')
            if name_cell is None or value_cell is None:
                continue  # incomplete row: nothing usable to record

            stat_name = name_cell.get_text(strip=True)
            stat_value = value_cell.get_text(strip=True)
            record["minecraft_party"][stat_name] = stat_value
            if log:
                logging.info("Scraped: %s = %s", stat_name, stat_value)

        break  # Exit the loop once Minecraft Party data is found

    if not record["minecraft_party"]:
        logging.warning("No Minecraft Party data found for " + player_name)

    return record["rank"]

# Scrape data for all players and display progress bar.
# The whole run is wrapped in try/finally so that the browser is shut down
# even when the loop is interrupted or a page raises; otherwise the Chrome
# process started for `driver` is left running.
try:
    progress_bar = tqdm(playerList, desc="Scraping Progress", unit="player")
    for player_name in progress_bar:
        last_rank = scrape_player_data(player_name, log=False)
        progress_bar.set_postfix(last_rank=last_rank, last_player=player_name)

    # Export data as JSON (only reached when every player was scraped, so an
    # aborted run never overwrites an existing file with partial data)
    with open('player_data.json', 'w', encoding='utf-8') as f:
        json.dump(player_data, f, ensure_ascii=False, indent=4)

    print("Data exported to player_data.json")

    # Export ranks to txt files
    #for category, players in player_categories.items():
    #    with open(f'{category}.txt', 'w', encoding='utf-8') as f:
    #        for player in sorted(players):
    #            f.write(f"{player}\n")
    #    print(f"Exported {category}.txt")
finally:
    # Quit the WebDriver
    driver.quit()

Scraping Progress:  31%|███       | 69/221 [04:08<09:08,  3.61s/player, last_player=Joe3346, last_rank=Premium+]        


KeyboardInterrupt: 

In [None]:
# Save the rank -> players mapping as plain text, one category per line
os.makedirs("out", exist_ok=True)  # open() does not create a missing folder
with open("out/ranks.txt", "w", encoding="utf-8") as f:
    for category, players in player_categories.items():
        f.write(f"{category}: {players}\n")

In [None]:
# Show the final sorted, de-duplicated list of names that the scraping loop iterates over
print(playerList)

['20LeRe10', '2B9', '2wc', 'Acidey', 'Allooy', 'Anni808', 'ArthurAlchemist', 'Axollotel', 'Bartschii', 'BastiGHG', 'BauHD', 'Bikoop', 'BunnyKiko', 'BusinessBent', 'ByNetherdude', 'Carl1_1', 'Chander24', 'Cytoox', 'D4rkCookie', 'DarkCobweb', 'Dat_Klan', 'Davinci_Son', 'DerAutist', 'DieserBear', 'Dokkkkko', 'DrSpeed06', 'Dubbly', 'EinfachEazy', 'Ex4cted', 'Falke23_5', 'Falke_01', 'Feinberg', 'Fflopse', 'Flitzi_Dino', 'FluffyDragon2007', 'FlyingAutismus', 'FlyingKyubi', 'ForceFox', 'Freeeedom', 'FrozenNoah', 'Fussbild', 'G0at3D', 'GamemasterNiki', 'GeneralEnte06', 'GermanPie', 'Gerrygames', 'Gestimus', 'Gfrasti', 'GlowyDusk', 'Gobo9', 'GrafikKatze', 'Grapfen', 'Gryzes', 'Gummibearchen', 'Harold_Sensemann', 'HeIsJustAPoorBoy', 'HerosHD', 'HerrDante', 'Highsay', 'Hyxeed', 'IchHolzDichWeg', 'JOW23', 'JOW24', 'Jan2220', 'Janne4k', 'JayMinInSane', 'JennieKim', 'Joe3346', 'Joy_8oy', 'Julius16', 'Juti0n', 'KINT0', 'KakaMC', 'Kanickelul', 'Keenaai', 'Kokochampo', 'Kopfradio', 'Krusti', 'Kyuudo', 