#### Этот датасет был собран на основе публикаций на сайте www.planetminecraft.com  в разделе "Самые популярные карты за всё время", посвященый популярной компьютерной игре Minecraft. 

Проанализировать планировалось топ-1000 карт, но в процессе парсинга что-то пошло не так и в датасете только 707 строк :)

Датасет включает следующие колонки:
- 'url' : ссылка на карту
- 'title' : название
- 'upload_date' : время загрузки
- 'update_date' : последнее время изменения
- 'global_views' : общее количество просмотров
- 'today_views' : количество просмотров сегодня (15 февраля 2023)
- 'global_downloads' : общее количество загрузок
- 'today_downloads' : количество загрузок сегодня (15 февраля 2023)
- 'diamonds_count' : количество алмазиков (особый вид награды)
- 'hearts_count' : количество сердечек (особый вид награды)
- 'comments_count' : количество комментариев

In [128]:
import requests
from bs4 import BeautifulSoup
import time

In [82]:
map_list_base_url = "https://www.planetminecraft.com"
map_list_url = map_list_base_url + "/projects/?order=order_downloads&p=1"

headers_raw = """Host: www.planetminecraft.com
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv;109.0) Gecko/20100101 Firefox/109.0
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8
Accept-Language: en-US,en;q=0.5
Accept-Encoding: gzip, deflate, br
DNT: 1
Connection: keep-alive
Cookie: PHPSESSID=866f96a2157204fb67c2dca8c83d9a9f
Upgrade-Insecure-Requests: 1
Sec-Fetch-Dest: document
Sec-Fetch-Mode: navigate
Sec-Fetch-Site: none
Sec-Fetch-User: ?1
Sec-GPC: 1
TE: trailers"""

headers = {header_raw.split(":")[0] : header_raw.split(":")[1][1:] for header_raw in headers_raw.split("\n")}

In [132]:
def get_maps_on_page(base_url: str, headers: dict, page_index: int) -> list:
    
    map_list_url = base_url + "/projects/?order=order_downloads&p=" + str(page_index)
    map_request = requests.get(map_list_url, headers=headers)
    map_page = BeautifulSoup(map_request.text, 'html.parser')
    
    result = list()

    map_list = map_page.findAll("ul", class_="resource_list")[0]
    for map_item in map_list.findAll("li"):


        if map_item['data-type'] == "resource":
            result.append(map_item.a['href'])
        
        
    return result

In [124]:
def get_map_details_by_url(url: str, headers: dict) -> dict:
    map_request = requests.get(url, headers=headers)
    map_page = BeautifulSoup(map_request.text, 'html.parser')
    
    map_url = url
    
    try:
        map_title = map_page.find(id="resource-title-text").text
    except:
        map_title = None
        
        
    try:
        map_date_info = map_page.findAll("div", class_="post_date txt-subtle")[0]
        map_upload_date = map_date_info.findAll("abbr")[0]['title']
        map_update_date = map_date_info.findAll("abbr")[1]['title']
    except:
        map_upload_date = None
        map_update_date = None
    
    
    try:
        map_visits_info = map_page.findAll("ul", class_="resource-statistics")[0]
        map_global_views = map_visits_info.findAll("li")[0].findAll("span")[0].text
        map_today_views = map_visits_info.findAll("li")[0].findAll("span")[1].text
        map_global_downloads = map_visits_info.findAll("li")[1].findAll("span")[0].text
        map_today_downloads = map_visits_info.findAll("li")[1].findAll("span")[1].text
    
    except:
        map_global_views = None
        map_today_views = None
        map_global_downloads = None
        map_today_downloads = None
        
    try:
        map_diamonds_count = map_page.findAll("span", class_="c-num-votes stat txtlrg")[0].text
    except:
        map_diamonds_count = None
    
    try:
        map_hearts_count = map_page.findAll("span", class_="c-num-favs stat txtlrg")[0].text
    except:
        map_hearts_count = None
    
    try:
        map_comments_count = map_page.findAll("span", class_="num_comments stat txtlrg")[0].text
    except:
        map_comments_count = None
    
    
    result = {
        'url' : map_url,
        'title' : map_title,
        'upload_date' : map_upload_date,
        'update_date' : map_update_date,
        'global_views' : map_global_views,
        'today_views' : map_today_views,
        'global_downloads' : map_global_downloads,
        'today_downloads' : map_today_downloads,
        'diamonds_count' : map_diamonds_count,
        'hearts_count' : map_hearts_count,
        'comments_count' : map_comments_count
    }
    
    return result

In [145]:
data_list = list()


for page_index in range(1, 41):
    page_maps_urls = get_maps_on_page(map_list_base_url, headers, page_index)
    
    print(f"--- Page №{page_index} start")
    
    for map_url in page_maps_urls:
        data_list.append(get_map_details_by_url(map_list_base_url + map_url, headers))
        time.sleep(.3)
        
        print(f"{data_list[-1]['title']} processed")

--- Page №1 start
[1.5]  THE DROPPER processed
[1.5] THE DROPPER 2; Newton VS Darwin processed
Greenfield - The Largest City In Minecraft - V0.5.4 processed
Imperial City processed
Cops and Robbers 4: High Security processed
The Temple of Notch processed
SkyGrid Survival Map (Updated for 1.5) processed
SUPER Pirate Battle Royale processed
1.16.1 2020 | TOY STORY 2 ADVENTURE MAP processed
Hillside Manor World [1.8] 4 Year Anniversary processed
Eldaria Islands - V3.1 - 50  biomes - 3400 by 3100 jungle, ores, horses, etc ! processed
Sonic The Hedgehog processed
Custom Terrain: The Sunken Island Adventure(1.2.5) processed
Mountain Piston House (+30 Mechanisms!)[Survival] processed
Modern City [Download] processed
Modern Redstone Smart House processed
Mega Redstone House (100+ Redstone Creations/Redstone Systems!!!)[Survival][All versions][.zip] processed
SethBling's Minecraft TNT Olympics processed
Witchcraft and Wizardry - The Floo Network (Download) processed
Herobrine's Mansion (Adventu

Hogwarts - Perfect Recreation, best ever made! processed
USS Enterprise NCC-1701A (see my other project for download) processed
World of Worlds processed
Project Zearth v.14.1 (©2010-2020) - Oldest City Project In Minecraft! processed
KINGDOMS [4x4] BedWars - Map [DOWNLOAD] processed
The Legend of Zelda: Minecraft (Adventure Map) processed
Survival Map: Arctic Abyss processed
Cube Block  (+75.000 Downloads!) [Survival/Skyblock] processed
Minecraft: DOTA processed
Razul - Skyrim Inspired Adventure Map processed
--- Page №8 start
Abandoned Kingdom [ Custom Map ] With Villages, ruins, ores   Custom Terrain! 1.8.7 ready processed
This is My Biome processed
Fully Working vanilla Bed Wars (NO MODS) processed
Cyberpunk Project Timelapse processed
The Survival Games Ultimate processed
Playable Guitar processed
Wrath of the Fallen (Minecraft Adventure Map) processed
The Ceranese Hotel - Minecraft's Largest Hotel processed
Desert City of Alamut processed
[Transcend] Modern House processed
Divers

BEST MCPE REDSTONE HOUSE (Pocket Edition) processed
Beach Town Project - Work in Progress processed
Temeraire's Islands - Dragon Realm processed
PacMan Arcade (updated to V2.0) processed
Stampy's lovely world processed
Minecraft: Catching Fire Arena! (3rd Quarter Quell Hunger Games) processed
Ant Farm Survival processed
Notchland Amusement Park 3.0 60+ Mini Games and 13 Coasters and over 1 million+ downloads! processed
Pit Frenzy: 1v1 Competitive Mob Arena Game processed
The Zone (Chernobyl Exclusion Zone) processed
|OLD, CHECK IMPROVED VERSION| FNaF vanilla 3D |Freddy Fazbear's Pizzeria| RP processed
Hello Neighbor Map!!! (NO MODS OR COMMANDS!!) processed
Huge Cruise ship (DOWNLOAD) processed
Castle (WIP) processed
The world of Steven Universe | 1.9 processed
Pixelmon Adventure Map - Crew Pixelmon (1.12.2 UPDATED) processed
Fallen Kingdom Map processed
Mechs in One Command | Create Your Own Mech Suit! processed
The Teen Titans Tower processed
Paris - Eiffel Tower processed
14 Floor In

Bowling in Vanilla Minecraft processed
Roman City [Download] processed
World War 3 Missile Base processed
Alazar - factions server spawn with download processed
Bowser Boss with only one command block processed
--- Page №21 start
Jurassic Park 1.0 + Download processed
Pixelmon Map [1.7.10] processed
SCP-087-B - Horror Adventure Map 1.6.4 processed
Dont Take Damage!!! v 1.0 processed
YogLabs [Replica] 1.6+ w/Resourcepack and Now with TechnicPacks! processed
Skyblock 1.14 processed
Guess Who 2.0 processed
Lucky Block race 1.16.5 processed
Medieval Houses Bundle - with schematics processed
Atropos - V2.5 processed
Massive Spawn Tower processed
Halion processed
One Block LuckyBlock processed
Minecraft Ps4: Blockville City 1.4 [Ps4] processed
ColoradoRiver | Modern City [Version 1.0] processed
Sky Den For Pc! processed
Pixelmon Lustrous Topaz Version - Pixelmon Adventure Map [v1.6] processed
Wave Heroes processed
Bad Santa - Adventure Map processed
Porks Play Minecraft Map processed
Among U

--- Page №28 start
The recreation of the Earth 1:1500 scale (version2.1!) processed
✪ DOWNLOAD ✪ Naruto World - Konoha, Village Hidden in the Leafs ✔ processed
Minecraft Hotel processed
Diversity 2 processed
Underwater City ~ Lumina Nocturnale ~ processed
TazerCraft - Chume Labs 2 (Mapa Download) processed
Open World Zombie Apocalypse! processed
Piston House 100 MECHANISMS! / Casa de Redstone 100 mecanismos! [Survival][1.8-1.9-1.10] processed
Broville v10 - v11 now released! processed
Royal Palace processed
Herobrine's Return (1.5 Minecraft Adventure Map) processed
[PUZZ/PARKOUR] Dimension jumper processed
Fantasy Tree Repository processed
Minecraft Tony Stark House 4.5 processed
Lapiz Point - Modern City | TRS - Now with download! [DISCONTINUED] processed
Minas Tirith - Capital of Gondor processed
Pirate Island (MAP LINK!!!) processed
Team Fortress 2: Dustbowl by Hypixel and SethBling processed
Showcase - King's Landing: An epic city by Westeroscraft (Download!) processed
Missile Wars

Parkour Map v1.1 - Christmas Calendar - minecraftpg5 processed
Zombie Siege processed
Star Wars Adventure Map (Snapshot required) processed
Ironman in Vanilla Minecraft processed
The Walls 2 - PvP Survival processed
The Heist | Adventure Map (for 1.8.7 & 1.8.8) v1.4b processed
City of Newisle v1.2 |  Solo-built modern Minecraft city | Still in active development! processed
Custom Tree Repository processed
100% Working Redstone Printer 1.5 processed
Survival Map: Underground 1.1 - You'll never see the sunlight again - Underground 2 1.6.2 + out now !!! processed
Assassin's Creep: An Open World Parkour Map processed
Plants vs Zombies (Playable) processed
[ADVENTURE MAP] THE TOURIST processed
Survival Map: ATLANTIS - Take your last breath - Minecraft 1.5.1 processed
--- Page №35 start
◄=The Babylon Project=► New York City 1936. processed
Skyblock 4.11.2 processed
Wipeout processed
Survival Island - Stranded [v1.4] processed
Cake Defense 1.4.x processed
The Survival Games - San Andreas (Dow

In [155]:
unique_data = list()

for item in data_list:
    if item not in unique_data:
        unique_data.append(item)
        
len(unique_data)

707

In [156]:
import pandas as pd

df = pd.DataFrame.from_dict(unique_data)

df

Unnamed: 0,url,title,upload_date,update_date,global_views,today_views,global_downloads,today_downloads,diamonds_count,hearts_count,comments_count
0,https://www.planetminecraft.com/project/advpuz...,[1.5] THE DROPPER,2012-11-04T11:12:41-05:00,2012-07-16T18:22:55-04:00,3442561,258,1867259,54,2226,951,838
1,https://www.planetminecraft.com/project/the-dr...,[1.5] THE DROPPER 2; Newton VS Darwin,,,2108469,136,1232046,14,992,386,519
2,https://www.planetminecraft.com/project/greenf...,Greenfield - The Largest City In Minecraft - V...,2022-10-15T11:14:09-04:00,2011-08-29T10:46:09-04:00,3696542,638,1205053,84,4739,3543,9674
3,https://www.planetminecraft.com/project/monume...,Imperial City,2013-08-19T13:51:49-04:00,2012-03-11T11:14:13-04:00,2671716,111,1066744,19,4419,2070,1469
4,https://www.planetminecraft.com/project/cops-a...,Cops and Robbers 4: High Security,2020-02-21T12:27:41-05:00,2013-04-03T14:25:14-04:00,2746901,64,1056126,1,1467,613,1342
...,...,...,...,...,...,...,...,...,...,...,...
702,https://www.planetminecraft.com/project/midtow...,"Midtown Manhattan, New York City (DOWNLOAD V2.8)",2022-12-30T14:41:17-05:00,2013-12-11T05:23:37-05:00,542108,197,130447,24,1098,753,700
703,https://www.planetminecraft.com/project/skyblo...,Skyblock 4.11.2,2022-06-21T00:10:02-04:00,2018-12-16T23:04:16-05:00,2249909,918,388006,172,1103,1044,3751
704,https://www.planetminecraft.com/project/the-su...,The Survival Games - San Andreas (Download),2014-02-18T16:27:05-05:00,2013-05-09T17:45:49-04:00,767209,40,319312,6,624,356,260
705,https://www.planetminecraft.com/project/native...,Custom Tree Repository,2013-08-18T07:05:48-04:00,2012-12-25T05:42:54-05:00,692656,157,257639,34,1506,865,485


In [159]:
df.to_excel("minecraft_maps.xlsx")