# Imports

In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

import json
import time

# Collect zkillboard Alltime stats (dead)

- 12090 alliances, ~30kb details/record, ~1 records/second -> ~370MB total, ~3.5hr total runtime

For example: 

- <https://zkillboard.com/alliance/495729389/>
- <https://zkillboard.com/api/stats/allianceID/495729389/>


In [2]:
df_dead_corps = pd.read_csv("data/alliances_dead_corporations.csv")

alliances = df_dead_corps["AllianceID"].to_list()

# scrape responsibly!!!
headers = {
    'Accept-Encoding': 'gzip',
    'User-Agent': 'https://adamcoscia.github.io/ | Adam Coscia | acoscia125@gmail.com',
}

dfs = []
columns = [
    "Ships Alltime Destroyed",
    "Ships Alltime Destroyed Rank",
    "Ships Alltime Lost",
    "Ships Alltime Lost Rank",
    "Ships Alltime Efficiency",
    "Ships Alltime Rank",
    "Points Alltime Destroyed",
    "Points Alltime Destroyed Rank",
    "Points Alltime Lost",
    "Points Alltime Lost Rank",
    "Points Alltime Efficiency",
    "Points Alltime Rank",
    "ISK Alltime Destroyed",
    "ISK Alltime Destroyed Rank",
    "ISK Alltime Lost",
    "ISK Alltime Lost Rank",
    "ISK Alltime Efficiency",
    "ISK Alltime Rank",
    "Danger Ratio",
    "Snuggly Ratio",
    "Gang Ratio",
    "Solo Ratio",
]

i = 0
n = len(alliances)
for alliance_id in alliances:
    i += 1
    print(f"{i}/{n}", end="\r")
    
    # Scrape zkillboard API
    try:
        url_zkb_api = f"https://zkillboard.com/api/stats/allianceID/{int(alliance_id)}/"
        while True:
            try:
                r = requests.get(url_zkb_api, headers=headers) # Scrape url
                stats = json.loads(r.text)
                break
            except:
                continue
    except:
        stats = None # alliance_id may be None or NaN

    try:
        ships_alltime_destroyed = stats["shipsDestroyed"]
    except:
        ships_alltime_destroyed = 0
    try:
        ships_alltime_lost = stats["shipsLost"]
    except:
        ships_alltime_lost = 0
    try:
        points_alltime_destroyed = stats["pointsDestroyed"]
    except:
        points_alltime_destroyed = 0
    try:
        points_alltime_lost = stats["pointsLost"]
    except:
        points_alltime_lost = 0
    try:
        isk_alltime_destroyed = stats["iskDestroyed"]
    except:
        isk_alltime_destroyed = 0
    try:
        isk_alltime_lost = stats["iskLost"]
    except:
        isk_alltime_lost = 0
    try:
        danger_ratio = stats["dangerRatio"]
        snuggly_ratio = 100 - danger_ratio
    except:
        danger_ratio = np.nan
        snuggly_ratio = np.nan
    try:
        gang_ratio = stats["gangRatio"]
        solo_ratio = 100 - gang_ratio
    except:
        gang_ratio = np.nan
        solo_ratio = np.nan

    # Scrape zkillboard directly for info not available through API
    try:
        url_zkb = f"https://zkillboard.com/alliance/{int(alliance_id)}/"
        while True:
            try:
                r = requests.get(url_zkb) # Scrape url
                break
            except:
                continue
    except:
        r = None # alliance_id may be None or NaN

    try:
        soup = BeautifulSoup(r.text, "html.parser") # parse text with bs
        rows = soup.find_all("tr", class_="alltime-ranks") # every page has 3 alltime-ranks rows
    except:
        rows = None # page does not exist?
    
    # Row 1 (ships)
    try:
        elements = rows[0].find_all(recursive=False)
    except:
        elements = None  # page does not exist
    try:
        ships_alltime_destroyed_rank = int(elements[2].text.strip().replace(",", ""))
    except:
        ships_alltime_destroyed_rank = np.nan
    try:
        ships_alltime_lost_rank = int(elements[4].text.strip().replace(",", ""))
    except:
        ships_alltime_lost_rank = np.nan
    try:
        ships_alltime_efficiency = float(elements[5].text.strip())
    except:
        ships_alltime_efficiency = np.nan
    try:
        ships_alltime_rank = float(elements[6].text.strip().replace(",", ""))
    except:
        ships_alltime_rank = np.nan
    
    # Row 2 (points)
    try:
        elements = rows[1].find_all(recursive=False)
    except:
        elements = None  # page does not exist
    try:
        points_alltime_destroyed_rank = int(elements[2].text.strip().replace(",", ""))
    except:
        points_alltime_destroyed_rank = np.nan
    try:
        points_alltime_lost_rank = int(elements[4].text.strip().replace(",", ""))
    except:
        points_alltime_lost_rank = np.nan
    try:
        points_alltime_efficiency = float(elements[5].text.strip())
    except:
        points_alltime_efficiency = np.nan
    try:
        points_alltime_rank = float(elements[6].text.strip().replace(",", ""))
    except:
        points_alltime_rank = np.nan

    # Row 3 (isk)
    try:
        elements = rows[2].find_all(recursive=False)
    except:
        elements = None  # page does not exist
    try:
        isk_alltime_destroyed_rank = int(elements[2].text.strip().replace(",", ""))
    except:
        isk_alltime_destroyed_rank = np.nan
    try:
        isk_alltime_lost_rank = int(elements[4].text.strip().replace(",", ""))
    except:
        isk_alltime_lost_rank = np.nan
    try:
        isk_alltime_efficiency = float(elements[5].text.strip())
    except:
        isk_alltime_efficiency = np.nan
    try:
        isk_alltime_rank = float(elements[6].text.strip().replace(",", ""))
    except:
        isk_alltime_rank = np.nan

    # Combine stats into single row/DataFrame
    row = [
        ships_alltime_destroyed,
        ships_alltime_destroyed_rank,
        ships_alltime_lost,
        ships_alltime_lost_rank,
        ships_alltime_efficiency,
        ships_alltime_rank,
        points_alltime_destroyed,
        points_alltime_destroyed_rank,
        points_alltime_lost,
        points_alltime_lost_rank,
        points_alltime_efficiency,
        points_alltime_rank,
        isk_alltime_destroyed,
        isk_alltime_destroyed_rank,
        isk_alltime_lost,
        isk_alltime_lost_rank,
        isk_alltime_efficiency,
        isk_alltime_rank,
        danger_ratio,
        snuggly_ratio,
        gang_ratio,
        solo_ratio
    ]
    df = pd.DataFrame([row], columns=columns)
    dfs.append(df)
    
    # limit the rate to the same domain by 1s
    time.sleep(1)

# combine lists of objects into dataframes
df_stats = pd.concat(dfs).reset_index(drop=True)

# combine all the dataframes and reorder columns
df_dead_corps_stats = pd.concat([df_dead_corps, df_stats], axis=1)
cols = df_dead_corps_stats.columns.tolist()
cols.append(cols.pop(cols.index('Former corporations')))
df_dead_corps_stats = df_dead_corps_stats[cols]

# write to file
df_dead_corps_stats.to_csv("data/alliances_dead_corporations_stats.csv", index=False)

df_dead_corps_stats

1450/12090

ValueError: cannot convert float NaN to integer

In [4]:
df_dead_corps = pd.read_csv("data/alliances_dead_corporations.csv")



df_dead_corps[df_dead_corps["AllianceID"].isna()]

Unnamed: 0,Alliance,Systems,Members,Corps,Ticker,Faction,Founded,Closed,AllianceID,Former corporations
1449,Brotherhood Of Steel,0,0,0,,,,,,[]
1748,Chaos Incarnate.,0,0,0,,,,,,[]
3071,EFF-TerraCorps Orbital,0,0,0,,,,,,[]
3322,EVE Alliance #56845236,0,0,0,,,,,,[]
5177,Irano Research Alliance,0,0,0,,,,,,[]
5316,judgement Alliance,0,0,0,,,,,,[]
5778,Lonehorn's Astral Alliance,0,0,0,,,,,,[]
5787,LONG ZU Alliance,0,0,0,,,,,,[]
6165,Miners United,0,0,0,,,,,,[]
6221,Mjolnir Alliance,0,0,0,,,,,,[]
