In [1]:
# Initial imports
import numpy as np
import pandas as pd 
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline

import random
import urllib.request
import requests
from bs4 import BeautifulSoup
import warnings
warnings.filterwarnings('ignore')

In [2]:
base_url = "https://www.superherodb.com/characters/"
list_chars_cols = ['Name', 'Url']

url = base_url 
source_code = requests.get(url)
plain_text = source_code.text
soup = BeautifulSoup(plain_text, "html.parser")

In [3]:
list_of_supers = soup.findAll('li', {'class': 'char-li'})

In [4]:
len(list_of_supers)

743

In [5]:
data = DataFrame(columns=list_chars_cols)
for l in list_of_supers:
    temp = DataFrame([[l.text, l.find('a').get('href')]])
    temp.columns = list_chars_cols
    data = data.append(temp, ignore_index=1)

In [6]:
data.head()

Unnamed: 0,Name,Url
0,3-D Man,/3-d-man/10-226/
1,A-Bomb,/a-bomb/10-10060/
2,Abe Sapien,/abe-sapien/10-956/
3,Abin Sur,/abin-sur/10-1460/
4,Abomination,/abomination/10-1/


In [7]:
char_base_url = 'https://www.superherodb.com'
stats_list = ['Intelligence', 'Strength', 'Speed', 'Durability', 'Power', 'Combat', 
              'Url', 'Intelligence', 'Strength', 'Speed', 'Durability', 'Power', 
              'Combat', 'Full name', 'Alter Egos', 'Aliases', 'Place of birth', 
              'First appearance', 'Creator', 'Alignment', 'Gender', 'Race', 'Height', 
              'Weight', 'Eye color', 'Hair color', 'Occupation', 'Base', 'Team Affiliation', 'Relatives']
stat_data = DataFrame(columns=stats_list)
for index, row in data.iterrows():
    stat_map = {'Url': str(row['Url'])}
    char_url = char_base_url + str(row['Url'])
    source_code = requests.get(char_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "html.parser")
    attrs = soup.findAll('div', {'class': 'gridbarholder'})
    personal = soup.findAll('table', {'class': 'table'})
    for a in attrs[:6]:
        stat_name = a.find('div', {'class': 'gridbarlabel'}).text
        stat_value = a.find('div', {'class': 'gridbarvalue'}).text
        stat_map[str(stat_name)] = stat_value
    for p in personal[:3]:
        trs = p.findAll('tr')
        for tr in trs:
            stat_map[str(tr.find('td').text.strip())] = str(tr.findAll('td')[1].text.strip())
    stat_map[str(personal[3].findAll('td')[0].text.strip())] = str(personal[3].findAll('td')[1].text.strip())
    stat_map[str(personal[3].findAll('td')[2].text.strip())] = str(personal[3].findAll('td')[3].text.strip())
    for key in stat_map.keys():
        stat_data.loc[index,key] = stat_map[key]

In [9]:
stat_data.head()

Unnamed: 0,Intelligence,Strength,Speed,Durability,Power,Combat,Url,Intelligence.1,Strength.1,Speed.1,...,Race,Height,Weight,Eye color,Hair color,Occupation,Base,Team Affiliation,Relatives,Skin color
0,80,35,45,35,25,55,/3-d-man/10-226/,80,35,45,...,-,6'2 // 188 cm,200 lb // 90 kg,Brown,Grey,"Test pilot, adventurer",-,"Agents of Atlas, Asgardians, Formerly: Avengers","Hal Chandler (brother), Peggy Clark (sister-in...",
1,75,100,20,80,25,65,/a-bomb/10-10060/,75,100,20,...,Human,6'8 // 203 cm,980 lb // 441 kg,Yellow,No Hair,"Musician, adventurer, author; formerly talk sh...",-,"Teen Brigade (Leader), Ultimate Fantastic Four...",Marlo Chandler-Jones (wife); Polly (aunt); Mrs...,
2,95,30,35,65,100,85,/abe-sapien/10-956/,95,30,35,...,Icthyo Sapien,6'3 // 191 cm,145 lb // 65 kg,Blue,No Hair,Paranormal Investigator,-,Bureau for Paranormal Research and Defense,"Edith Howard (wife, deceased)",Blue
3,80,90,55,65,100,65,/abin-sur/10-1460/,80,90,55,...,Ungaran,6'1 // 185 cm,200 lb // 90 kg,Blue,No Hair,"Green Lantern, former history professor",Oa,"Legion of Super-Heroes, Formerly: Green Lanter...","Amon Sur (son), Arin Sur (sister), Thaal Sines...",Red
4,85,80,55,90,65,95,/abomination/10-1/,85,80,55,...,Human / Radiation,6'8 // 203 cm,980 lb // 441 kg,Green,No Hair,Ex-Spy,Mobile,"Annihilators (Leader), Wrecking Crew, Masters ...","Nadia Dornova Blonsky (wife, separated)",


In [10]:
full_data = pd.merge(data, stat_data, left_index=True, right_index=True)

In [14]:
full_data.head()

Unnamed: 0,Name,Intelligence,Strength,Speed,Durability,Power,Combat,Url_y,Intelligence.1,Strength.1,...,Race,Height,Weight,Eye color,Hair color,Occupation,Base,Team Affiliation,Relatives,Skin color
0,3-D Man,80,35,45,35,25,55,/3-d-man/10-226/,80,35,...,-,6'2 // 188 cm,200 lb // 90 kg,Brown,Grey,"Test pilot, adventurer",-,"Agents of Atlas, Asgardians, Formerly: Avengers","Hal Chandler (brother), Peggy Clark (sister-in...",
1,A-Bomb,75,100,20,80,25,65,/a-bomb/10-10060/,75,100,...,Human,6'8 // 203 cm,980 lb // 441 kg,Yellow,No Hair,"Musician, adventurer, author; formerly talk sh...",-,"Teen Brigade (Leader), Ultimate Fantastic Four...",Marlo Chandler-Jones (wife); Polly (aunt); Mrs...,
2,Abe Sapien,95,30,35,65,100,85,/abe-sapien/10-956/,95,30,...,Icthyo Sapien,6'3 // 191 cm,145 lb // 65 kg,Blue,No Hair,Paranormal Investigator,-,Bureau for Paranormal Research and Defense,"Edith Howard (wife, deceased)",Blue
3,Abin Sur,80,90,55,65,100,65,/abin-sur/10-1460/,80,90,...,Ungaran,6'1 // 185 cm,200 lb // 90 kg,Blue,No Hair,"Green Lantern, former history professor",Oa,"Legion of Super-Heroes, Formerly: Green Lanter...","Amon Sur (son), Arin Sur (sister), Thaal Sines...",Red
4,Abomination,85,80,55,90,65,95,/abomination/10-1/,85,80,...,Human / Radiation,6'8 // 203 cm,980 lb // 441 kg,Green,No Hair,Ex-Spy,Mobile,"Annihilators (Leader), Wrecking Crew, Masters ...","Nadia Dornova Blonsky (wife, separated)",


In [12]:
full_data.drop('Url_x', 1,  inplace=True)
f = full_data.rename(index=str, columns={"Url_y": "Url"})

In [15]:
full_data.head()

Unnamed: 0,Name,Intelligence,Strength,Speed,Durability,Power,Combat,Url_y,Intelligence.1,Strength.1,...,Race,Height,Weight,Eye color,Hair color,Occupation,Base,Team Affiliation,Relatives,Skin color
0,3-D Man,80,35,45,35,25,55,/3-d-man/10-226/,80,35,...,-,6'2 // 188 cm,200 lb // 90 kg,Brown,Grey,"Test pilot, adventurer",-,"Agents of Atlas, Asgardians, Formerly: Avengers","Hal Chandler (brother), Peggy Clark (sister-in...",
1,A-Bomb,75,100,20,80,25,65,/a-bomb/10-10060/,75,100,...,Human,6'8 // 203 cm,980 lb // 441 kg,Yellow,No Hair,"Musician, adventurer, author; formerly talk sh...",-,"Teen Brigade (Leader), Ultimate Fantastic Four...",Marlo Chandler-Jones (wife); Polly (aunt); Mrs...,
2,Abe Sapien,95,30,35,65,100,85,/abe-sapien/10-956/,95,30,...,Icthyo Sapien,6'3 // 191 cm,145 lb // 65 kg,Blue,No Hair,Paranormal Investigator,-,Bureau for Paranormal Research and Defense,"Edith Howard (wife, deceased)",Blue
3,Abin Sur,80,90,55,65,100,65,/abin-sur/10-1460/,80,90,...,Ungaran,6'1 // 185 cm,200 lb // 90 kg,Blue,No Hair,"Green Lantern, former history professor",Oa,"Legion of Super-Heroes, Formerly: Green Lanter...","Amon Sur (son), Arin Sur (sister), Thaal Sines...",Red
4,Abomination,85,80,55,90,65,95,/abomination/10-1/,85,80,...,Human / Radiation,6'8 // 203 cm,980 lb // 441 kg,Green,No Hair,Ex-Spy,Mobile,"Annihilators (Leader), Wrecking Crew, Masters ...","Nadia Dornova Blonsky (wife, separated)",


In [16]:
full_data.to_csv('SuperheroDataset.csv', encoding='utf-8')